Big Data Technology: Principles and Applications (3rd Edition), Lin Ziyu — Companion Lab Answers: Lab 2, Getting Familiar with Common HDFS Operations
1. Implement each of the following functions in a program, and accomplish the same tasks using the shell commands provided by Hadoop;
1.1 Upload any text file to HDFS; if the target file already exists in HDFS, let the user choose whether to append to the end of the original file or overwrite it;
```shell
# check whether the file exists
hdfs dfs -test -e /hdfstestfile.txt
# check the result: 0 means it exists, 1 means it does not
echo $?
# the file already exists: append to the end of the original file
hdfs dfs -appendToFile localtestfile.txt /hdfstestfile.txt
# the file already exists: overwrite the original file
hdfs dfs -copyFromLocal -f localtestfile.txt /hdfstestfile.txt
```
```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;

public class AddFile_0101 {
    /**
     * Check whether a path exists.
     */
    public static boolean test(Configuration conf, String path) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        return fs.exists(new Path(path));
    }

    /**
     * Copy a local file to the given HDFS path, overwriting it if it already exists.
     */
    public static void copyFromLocalFile(Configuration conf, String localFilePath, String remoteFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path localPath = new Path(localFilePath);
        Path remotePath = new Path(remoteFilePath);
        // The first argument of fs.copyFromLocalFile controls whether the source is
        // deleted; the second, whether an existing destination is overwritten
        fs.copyFromLocalFile(false, true, localPath, remotePath);
        fs.close();
    }

    /**
     * Append the contents of a local file to an HDFS file.
     */
    public static void appendToFile(Configuration conf, String localFilePath, String remoteFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        // Open an input stream on the local file
        FileInputStream in = new FileInputStream(localFilePath);
        // Open an output stream whose writes are appended to the end of the HDFS file
        FSDataOutputStream out = fs.append(remotePath);
        // Copy the contents
        byte[] data = new byte[1024];
        int read = -1;
        while ((read = in.read(data)) > 0) {
            out.write(data, 0, read);
        }
        out.close();
        in.close();
        fs.close();
    }

    /**
     * Main.
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:8020");
        String localFilePath = "/text.txt";   // local path
        String remoteFilePath = "/text.txt";  // HDFS path
        // String choice = "append";    // append if the file exists
        String choice = "overwrite";    // overwrite if the file exists
        try {
            // Check whether the file already exists
            boolean fileExists = false;
            if (AddFile_0101.test(conf, remoteFilePath)) {
                fileExists = true;
                System.out.println(remoteFilePath + " already exists...");
            } else {
                System.out.println(remoteFilePath + " does not exist...");
            }
            // Process accordingly
            if (!fileExists) {
                // The file does not exist: upload it
                AddFile_0101.copyFromLocalFile(conf, localFilePath, remoteFilePath);
                System.out.println(localFilePath + " has been uploaded to " + remoteFilePath);
            } else if (choice.equals("overwrite")) {
                AddFile_0101.copyFromLocalFile(conf, localFilePath, remoteFilePath);
                System.out.println(localFilePath + " has overwritten " + remoteFilePath);
            } else if (choice.equals("append")) {
                AddFile_0101.appendToFile(conf, localFilePath, remoteFilePath);
                System.out.println(localFilePath + " has been appended to " + remoteFilePath);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
```
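All the Java examples in this lab compile and run the same way. A minimal sketch, assuming a working `hadoop` command on the PATH and the source saved as `AddFile_0101.java` (an Eclipse or Maven project works equally well):

```shell
# compile against the Hadoop client libraries
javac -cp $(hadoop classpath) AddFile_0101.java
# run with the current directory and the Hadoop jars on the classpath
java -cp .:$(hadoop classpath) AddFile_0101
```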
1.2 Download a specified file from HDFS; if a local file with the same name already exists, automatically rename the downloaded file;

```shell
# if the local file already exists, save the download under a new name
if $(hdfs dfs -test -e file:///home/test.txt);
then $(hdfs dfs -copyToLocal /test.txt /home/test2.txt);
else $(hdfs dfs -copyToLocal /test.txt /home/test.txt);
fi
```
```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;

public class HDFSApi_0102 {
    /**
     * Download a file to the local file system.
     * If the local path already exists, automatically rename the downloaded file.
     */
    public static void copyToLocal(Configuration conf, String remoteFilePath, String localFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        File f = new File(localFilePath);
        // If a local file with this name exists, pick a new name automatically
        if (f.exists()) {
            System.out.println(localFilePath + " already exists...");
            Integer i = 0;
            while (true) {
                f = new File(localFilePath + "_" + i.toString());
                if (!f.exists()) {
                    localFilePath = localFilePath + "_" + i.toString();
                    break;
                }
                i++;  // try the next suffix
            }
            System.out.println("The file will be renamed to: " + localFilePath);
        }
        // Download the file
        Path localPath = new Path(localFilePath);
        fs.copyToLocalFile(remotePath, localPath);
        fs.close();
    }

    /**
     * Main.
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:8020");
        String localFilePath = "/home/text.txt";
        String remoteFilePath = "/text.txt";
        try {
            HDFSApi_0102.copyToLocal(conf, remoteFilePath, localFilePath);
            System.out.println("Download finished");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
```
1.3 Output the contents of a specified HDFS file to the terminal;

```shell
hdfs dfs -cat /test.txt
```
```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;

public class HDFSApi_0103 {
    /**
     * Print the contents of a file.
     */
    public static void cat(Configuration conf, String remoteFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        FSDataInputStream in = fs.open(remotePath);
        BufferedReader d = new BufferedReader(new InputStreamReader(in));
        String line = null;
        while ((line = d.readLine()) != null) {
            System.out.println(line);
        }
        d.close();
        in.close();
        fs.close();
    }

    /**
     * Main.
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:8020");
        String remoteFilePath = "/user/hadoop/text.txt";
        try {
            System.out.println("Reading file: " + remoteFilePath);
            HDFSApi_0103.cat(conf, remoteFilePath);
            System.out.println("\nDone!");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
```
1.4 Display the read/write permissions, size, creation time, path, and other information of a specified HDFS file;

```shell
hdfs dfs -ls -h /test.txt
```
```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;
import java.text.SimpleDateFormat;

public class HDFSApi_0104 {
    /**
     * Show information about the specified file.
     */
    public static void ls(Configuration conf, String remoteFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        FileStatus[] fileStatuses = fs.listStatus(remotePath);
        for (FileStatus s : fileStatuses) {
            System.out.println("Path: " + s.getPath().toString());
            System.out.println("Permissions: " + s.getPermission().toString());
            System.out.println("Size: " + s.getLen());
            // getModificationTime() returns a timestamp; convert it to a date string
            Long timeStamp = s.getModificationTime();
            SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            String date = format.format(timeStamp);
            System.out.println("Time: " + date);
        }
        fs.close();
    }

    /**
     * Main.
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:8020");
        String remoteFilePath = "/text.txt";
        try {
            System.out.println("Reading file information: " + remoteFilePath);
            HDFSApi_0104.ls(conf, remoteFilePath);
            System.out.println("\nDone");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
```
1.5 Given an HDFS directory, output the read/write permissions, size, creation time, path, and other information of every file under it; if an entry is a directory, recursively output the information of all files under it;

```shell
hdfs dfs -ls -R -h /hadoop
```
```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;
import java.text.SimpleDateFormat;

public class HDFSApi_0105 {
    /**
     * Show information about every file under the given directory.
     */
    public static void lsDir(Configuration conf, String remoteDir) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path dirPath = new Path(remoteDir);
        // Recursively list all files under the directory
        RemoteIterator<LocatedFileStatus> remoteIterator = fs.listFiles(dirPath, true);
        // Print the information of each file
        while (remoteIterator.hasNext()) {
            FileStatus s = remoteIterator.next();
            System.out.println("Path: " + s.getPath().toString());
            System.out.println("Permissions: " + s.getPermission().toString());
            System.out.println("Size: " + s.getLen());
            // Convert the modification timestamp to a date string
            Long timeStamp = s.getModificationTime();
            SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            String date = format.format(timeStamp);
            System.out.println("Time: " + date);
            System.out.println();
        }
        fs.close();
    }

    /**
     * Main.
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:8020");
        String remoteDir = "/user/hadoop";
        try {
            System.out.println("(Recursively) reading the information of all files under: " + remoteDir);
            HDFSApi_0105.lsDir(conf, remoteDir);
            System.out.println("Done");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
```
1.6 Given the path of a file inside HDFS, create and delete that file; if the directory containing the file does not exist, create the directory automatically;

```shell
# create the file, creating the parent directories first if needed
if $(hdfs dfs -test -d dir1/dir2);
then $(hdfs dfs -touchz dir1/dir2/filename);
else $(hdfs dfs -mkdir -p dir1/dir2 && hdfs dfs -touchz dir1/dir2/filename);
fi
# delete the file
hdfs dfs -rm dir1/dir2/filename
```
```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;

public class HDFSApi_0106 {
    /**
     * Check whether a path exists.
     */
    public static boolean test(Configuration conf, String path) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        return fs.exists(new Path(path));
    }

    /**
     * Create a directory.
     */
    public static boolean mkdir(Configuration conf, String remoteDir) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path dirPath = new Path(remoteDir);
        boolean result = fs.mkdirs(dirPath);
        fs.close();
        return result;
    }

    /**
     * Create an empty file.
     */
    public static void touchz(Configuration conf, String remoteFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        FSDataOutputStream outputStream = fs.create(remotePath);
        outputStream.close();
        fs.close();
    }

    /**
     * Delete a file.
     */
    public static boolean rm(Configuration conf, String remoteFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        boolean result = fs.delete(remotePath, false);
        fs.close();
        return result;
    }

    /**
     * Main.
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:8020");
        String remoteFilePath = "/user/hadoop/input/text.txt";
        String remoteDir = "/user/hadoop/input";
        try {
            // If the file exists, delete it; otherwise create it
            if (HDFSApi_0106.test(conf, remoteFilePath)) {
                HDFSApi_0106.rm(conf, remoteFilePath);
                System.out.println("Deleted path: " + remoteFilePath);
            } else {
                if (!HDFSApi_0106.test(conf, remoteDir)) {
                    // The parent directory does not exist: create it
                    HDFSApi_0106.mkdir(conf, remoteDir);
                    System.out.println("Created directory: " + remoteDir);
                }
                HDFSApi_0106.touchz(conf, remoteFilePath);
                System.out.println("Created path: " + remoteFilePath);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
```
1.7 Given the path of an HDFS directory, create and delete that directory. When creating, automatically create any missing parent directories; when deleting, let the user decide whether to delete the directory if it is not empty;

```shell
# create the directory (missing parents are created automatically)
hdfs dfs -mkdir -p dir1/dir2
# delete the directory (fails if it is not empty)
hdfs dfs -rmdir dir1/dir2
# force-delete the directory together with its contents
hdfs dfs -rm -R dir1/dir2
```
```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;

public class HDFSApi_0107 {
    /**
     * Check whether a path exists.
     */
    public static boolean test(Configuration conf, String path) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        return fs.exists(new Path(path));
    }

    /**
     * Check whether a directory is empty.
     * true: empty, false: not empty
     */
    public static boolean isDirEmpty(Configuration conf, String remoteDir) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path dirPath = new Path(remoteDir);
        RemoteIterator<LocatedFileStatus> remoteIterator = fs.listFiles(dirPath, true);
        return !remoteIterator.hasNext();
    }

    /**
     * Create a directory.
     */
    public static boolean mkdir(Configuration conf, String remoteDir) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path dirPath = new Path(remoteDir);
        boolean result = fs.mkdirs(dirPath);
        fs.close();
        return result;
    }

    /**
     * Delete a directory.
     */
    public static boolean rmDir(Configuration conf, String remoteDir) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path dirPath = new Path(remoteDir);
        // The second argument makes the delete recursive
        boolean result = fs.delete(dirPath, true);
        fs.close();
        return result;
    }

    /**
     * Main.
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:8020");
        String remoteDir = "/user/hadoop/input";
        // Whether to delete the directory even when it is not empty
        Boolean forceDelete = false;
        try {
            // Create the directory if it does not exist; otherwise delete it
            if (!HDFSApi_0107.test(conf, remoteDir)) {
                HDFSApi_0107.mkdir(conf, remoteDir);
                System.out.println("Created directory: " + remoteDir);
            } else {
                if (HDFSApi_0107.isDirEmpty(conf, remoteDir) || forceDelete) {
                    HDFSApi_0107.rmDir(conf, remoteDir);
                    System.out.println("Deleted directory: " + remoteDir);
                } else {
                    // The directory is not empty
                    System.out.println("Directory is not empty, not deleting: " + remoteDir);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
```
1.8 Append content to a specified HDFS file, letting the user choose whether to append at the beginning or at the end of the original file;

```shell
# append to the end of the original file
hdfs dfs -appendToFile local.txt test.txt
# append to the beginning: download the file, append it after the local
# content, then upload the result back, overwriting the original
hdfs dfs -get test.txt
cat test.txt >> local.txt
hdfs dfs -copyFromLocal -f local.txt test.txt
```
```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;

public class HDFSApi_0108 {
    /**
     * Check whether a path exists.
     */
    public static boolean test(Configuration conf, String path) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        return fs.exists(new Path(path));
    }

    /**
     * Append a text string to a file.
     */
    public static void appendContentToFile(Configuration conf, String content, String remoteFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        // Open an output stream that appends to the end of the file
        FSDataOutputStream out = fs.append(remotePath);
        out.write(content.getBytes());
        out.close();
        fs.close();
    }

    /**
     * Append the contents of a local file to an HDFS file.
     */
    public static void appendToFile(Configuration conf, String localFilePath, String remoteFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        // Input stream on the local file
        FileInputStream in = new FileInputStream(localFilePath);
        // Output stream that appends to the end of the HDFS file
        FSDataOutputStream out = fs.append(remotePath);
        // Copy the contents
        byte[] data = new byte[1024];
        int read = -1;
        while ((read = in.read(data)) > 0) {
            out.write(data, 0, read);
        }
        out.close();
        in.close();
        fs.close();
    }

    /**
     * Move a file to the local file system, deleting the source afterwards.
     */
    public static void moveToLocalFile(Configuration conf, String remoteFilePath, String localFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        Path localPath = new Path(localFilePath);
        fs.moveToLocalFile(remotePath, localPath);
    }

    /**
     * Create an empty file.
     */
    public static void touchz(Configuration conf, String remoteFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        FSDataOutputStream outputStream = fs.create(remotePath);
        outputStream.close();
        fs.close();
    }

    /**
     * Main.
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:8020");
        String remoteFilePath = "/user/hadoop/text.txt";
        String content = "Newly appended content\n";
        String choice = "after";     // append at the end
        // String choice = "before"; // append at the beginning
        try {
            // Check whether the file exists
            if (!HDFSApi_0108.test(conf, remoteFilePath)) {
                System.out.println("File does not exist: " + remoteFilePath);
            } else {
                if (choice.equals("after")) {
                    HDFSApi_0108.appendContentToFile(conf, content, remoteFilePath);
                    System.out.println("Appended content to the end of " + remoteFilePath);
                } else if (choice.equals("before")) {
                    // There is no API that prepends directly, so first move the file
                    // to the local file system, then create a new HDFS file and
                    // append the pieces in order
                    String localTmpPath = "/user/hadoop/tmp.txt";
                    HDFSApi_0108.moveToLocalFile(conf, remoteFilePath, localTmpPath);
                    // Create a new (empty) file
                    HDFSApi_0108.touchz(conf, remoteFilePath);
                    // Write the new content first
                    HDFSApi_0108.appendContentToFile(conf, content, remoteFilePath);
                    // Then write the original content back
                    HDFSApi_0108.appendToFile(conf, localTmpPath, remoteFilePath);
                    System.out.println("Appended content to the beginning of " + remoteFilePath);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
```
1.9 Delete a specified file in HDFS;

```shell
hdfs dfs -rm test.txt
```
```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;

public class HDFSApi {
    /**
     * Delete a file.
     */
    public static boolean rm(Configuration conf, String remoteFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        boolean result = fs.delete(remotePath, false);
        fs.close();
        return result;
    }

    /**
     * Main.
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:8020");
        String remoteFilePath = "/user/hadoop/text.txt";
        try {
            if (HDFSApi.rm(conf, remoteFilePath)) {
                System.out.println("Deleted file: " + remoteFilePath);
            } else {
                System.out.println("Operation failed (file does not exist or could not be deleted)");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
```
1.10 Move an HDFS file to a specified path

```shell
hdfs dfs -mv test.txt dir/test.txt
```
```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;

public class HDFSApi {
    /**
     * Move a file.
     */
    public static boolean mv(Configuration conf, String remoteFilePath, String remoteToFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path srcPath = new Path(remoteFilePath);
        Path dstPath = new Path(remoteToFilePath);
        boolean result = fs.rename(srcPath, dstPath);
        fs.close();
        return result;
    }

    /**
     * Main.
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:8020");
        String remoteFilePath = "hdfs:///user/hadoop/text.txt";  // source HDFS path
        String remoteToFilePath = "hdfs:///user/hadoop/new.txt"; // destination HDFS path
        try {
            if (HDFSApi.mv(conf, remoteFilePath, remoteToFilePath)) {
                System.out.println("Moved " + remoteFilePath + " to " + remoteToFilePath);
            } else {
                System.out.println("Operation failed (source file does not exist or move failed)");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
```
2. Program a class "MyFSDataInputStream" that extends "org.apache.hadoop.fs.FSDataInputStream", with the following requirement: implement a method "readLine()" that reads a specified HDFS file line by line, returning null when the end of the file is reached and otherwise the text of one line.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.*;

public class MyFSDataInputStream extends FSDataInputStream {
    public MyFSDataInputStream(InputStream in) {
        super(in);
    }

    /**
     * Read one line: read one character at a time until "\n" is seen,
     * then return the line read so far (assumes lines shorter than 1024 characters).
     */
    public static String readline(BufferedReader br) throws IOException {
        char[] data = new char[1024];
        int read = -1;
        int off = 0;
        // On each call, br resumes from where the previous read stopped,
        // so off starts from 0 again inside this method
        while ((read = br.read(data, off, 1)) != -1) {
            if (String.valueOf(data[off]).equals("\n")) {
                off += 1;
                break;
            }
            off += 1;
        }
        if (off > 0) {
            return String.valueOf(data, 0, off);
        } else {
            return null;
        }
    }

    /**
     * Print the contents of a file.
     */
    public static void cat(Configuration conf, String remoteFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        FSDataInputStream in = fs.open(remotePath);
        BufferedReader br = new BufferedReader(new InputStreamReader(in));
        String line = null;
        while ((line = MyFSDataInputStream.readline(br)) != null) {
            System.out.print(line);  // the line already ends with "\n"
        }
        br.close();
        in.close();
        fs.close();
    }

    /**
     * Main.
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:9000");
        String remoteFilePath = "/user/hadoop/text.txt";
        try {
            MyFSDataInputStream.cat(conf, remoteFilePath);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
```
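A quick way to sanity-check `readLine()` is to compare the program's output against the shell, assuming the same example path used in `main`:

```shell
# the output of the Java program above should match this
hdfs dfs -cat /user/hadoop/text.txt
```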
3. Use java.net.URL and org.apache.hadoop.fs.FsUrlStreamHandlerFactory to output the contents of a specified HDFS file to the terminal.

```java
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;
import org.apache.hadoop.io.IOUtils;

public class FsUrl {
    static {
        // Register the hdfs:// URL scheme; this can only be done once per JVM
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    }

    public static void cat(String remoteFilePath) {
        try (InputStream in = new URL("hdfs", "localhost", 9000, remoteFilePath).openStream()) {
            IOUtils.copyBytes(in, System.out, 4096, false);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        // Example path (an assumption; adjust to a file that exists in your HDFS)
        FsUrl.cat("/user/hadoop/text.txt");
    }
}
```
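A possible run, assuming the class was compiled as in the sketch under task 1.1 and that `/user/hadoop/text.txt` exists in HDFS:

```shell
# prints the file's contents to the terminal via the hdfs:// URL scheme
java -cp .:$(hadoop classpath) FsUrl
```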