The Road to Big Data, Week10_day01 (Inserting data into HBase by directly creating HFile files)
1. Write the MapReduce job that converts the HDFS data into HFiles:

package com.wyh.parctise;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.hbase.mapreduce.KeyValueSortReducer;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class HDFStoHFile {

    /**
     * Map stage: parse each comma-separated line and emit one KeyValue per column.
     */
    public static class HdfsToHFileMap extends Mapper<LongWritable, Text, ImmutableBytesWritable, KeyValue> {
        @Override
        protected void map(LongWritable k1, Text v1, Context context) throws IOException, InterruptedException {
            String[] split = v1.toString().split(",");
            String id = split[0];
            // The output key is the row key
            ImmutableBytesWritable key = new ImmutableBytesWritable(id.getBytes());
            // The output values: one KeyValue per column, all in the "info" column family
            KeyValue name = new KeyValue(id.getBytes(), "info".getBytes(), "name".getBytes(), split[1].getBytes());
            KeyValue age = new KeyValue(id.getBytes(), "info".getBytes(), "age".getBytes(), split[2].getBytes());
            KeyValue gender = new KeyValue(id.getBytes(), "info".getBytes(), "gender".getBytes(), split[3].getBytes());
            KeyValue clazz = new KeyValue(id.getBytes(), "info".getBytes(), "clazz".getBytes(), split[4].getBytes());
            // Write the KeyValues out
            context.write(key, name);
            context.write(key, age);
            context.write(key, gender);
            context.write(key, clazz);
        }
    }

    public static void main(String[] args) throws Exception {
        // Create the HBase configuration
        Configuration conf = HBaseConfiguration.create();

        // Create the job
        Job job = Job.getInstance(conf);
        job.setJobName("HDFStoHfile");
        job.setJarByClass(HDFStoHFile.class);
        job.setOutputKeyClass(ImmutableBytesWritable.class);
        job.setOutputValueClass(KeyValue.class);

        // Set the map stage
        job.setMapperClass(HdfsToHFileMap.class);
        // Set the reduce stage: a reducer class HBase provides for this purpose
        job.setReducerClass(KeyValueSortReducer.class);

        // Open the target table and wire it into the job's output configuration
        HTable stu4 = new HTable(conf, "stu4");
        HFileOutputFormat2.configureIncrementalLoad(job, stu4);

        // Set the HDFS input path and the HFile output path
        FileInputFormat.addInputPath(job, new Path("/data/students.txt"));
        FileOutputFormat.setOutputPath(job, new Path("/data/hfile1"));

        // Submit the job and wait for it to finish
        job.waitForCompletion(true);
    }
}
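The mapper assumes each line of /data/students.txt is a comma-separated record with five fields in the order id,name,age,gender,clazz. A made-up example line, purely for illustration (not from the actual dataset):

1001,zhangsan,22,male,class01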
Prerequisite: the target table must already be created in HBase, and the source data must already exist in HDFS.
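If the table does not exist yet, create it before running the job, since HFileOutputFormat2.configureIncrementalLoad reads the table's region layout to set up the partitioner. A minimal sketch in the HBase shell, assuming the single column family info used by the mapper:

create 'stu4', 'info'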
2. Bulk-load the generated HFiles into the HBase table:
package com.wyh.parctise;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;

public class LoadHfileToHbase {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "testmaster:2181,testnode1:2181,testnode2:2181,testnode3:2181");

        // Open the target table
        HTable stu4 = new HTable(conf, "stu4");

        // Bulk-load the HFiles generated by the previous job into the table
        LoadIncrementalHFiles loadIncrementalHFiles = new LoadIncrementalHFiles(conf);
        loadIncrementalHFiles.doBulkLoad(new Path("/data/hfile1"), stu4);
    }
}
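As an alternative to this small driver class, the same bulk load can usually be triggered directly from the command line with the tool class itself (the exact invocation may vary with the HBase version):

hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles /data/hfile1 stu4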
Note: both programs are run by packaging them into a jar. Be sure to package the entire project together with its dependencies; otherwise the Hadoop environment will be missing the HBase dependencies and the job will fail. Add the following to pom.xml (this is build configuration, not a dependency):
<build>
    <plugins>
        <!-- compiler plugin: set the JDK version -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>2.3.2</version>
            <configuration>
                <encoding>UTF-8</encoding>
                <source>1.8</source>
                <target>1.8</target>
                <showWarnings>true</showWarnings>
            </configuration>
        </plugin>
        <!-- assembly plugin: build a jar that bundles the dependencies -->
        <plugin>
            <artifactId>maven-assembly-plugin</artifactId>
            <configuration>
                <descriptorRefs>
                    <descriptorRef>jar-with-dependencies</descriptorRef>
                </descriptorRefs>
            </configuration>
            <executions>
                <execution>
                    <id>make-assembly</id>
                    <phase>package</phase>
                    <goals>
                        <goal>single</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
Then package the project and run it in the Hadoop environment, specifying the fully qualified class name; a sketch of the commands follows below.
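A rough sketch of the full run, assuming the assembly jar is produced under target/ (the jar file name below is hypothetical; substitute the artifact name your pom.xml actually produces):

mvn clean package
hadoop jar target/hbase-bulkload-1.0-jar-with-dependencies.jar com.wyh.parctise.HDFStoHFile
hadoop jar target/hbase-bulkload-1.0-jar-with-dependencies.jar com.wyh.parctise.LoadHfileToHbase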