Getting Started with Hadoop (Part 13): Remotely Submitting a WordCount Program to a Hadoop Cluster
1. Project Structure
The project uses the following files: WordCount.java, core-site.xml, mapreduce-site.xml, yarn-site.xml, log4j.properties, and pom.xml; a sketch of the layout follows.
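A plausible Maven layout for these files, with the XML and properties files on the classpath under src/main/resources (the exact placement is an assumption, not confirmed by the original post):

hadoop-test/
├── pom.xml
└── src/
    └── main/
        ├── java/
        │   └── com/mk/mapreduce/WordCount.java
        └── resources/
            ├── core-site.xml
            ├── mapreduce-site.xml
            ├── yarn-site.xml
            └── log4j.properties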
2. Project Source Code
(1) WordCount.java
package com.mk.mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.StringTokenizer;

public class WordCount {

    // Mapper: split each input line into tokens and emit (word, 1)
    public static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer tokenizer = new StringTokenizer(value.toString());
            while (tokenizer.hasMoreTokens()) {
                context.write(new Text(tokenizer.nextToken()), new IntWritable(1));
            }
        }
    }

    // Reducer (also used as the combiner): sum the counts for each word
    public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        final String uri = "hdfs://192.168.150.128:9000";

        Configuration conf = new Configuration();
        // Required when submitting from a Windows client to a Linux cluster
        if (System.getProperty("os.name").toLowerCase().contains("win"))
            conf.set("mapreduce.app-submission.cross-platform", "true");

        // Job.getInstance replaces the deprecated new Job(conf, ...) constructor
        Job job = Job.getInstance(conf, "word count");
        // Ship the pre-built jar to the cluster; build it before running main()
        String jar = ".\\out\\artifacts\\hadoop_test_jar\\hadoop-test.jar";
        job.setJar(jar);
        job.setJarByClass(WordCount.class);

        job.setMapperClass(MyMapper.class);
        job.setCombinerClass(MyReducer.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(uri + "/input"));
        FileOutputFormat.setOutputPath(job, new Path(uri + "/output"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
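The job reads from /input and writes to /output on HDFS, and waitForCompletion fails if /output already exists. A minimal sketch for preparing those paths from the same client via the HDFS FileSystem API (the local file words.txt and the remote user mk are assumptions, not part of the original post):

package com.mk.mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.net.URI;

public class PrepareJobPaths {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Connect to the same NameNode WordCount.main() targets, as user "mk" (assumed)
        FileSystem fs = FileSystem.get(URI.create("hdfs://192.168.150.128:9000"), conf, "mk");
        // Create the input directory and upload a local sample file
        fs.mkdirs(new Path("/input"));
        fs.copyFromLocalFile(new Path("words.txt"), new Path("/input/words.txt"));
        // Remove any stale output from a previous run
        fs.delete(new Path("/output"), true);
        fs.close();
    }
}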
(2) core-site.xml

<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://hadoop01:9000</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/home/mk/data/tmp</value>
    </property>
</configuration>
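Note that fs.defaultFS refers to the NameNode by the hostname hadoop01, while WordCount.java uses the IP 192.168.150.128 directly; for the hostname form to work, hadoop01 must resolve from the submitting machine (for example via an entry in the client's hosts file).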
<?xml version="1.0"?> <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> <configuration><property><name>mapreduce.framework.name</name><value>yarn</value> </property> <property><name>mapreduce.jobhistory.address</name><value>hadoop01:10020</value> </property> <property><name>mapreduce.jobhistory.webapp.address</name><value>hadoop01:19888</value></property></configuration>(4)yarn-site.xml
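One caveat: the MapReduce client loads mapred-site.xml (not mapreduce-site.xml) as a default resource, so a classpath file with the name used here is not picked up automatically. If these properties do not take effect, rename the file to mapred-site.xml or set the properties on the Configuration explicitly.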
(4) yarn-site.xml

<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address</name>
        <value>hadoop01:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address</name>
        <value>hadoop01:8025</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address</name>
        <value>hadoop01:8040</value>
    </property>
</configuration>
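These ResourceManager addresses must match the ports the cluster actually listens on. The stock defaults are 8032 for yarn.resourcemanager.address, 8030 for the scheduler, and 8031 for the resource tracker, so the 8040 and 8025 values above assume a cluster configured with non-default ports; if yours uses the defaults, adjust accordingly.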
(5) log4j.properties

log4j.rootLogger=INFO, stdout

#log4j.logger.org.springframework=INFO
#log4j.logger.org.apache.activemq=INFO
#log4j.logger.org.apache.activemq.spring=WARN
#log4j.logger.org.apache.activemq.store.journal=INFO
#log4j.logger.org.activeio.journal=INFO

log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} | %-5.5p | %-16.16t | %-32.32c{1} | %-32.32C %4L | %m%n
<?xml version="1.0" encoding="UTF-8"?><project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"><modelVersion>4.0.0</modelVersion><groupId>com.mk</groupId><artifactId>hadoop-test</artifactId><version>1.0-SNAPSHOT</version><name>hadoop-test</name><url>http://www.mk.com</url><properties><project.build.sourceEncoding>UTF-8</project.build.sourceEncoding><maven.compiler.source>1.7</maven.compiler.source><maven.compiler.target>1.7</maven.compiler.target><project.build.sourceEncoding>UTF-8</project.build.sourceEncoding><hadoop.version>2.6.0</hadoop.version></properties><dependencies><dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-client</artifactId><version>${hadoop.version}</version></dependency><dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-common</artifactId><version>${hadoop.version}</version></dependency><dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-hdfs</artifactId><version>${hadoop.version}</version></dependency><dependency><groupId>junit</groupId><artifactId>junit</artifactId><version>4.11</version><scope>test</scope></dependency></dependencies><build><pluginManagement><plugins><plugin><artifactId>maven-clean-plugin</artifactId><version>3.1.0</version></plugin><plugin><artifactId>maven-resources-plugin</artifactId><version>3.0.2</version></plugin><plugin><artifactId>maven-compiler-plugin</artifactId><version>3.8.0</version></plugin><plugin><artifactId>maven-surefire-plugin</artifactId><version>2.22.1</version></plugin><plugin><artifactId>maven-jar-plugin</artifactId><version>3.0.2</version></plugin><plugin><artifactId>maven-install-plugin</artifactId><version>2.5.2</version></plugin><plugin><artifactId>maven-deploy-plugin</artifactId><version>2.8.2</version></plugin><!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle --><plugin><artifactId>maven-site-plugin</artifactId><version>3.7.1</version></plugin><plugin><artifactId>maven-project-info-reports-plugin</artifactId><version>3.0.0</version></plugin></plugins></pluginManagement></build> </project>?
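Before running WordCount.main(), build the jar that job.setJar() points to. Given the .\out\artifacts\... path, the original setup apparently used IntelliJ IDEA's Build Artifacts feature; mvn clean package works as well if you adjust the path to the jar Maven produces under target\.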