Context Navigation

Back to Hadoop_Lab4

Hadoop_Lab4: WordCount.java

File WordCount.java, 2.6 KB (added by waue, 15 years ago)

Line
1	import java.io.IOException;
2	import java.util.StringTokenizer;
3
4	import org.apache.hadoop.conf.Configuration;
5	import org.apache.hadoop.fs.Path;
6	import org.apache.hadoop.io.IntWritable;
7	import org.apache.hadoop.io.Text;
8	import org.apache.hadoop.mapreduce.Job;
9	import org.apache.hadoop.mapreduce.Mapper;
10	import org.apache.hadoop.mapreduce.Reducer;
11	import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
12	import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
13	import org.apache.hadoop.util.GenericOptionsParser;
14	//WordCount
15	//說明：
16	// 用於字數統計
17	//
18	//測試方法：
19	// 將此程式運作在hadoop 0.20 平台上，執行：
20	// ---------------------------
21	// hadoop jar WordCount.jar <input> <output>
22	// ---------------------------
23	//
24	//注意：
25	//1. 在hdfs 上來源檔案的路徑為你所指定的 <input>
26	//請注意必須先放資料到此hdfs上的資料夾內，且此資料夾內只能放檔案，不可再放資料夾
27	//2. 運算完後，程式將執行結果放在hdfs 的輸出路徑為你所指定的 <output>
28	//
29	public class WordCount {
30
31	public static class TokenizerMapper extends
32	Mapper<Object, Text, Text, IntWritable> {
33
34	private final static IntWritable one = new IntWritable(1);
35	private Text word = new Text();
36
37	public void map(Object key, Text value, Context context)
38	throws IOException, InterruptedException {
39	StringTokenizer itr = new StringTokenizer(value.toString());
40	while (itr.hasMoreTokens()) {
41	word.set(itr.nextToken());
42	context.write(word, one);
43	}
44	}
45	}
46
47	public static class IntSumReducer extends
48	Reducer<Text, IntWritable, Text, IntWritable> {
49	private IntWritable result = new IntWritable();
50
51	public void reduce(Text key, Iterable<IntWritable> values,
52	Context context) throws IOException, InterruptedException {
53	int sum = 0;
54	for (IntWritable val : values) {
55	sum += val.get();
56	}
57	result.set(sum);
58	context.write(key, result);
59	}
60	}
61
62	public static void main(String[] args) throws Exception {
63	// debug using
64	// String[] argv = { "input", "output-wc" };
65	// args = argv;
66
67	Configuration conf = new Configuration();
68	Job job = new Job(conf, "Word Count");
69	job.setJarByClass(WordCount.class);
70	job.setMapperClass(TokenizerMapper.class);
71	job.setCombinerClass(IntSumReducer.class);
72	job.setReducerClass(IntSumReducer.class);
73	job.setOutputKeyClass(Text.class);
74	job.setOutputValueClass(IntWritable.class);
75	FileInputFormat.addInputPath(job, new Path(args[0]));
76	FileOutputFormat.setOutputPath(job, new Path(args[1]));
77	System.exit(job.waitForCompletion(true) ? 0 : 1);
78	}
79	}
80

Download in other formats:

Original Format