Context Navigation

source: sample/WordCount.java @ 7

Last change on this file since 7 was 7, checked in by waue, 18 years ago
6/12 modify
File size: 2.8 KB

Line
1	/*
2	* MapReduce sample code: counts how often each word occurs in the input.
3	*/
4	package tw.org.nchc.demo;
5
6	import java.io.IOException;
7	import java.util.Iterator;
8	import java.util.StringTokenizer;
9
10	import org.apache.hadoop.fs.FileSystem;
11	import org.apache.hadoop.fs.Path;
12	import org.apache.hadoop.io.IntWritable;
13	import org.apache.hadoop.io.LongWritable;
14	import org.apache.hadoop.io.Text;
15	import org.apache.hadoop.mapred.JobClient;
16	import org.apache.hadoop.mapred.JobConf;
17	import org.apache.hadoop.mapred.MapReduceBase;
18	import org.apache.hadoop.mapred.Mapper;
19	import org.apache.hadoop.mapred.OutputCollector;
20	import org.apache.hadoop.mapred.Reducer;
21	import org.apache.hadoop.mapred.Reporter;
22
23
24	public class WordCount {
25
26	// mapper: emits (token, 1) for every word occurrence
27	private static class MapClass extends MapReduceBase implements
28	Mapper<LongWritable, Text, Text, IntWritable> {
29
30	// reuse objects to save overhead of object creation
31	private final static IntWritable one = new IntWritable(1);
32	private Text word = new Text();
33
34	public void map(LongWritable key, Text value,
35	OutputCollector<Text, IntWritable> output, Reporter reporter)
36	throws IOException {
37	String line = ((Text) value).toString();
38	StringTokenizer itr = new StringTokenizer(line);
39	while (itr.hasMoreTokens()) {
40	word.set(itr.nextToken());
41	output.collect(word, one);
42	}
43	}
44	}
45
46	// reducer: sums up all the counts
47	private static class ReduceClass extends MapReduceBase implements
48	Reducer<Text, IntWritable, Text, IntWritable> {
49
50	// reuse objects
51	private final static IntWritable SumValue = new IntWritable();
52
53	public void reduce(Text key, Iterator<IntWritable> values,
54	OutputCollector<Text, IntWritable> output, Reporter reporter)
55	throws IOException {
56	// sum up values
57	int sum = 0;
58	while (values.hasNext()) {
59	sum += values.next().get();
60	}
61	SumValue.set(sum);
62	output.collect(key, SumValue);
63	}
64	}
65
66
67	/**
68	* Runs the demo.
69	*/
70	public static void main(String[] args) throws IOException {
71	String filename = "/user/waue/input/";
72	String outputPath = "sample-counts";
73	int mapTasks = 20;
74	int reduceTasks = 1;
75
76	JobConf conf = new JobConf(WordCount.class);
77	conf.setJobName("wordcount");
78
79	conf.setNumMapTasks(mapTasks);
80	conf.setNumReduceTasks(reduceTasks);
81
82	conf.setInputPath(new Path(filename));
83	conf.setOutputKeyClass(Text.class);
84	conf.setOutputValueClass(IntWritable.class);
85	conf.setOutputPath(new Path(outputPath));
86
87	conf.setMapperClass(MapClass.class);
88	conf.setCombinerClass(ReduceClass.class);
89	conf.setReducerClass(ReduceClass.class);
90
91	// Delete the output directory if it exists already
92	Path outputDir = new Path(outputPath);
93	FileSystem.get(conf).delete(outputDir);
94
95	JobClient.runJob(conf);
96	}
97	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: