import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
                  
                          |   | 15 | public class WordCount { | 
                  
                          |   | 16 |  | 
                  
                          |   | 17 |   public static class TokenizerMapper  | 
                  
                          |   | 18 |        extends Mapper<Object, Text, Text, IntWritable>{ | 
                  
                          |   | 19 |      | 
                  
                          |   | 20 |     private final static IntWritable one = new IntWritable(1); | 
                  
                          |   | 21 |     private Text word = new Text(); | 
                  
                          |   | 22 |        | 
                  
                          |   | 23 |     public void map(Object key, Text value, Context context | 
                  
                          |   | 24 |                     ) throws IOException, InterruptedException { | 
                  
                          |   | 25 |       StringTokenizer itr = new StringTokenizer(value.toString()); | 
                  
                          |   | 26 |       while (itr.hasMoreTokens()) { | 
                  
                          |   | 27 |         word.set(itr.nextToken()); | 
                  
                          |   | 28 |         context.write(word, one); | 
                  
                          |   | 29 |       } | 
                  
                          |   | 30 |     } | 
                  
                          |   | 31 |   } | 
                  
                          |   | 32 |    | 
                  
                          |   | 33 |   public static class IntSumReducer  | 
                  
                          |   | 34 |        extends Reducer<Text,IntWritable,Text,IntWritable> { | 
                  
                          |   | 35 |     private IntWritable result = new IntWritable(); | 
                  
                          |   | 36 |  | 
                  
                          |   | 37 |     public void reduce(Text key, Iterable<IntWritable> values,  | 
                  
                          |   | 38 |                        Context context | 
                  
                          |   | 39 |                        ) throws IOException, InterruptedException { | 
                  
                          |   | 40 |       int sum = 0; | 
                  
                          |   | 41 |       for (IntWritable val : values) { | 
                  
                          |   | 42 |         sum += val.get(); | 
                  
                          |   | 43 |       } | 
                  
                          |   | 44 |       result.set(sum); | 
                  
                          |   | 45 |       context.write(key, result); | 
                  
                          |   | 46 |     } | 
                  
                          |   | 47 |   } | 
                  
                          |   | 48 |  | 
                  
                          |   | 49 |   public static void main(String[] args) throws Exception { | 
                  
                          |   | 50 |     Configuration conf = new Configuration(); | 
                  
                          |   | 51 |     String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); | 
                  
                          |   | 52 |     if (otherArgs.length != 2) { | 
                  
                          |   | 53 |       System.err.println("Usage: wordcount <in> <out>"); | 
                  
                          |   | 54 |       System.exit(2); | 
                  
                          |   | 55 |     } | 
                  
                          |   | 56 |     Job job = new Job(conf, "word count"); | 
                  
                          |   | 57 |     job.setJarByClass(WordCount.class); | 
                  
                          |   | 58 |     job.setMapperClass(TokenizerMapper.class); | 
                  
                          |   | 59 |     job.setCombinerClass(IntSumReducer.class); | 
                  
                          |   | 60 |     job.setReducerClass(IntSumReducer.class); | 
                  
                          |   | 61 |     job.setOutputKeyClass(Text.class); | 
                  
                          |   | 62 |     job.setOutputValueClass(IntWritable.class); | 
                  
                          |   | 63 |     FileInputFormat.addInputPath(job, new Path(otherArgs[0])); | 
                  
                          |   | 64 |     FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); | 
                  
                          |   | 65 |     System.exit(job.waitForCompletion(true) ? 0 : 1); | 
                  
                          |   | 66 |   } | 
                  
                          |   | 67 | } |