= Common problems when converting a Hadoop 0.18 program to Hadoop 0.20 =

== Introduction ==

 * The code below looks like it runs a word count, but the result it produces is that of a trivial "hello world" (identity) job:

{{{
#!java
import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class wordcount {

    static public class wcmapper
            extends Mapper<LongWritable, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        // BUG: this old-API signature only overloads Mapper.map(),
        // so the framework never calls this method.
        public void map(LongWritable key, Text value,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            String line = value.toString();
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                output.collect(word, one);
            }
        }
    }

    static public class wcreducer
            extends Reducer<Text, IntWritable, Text, IntWritable> {

        // BUG: same problem; this only overloads Reducer.reduce().
        public void reduce(Text key, Iterator<IntWritable> values,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "wordcount");
        job.setJarByClass(wordcount.class);
        FileInputFormat.setInputPaths(job, "/user/shunfa/input");
        FileOutputFormat.setOutputPath(job, new Path(
                "/user/shunfa/output-wordcount11"));
        job.setMapperClass(wcmapper.class);
        job.setReducerClass(wcreducer.class);
        job.waitForCompletion(true);
    }
}
}}}

 * Input file contents:

{{{
#!text
i am little cat
you are small dog
haha
}}}

 * Expected result:

{{{
am 1
are 1
cat 1
....
}}}

 * Actual result:

{{{
0 i am little cat
12 you are small dog
32 haha
...
}}}

 * Hint for the fix

When map() and reduce() are implemented on top of the new-API classes org.apache.hadoop.mapreduce.Mapper and org.apache.hadoop.mapreduce.Reducer, they must '''not''' be declared with the old-API signature '''''map(LongWritable key, Text value, OutputCollector output, Reporter reporter)'''''; they must be declared as '''map(LongWritable key, Text value, Context context)''' (and, analogously, '''reduce(Text key, Iterable values, Context context)''').

The old-style method merely ''overloads'' the new API's Mapper.map() instead of ''overriding'' it, so the framework never calls it and falls back to the default identity map(), which emits every input record unchanged: the byte offset as the key and the raw line as the value. That is exactly the "actual result" shown above. Annotating the methods with @Override lets the compiler reject the wrong signature at build time.
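For reference, below is a minimal sketch of the same job rewritten against the new org.apache.hadoop.mapreduce API, following the standard 0.20 WordCount pattern. The class names and HDFS paths are kept from the broken example above; the setOutputKeyClass/setOutputValueClass calls are additions that the usual new-API setup requires, since the job's output types (Text, IntWritable) differ from the defaults.

{{{
#!java
import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class wordcount {

    static public class wcmapper
            extends Mapper<LongWritable, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        // @Override proves this really overrides Mapper.map();
        // the compiler now rejects a wrong signature.
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer tokenizer = new StringTokenizer(value.toString());
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                context.write(word, one);   // replaces output.collect(word, one)
            }
        }
    }

    static public class wcreducer
            extends Reducer<Text, IntWritable, Text, IntWritable> {

        @Override
        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            // the new API hands reduce() an Iterable, not an Iterator
            for (IntWritable value : values) {
                sum += value.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "wordcount");
        job.setJarByClass(wordcount.class);
        job.setMapperClass(wcmapper.class);
        job.setReducerClass(wcreducer.class);
        // declare the job's output key/value types so the framework
        // can serialize the (Text, IntWritable) pairs it emits
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.setInputPaths(job, "/user/shunfa/input");
        FileOutputFormat.setOutputPath(job, new Path(
                "/user/shunfa/output-wordcount11"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
}}}

Run against the same input file, this version should produce the expected word/count pairs shown above rather than the identity output.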