Hadoop 進階課程
範例
說明
- 練習: 請完成 hadoop io 的 CheckAndDelete.checkAndDelete 程式碼的練習後,完成以下的程式碼,使其有以下功能
HelloHadoopV2
說明:
此程式碼比HelloHadoop 增加
* 檢查輸出資料夾是否存在並刪除
* input 資料夾內若有兩個以上的檔案,各檔案的資料不會互相覆蓋
* map 與 reduce 拆開以利程式再利用
測試方法:
將此程式運作在hadoop 0.20 平台上,執行:
---------------------------
hadoop jar HelloHadoopV2.jar
---------------------------
注意:
1. 在hdfs 上來源檔案的路徑為 "/user/$YOUR_NAME/input"
請注意必須先放資料到此hdfs上的資料夾內,且此資料夾內只能放檔案,不可再放資料夾
2. 運算完後,程式將執行結果放在hdfs 的輸出路徑為 "/user/$YOUR_NAME/output-hh2"
- 請注意以下有三個 java 檔案,請先編譯 HelloMapperV2.java 與 HelloReducerV2.java,最後再編譯 HelloHadoopV2.java
HelloMapperV2.java
package org.nchc.hadoop; import java.io.IOException; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; public class HelloMapperV2 extends Mapper<LongWritable, Text, Text, Text> { public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { context.write(new Text(key.toString()), value); } }
HelloReducerV2.java
package org.nchc.hadoop; import java.io.IOException; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; public class HelloReducerV2 extends Reducer<Text, Text, Text, Text> { public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { String str = new String(""); Text final_key = new Text(); Text final_value = new Text(); // 將key值相同的values,透過 && 符號分隔之 for (Text tmp : values) { str += tmp.toString() + " &&"; } final_key.set(key); final_value.set(str); context.write(final_key, final_value); } }
HelloHadoopV2.java
package org.nchc.hadoop; import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; public class HelloHadoopV2 { public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); Job job = new Job(conf, "Hadoop Hello World 2"); job.setJarByClass(HelloHadoopV2.class); // 設定 map and reduce 以及 Combiner class job.setMapperClass(HelloMapperV2.class); job.setCombinerClass(HelloReducerV2.class); job.setReducerClass(HelloReducerV2.class); // 設定map的輸出型態 job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); // 設定reduce的輸出型態 job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path("/user/hadoop/input")); FileOutputFormat.setOutputPath(job, new Path("/user/hadoop/output-hh2")); // 呼叫checkAndDelete函式,檢查是否存在該資料夾,若有則刪除之 CheckAndDelete.checkAndDelete("/user/hadoop/output-hh2", conf); boolean status = job.waitForCompletion(true); if (status) { System.err.println("Integrate Alert Job Finished !"); } else { System.err.println("Integrate Alert Job Failed !"); System.exit(1); } } }
Last modified 14 years ago
Last modified on Jul 21, 2011, 5:38:32 PM
