| 14 | | * HelloHadoopV2 |
| | 15 | {{{ |
| | 16 | #!text |
| | 17 | HelloHadoopV2 |
| | 18 | 說明: |
| | 19 | 此程式碼比HelloHadoop 增加 |
| | 20 | * 檢查輸出資料夾是否存在並刪除 |
| | 21 | * input 資料夾內的檔案若多於一個,資料不會被覆蓋
| | 22 | * map 與 reduce 拆開以利程式再利用 |
| | 23 | |
| | 24 | 測試方法: |
| | 25 | 將此程式運作在hadoop 0.20 平台上,執行: |
| | 26 | --------------------------- |
| | 27 | hadoop jar HelloHadoopV2.jar |
| | 28 | --------------------------- |
| | 29 | |
| | 30 | 注意: |
| | 31 | 1. 在hdfs 上來源檔案的路徑為 "/user/$YOUR_NAME/input" |
| | 32 | 請注意必須先放資料到此hdfs上的資料夾內,且此資料夾內只能放檔案,不可再放資料夾 |
| | 33 | 2. 運算完後,程式將執行結果放在hdfs 的輸出路徑為 "/user/$YOUR_NAME/output-hh2" |
| | 34 | }}} |
| | 35 | |
| | 36 | |
| | 37 | * 請注意以下有三個java檔案,並先compile !HelloMapperV2, !HelloReducerV2, 最後再編譯 !HelloHadoopV2.java |
| | 38 | |
| | 39 | |
| | 40 | = !HelloMapperV2.java = |
| | 41 | |
| | 42 | {{{ |
| | 43 | #!java |
| | 44 | package org.nchc.hadoop; |
| | 45 | import java.io.IOException; |
| | 46 | |
| | 47 | import org.apache.hadoop.io.LongWritable; |
| | 48 | import org.apache.hadoop.io.Text; |
| | 49 | import org.apache.hadoop.mapreduce.Mapper; |
| | 50 | |
| | 51 | public class HelloMapperV2 extends Mapper<LongWritable, Text, Text, Text> { |
| | 52 | |
| | 53 | public void map(LongWritable key, Text value, Context context) |
| | 54 | throws IOException, InterruptedException { |
| | 55 | context.write(new Text(key.toString()), value); |
| | 56 | } |
| | 57 | |
| | 58 | } |
| | 59 | |
| | 60 | }}} |
| | 61 | |
| | 62 | = !HelloReducerV2.java = |
| | 63 | |
| | 64 | {{{ |
| | 65 | #!java |
| | 66 | package org.nchc.hadoop; |
| | 67 | import java.io.IOException; |
| | 68 | |
| | 69 | import org.apache.hadoop.io.Text; |
| | 70 | import org.apache.hadoop.mapreduce.Reducer; |
| | 71 | |
| | 72 | public class HelloReducerV2 extends Reducer<Text, Text, Text, Text> { |
| | 73 | public void reduce(Text key, Iterable<Text> values, Context context) |
| | 74 | throws IOException, InterruptedException { |
| | 75 | |
| | 76 | String str = new String(""); |
| | 77 | Text final_key = new Text(); |
| | 78 | Text final_value = new Text(); |
| | 79 | // 將key值相同的values,透過 && 符號分隔之 |
| | 80 | for (Text tmp : values) { |
| | 81 | str += tmp.toString() + " &&"; |
| | 82 | } |
| | 83 | |
| | 84 | final_key.set(key); |
| | 85 | final_value.set(str); |
| | 86 | |
| | 87 | context.write(final_key, final_value); |
| | 88 | } |
| | 89 | } |
| | 90 | |
| | 91 | }}} |
| | 92 | |
| | 93 | = !HelloHadoopV2.java = |
| | 94 | |
| 25 | | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; |
| 26 | | |
| 27 | | // HelloHadoopV2 |
| 28 | | // 說明: |
| 29 | | // 此程式碼比HelloHadoop 增加 |
| 30 | | // * 檢查輸出資料夾是否存在並刪除 |
| 31 | | // * input 資料夾內的檔案若多於一個,資料不會被覆蓋
| 32 | | // * map 與 reduce 拆開以利程式再利用 |
| 33 | | // |
| 34 | | // 測試方法: |
| 35 | | // 將此程式運作在hadoop 0.20 平台上,執行: |
| 36 | | // --------------------------- |
| 37 | | // hadoop jar HelloHadoopV2.jar |
| 38 | | // --------------------------- |
| 39 | | // |
| 40 | | // 注意: |
| 41 | | // 1. 在hdfs 上來源檔案的路徑為 "/user/$YOUR_NAME/input" |
| 42 | | // 請注意必須先放資料到此hdfs上的資料夾內,且此資料夾內只能放檔案,不可再放資料夾 |
| 43 | | // 2. 運算完後,程式將執行結果放在hdfs 的輸出路徑為 "/user/$YOUR_NAME/output-hh2" |
| 44 | | // |
| | 105 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; |
| 87 | | |
| 88 | | * HelloMapperV2 |
| 89 | | |
| 90 | | {{{ |
| 91 | | #!java |
| 92 | | package org.nchc.hadoop; |
| 93 | | import java.io.IOException; |
| 94 | | |
| 95 | | import org.apache.hadoop.io.LongWritable; |
| 96 | | import org.apache.hadoop.io.Text; |
| 97 | | import org.apache.hadoop.mapreduce.Mapper; |
| 98 | | |
| 99 | | public class HelloMapperV2 extends Mapper<LongWritable, Text, Text, Text> { |
| 100 | | |
| 101 | | public void map(LongWritable key, Text value, Context context) |
| 102 | | throws IOException, InterruptedException { |
| 103 | | context.write(new Text(key.toString()), value); |
| 104 | | } |
| 105 | | |
| 106 | | } |
| 107 | | |
| 108 | | }}} |
| 109 | | |
| 110 | | * HelloReducerV2 |
| 111 | | |
| 112 | | {{{ |
| 113 | | #!java |
| 114 | | package org.nchc.hadoop; |
| 115 | | import java.io.IOException; |
| 116 | | |
| 117 | | import org.apache.hadoop.io.Text; |
| 118 | | import org.apache.hadoop.mapreduce.Reducer; |
| 119 | | |
| 120 | | public class HelloReducerV2 extends Reducer<Text, Text, Text, Text> { |
| 121 | | public void reduce(Text key, Iterable<Text> values, Context context) |
| 122 | | throws IOException, InterruptedException { |
| 123 | | |
| 124 | | String str = new String(""); |
| 125 | | Text final_key = new Text(); |
| 126 | | Text final_value = new Text(); |
| 127 | | // 將key值相同的values,透過 && 符號分隔之 |
| 128 | | for (Text tmp : values) { |
| 129 | | str += tmp.toString() + " &&"; |
| 130 | | } |
| 131 | | |
| 132 | | final_key.set(key); |
| 133 | | final_value.set(str); |
| 134 | | |
| 135 | | context.write(final_key, final_value); |
| 136 | | } |
| 137 | | } |
| 138 | | |
| 139 | | }}} |