{{{
public static class HtMap extends
        Mapper<LongWritable, Text, Text, IntWritable> {
    private IntWritable one = new IntWritable(1);
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // lower-case the input line first, then split it on spaces
        String s[] = value.toString().toLowerCase().trim().split(" ");
        for (String m : s) {
            // emit each word to the output stream with a count of 1
            context.write(new Text(m), one);
        }
    }
}
}}}
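Note that split(" ") produces empty tokens when the input contains consecutive spaces or tabs. A small variant of the loop above (an illustration only, not part of the original example) splits on runs of whitespace instead:

{{{
// variant of the loop body above: split on any run of whitespace, skip empty tokens
String[] words = value.toString().toLowerCase().trim().split("\\s+");
for (String w : words) {
    if (!w.isEmpty()) {
        context.write(new Text(w), one);
    }
}
}}}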
{{{
// TableReducer<KEYIN, VALUEIN, KEYOUT>
// The original version used TableReducer<Text, IntWritable, NullWritable>,
// but changing KEYOUT to LongWritable also works, which shows that many
// classes are acceptable here: any Writable class from org.apache.hadoop.io.*
// that implements write() should do.
public static class HtReduce extends
        TableReducer<Text, IntWritable, LongWritable> {
    public void reduce(Text key, Iterable<IntWritable> values,
            Context context) throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable i : values) {
            sum += i.get();
        }
        // ... (creation of the Put object is omitted in this excerpt)
        // add(byte[] family, byte[] qualifier, byte[] value)
        // main() sets the output format class to TableOutputFormat, which requires
        // the value written by the reducer to be a Put or a Delete (the output key
        // is ignored), so the count is packed into a Put here.
        put.add(Bytes.toBytes("content"), Bytes.toBytes("count"), Bytes
                .toBytes(String.valueOf(sum)));
        // ... (rest of reduce() omitted)
}}}
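The excerpt above leaves out how the Put is created and handed to the framework. A minimal sketch of how the rest of such a reduce() typically looks (an assumption for illustration, not the author's exact code; it assumes the word itself is used as the HBase row key):

{{{
        // sketch: use the word as the row key of the new cell
        Put put = new Put(Bytes.toBytes(key.toString()));
        // ... the put.add(...) call shown above goes here ...
        // TableOutputFormat ignores the output key, so any Writable key works
        context.write(new LongWritable(), put);
    }
}
}}}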
{{{
        String tablename = "wordcount";
        String family = "content";

        Configuration conf = new Configuration();
        // OUTPUT_TABLE = "hbase.mapred.outputtable"
        // conf.set() stores a name/value pair, just like an entry in core-site.xml;
        // here it tells the job that hbase.mapred.outputtable --> wordcount
        conf.set(TableOutputFormat.OUTPUT_TABLE, tablename);
        // create the HBase table first, otherwise the job fails
        CreateTable.createHBaseTable(tablename, family);
        Job job = new Job(conf, "WordCount table with " + args[0]);

        job.setMapperClass(HtMap.class);
        job.setReducerClass(HtReduce.class);
        // declare the map output types <Text, IntWritable> explicitly with
        // set{Map|Reduce}Output{Key|Value}Class() (the defaults are <LongWritable, Text>)
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // FileInputFormat is the common base of the file-based input formats
        // (SequenceFileInputFormat, TextInputFormat); TextInputFormat is the most
        // commonly used one and feeds <LongWritable, Text> pairs by default.
        // HBase additionally provides its own subclass, TableInputFormat.
        job.setInputFormatClass(TextInputFormat.class);
        // TableOutputFormat
        // declaring this makes the reduce output go into an HBase table
        job.setOutputFormatClass(TableOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}}}
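CreateTable.createHBaseTable() above is a small helper defined elsewhere in this example. A minimal sketch of what such a helper can look like with the classic HBaseAdmin API (the body below is an assumption for illustration, not the author's exact code):

{{{
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;

public class CreateTable {
    public static void createHBaseTable(String tablename, String family)
            throws Exception {
        // describe the table and its single column family
        HTableDescriptor htd = new HTableDescriptor(tablename);
        htd.addFamily(new HColumnDescriptor(family));
        // connect to HBase and create the table only if it does not exist yet
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        if (!admin.tableExists(tablename)) {
            admin.createTable(htd);
        }
    }
}
}}}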
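The comment about TableInputFormat above is only informational; this example reads plain text files. For completeness, a sketch of how a job whose input is an HBase table is usually wired up with TableMapReduceUtil (MyTableMapper and the table name are placeholders, not part of this example):

{{{
// Needs org.apache.hadoop.hbase.client.Scan and
// org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil on the classpath.
static void readFromTable(Job job) throws IOException {
    Scan scan = new Scan();
    scan.addFamily(Bytes.toBytes("content"));
    TableMapReduceUtil.initTableMapperJob(
            "wordcount",          // input table name (placeholder)
            scan,                 // which rows/columns to scan
            MyTableMapper.class,  // placeholder: a mapper extending TableMapper<Text, IntWritable>
            Text.class,           // mapper output key class
            IntWritable.class,    // mapper output value class
            job);
}
}}}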
1. The code is adapted from: http://blog.ring.idv.tw/comment.ser?i=337

2. The way to run MapReduce jobs against HBase follows: http://wiki.apache.org/hadoop/Hbase/MapReduce
After the job finishes, the result can be checked in the hbase shell:

{{{
$ /opt/hbase/bin/hbase shell
hbase(main):x:0> list
wordcount
1 row(s) in 0.0240 seconds
hbase(main):x:0> scan 'wordcount'
.....
 zeller           column=content:count, timestamp=1285674576293, value=1
 zero             column=content:count, timestamp=1285674576293, value=8
 zero,            column=content:count, timestamp=1285674576293, value=2
 zero-compressed  column=content:count, timestamp=1285674576293, value=1
.....
hbase(main):x:0> exit
}}}