{{{
public static class HtMap extends
        Mapper<LongWritable, Text, Text, IntWritable> {
    private IntWritable one = new IntWritable(1);
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // lower-case the input line first, then split it on spaces
        String s[] = value.toString().toLowerCase().trim().split(" ");
        for (String m : s) {
            // emit each word to the output stream with a count of 1
            context.write(new Text(m), one);
        }
    }
}
}}}
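Note that split(" ") produces empty tokens when the input contains consecutive spaces or tabs. A small variant of the loop above (an illustration only, not part of the original example) splits on runs of whitespace instead:

{{{
// variant of the loop body above: split on any run of whitespace, skip empty tokens
String[] words = value.toString().toLowerCase().trim().split("\\s+");
for (String w : words) {
    if (!w.isEmpty()) {
        context.write(new Text(w), one);
    }
}
}}}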
{{{
// TableReducer<KEYIN, VALUEIN, KEYOUT>
// The original version used TableReducer<Text, IntWritable, NullWritable>,
// but changing KEYOUT to LongWritable also works, which shows that many
// classes are acceptable here: any Writable class from org.apache.hadoop.io.*
// that implements write() should do.
public static class HtReduce extends
        TableReducer<Text, IntWritable, LongWritable> {
    public void reduce(Text key, Iterable<IntWritable> values,
            Context context) throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable i : values) {
            sum += i.get();
        }
        // ... (creation of the Put object is omitted in this excerpt)
        // add(byte[] family, byte[] qualifier, byte[] value)
        // main() sets the output format class to TableOutputFormat, which requires
        // the value written by the reducer to be a Put or a Delete (the output key
        // is ignored), so the count is packed into a Put here.
        put.add(Bytes.toBytes("content"), Bytes.toBytes("count"), Bytes
                .toBytes(String.valueOf(sum)));
        // ... (rest of reduce() omitted)
}}}
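The excerpt above leaves out how the Put is created and handed to the framework. A minimal sketch of how the rest of such a reduce() typically looks (an assumption for illustration, not the author's exact code; it assumes the word itself is used as the HBase row key):

{{{
        // sketch: use the word as the row key of the new cell
        Put put = new Put(Bytes.toBytes(key.toString()));
        // ... the put.add(...) call shown above goes here ...
        // TableOutputFormat ignores the output key, so any Writable key works
        context.write(new LongWritable(), put);
    }
}
}}}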
{{{
        String tablename = "wordcount";
        String family = "content";

        Configuration conf = new Configuration();
        // OUTPUT_TABLE = "hbase.mapred.outputtable"
        // conf.set() stores a name/value pair, just like an entry in core-site.xml;
        // here it tells the job that hbase.mapred.outputtable --> wordcount
        conf.set(TableOutputFormat.OUTPUT_TABLE, tablename);
        // create the HBase table first, otherwise the job fails
        CreateTable.createHBaseTable(tablename, family);
        Job job = new Job(conf, "WordCount table with " + args[0]);

        job.setMapperClass(HtMap.class);
        job.setReducerClass(HtReduce.class);
        // declare the map output types <Text, IntWritable> explicitly with
        // set{Map|Reduce}Output{Key|Value}Class() (the defaults are <LongWritable, Text>)
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // FileInputFormat is the common base of the file-based input formats
        // (SequenceFileInputFormat, TextInputFormat); TextInputFormat is the most
        // commonly used one and feeds <LongWritable, Text> pairs by default.
        // HBase additionally provides its own subclass, TableInputFormat.
        job.setInputFormatClass(TextInputFormat.class);
        // TableOutputFormat
        // declaring this makes the reduce output go into an HBase table
        job.setOutputFormatClass(TableOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}}}
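CreateTable.createHBaseTable() above is a small helper defined elsewhere in this example. A minimal sketch of what such a helper can look like with the classic HBaseAdmin API (the body below is an assumption for illustration, not the author's exact code):

{{{
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;

public class CreateTable {
    public static void createHBaseTable(String tablename, String family)
            throws Exception {
        // describe the table and its single column family
        HTableDescriptor htd = new HTableDescriptor(tablename);
        htd.addFamily(new HColumnDescriptor(family));
        // connect to HBase and create the table only if it does not exist yet
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        if (!admin.tableExists(tablename)) {
            admin.createTable(htd);
        }
    }
}
}}}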
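The comment about TableInputFormat above is only informational; this example reads plain text files. For completeness, a sketch of how a job whose input is an HBase table is usually wired up with TableMapReduceUtil (MyTableMapper and the table name are placeholders, not part of this example):

{{{
// Needs org.apache.hadoop.hbase.client.Scan and
// org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil on the classpath.
static void readFromTable(Job job) throws IOException {
    Scan scan = new Scan();
    scan.addFamily(Bytes.toBytes("content"));
    TableMapReduceUtil.initTableMapperJob(
            "wordcount",          // input table name (placeholder)
            scan,                 // which rows/columns to scan
            MyTableMapper.class,  // placeholder: a mapper extending TableMapper<Text, IntWritable>
            Text.class,           // mapper output key class
            IntWritable.class,    // mapper output value class
            job);
}
}}}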
1. The code is adapted from: http://blog.ring.idv.tw/comment.ser?i=337

2. The way to run MapReduce jobs against HBase follows: http://wiki.apache.org/hadoop/Hbase/MapReduce
After the job finishes, the result can be checked in the hbase shell:

{{{
$ /opt/hbase/bin/hbase shell
hbase(main):x:0> list
wordcount
1 row(s) in 0.0240 seconds
hbase(main):x:0> scan 'wordcount'
.....
 zeller           column=content:count, timestamp=1285674576293, value=1
 zero             column=content:count, timestamp=1285674576293, value=8
 zero,            column=content:count, timestamp=1285674576293, value=2
 zero-compressed  column=content:count, timestamp=1285674576293, value=1
.....
hbase(main):x:0> exit
}}}