1 | /** |
---|
2 | * Program: WordCountIntoHBase.java |
---|
3 | * Editor: Waue Chen |
---|
4 | * From : NCHC. Taiwan |
---|
5 | * Last Update Date: 07/02/2008 |
---|
6 | */ |
---|
7 | |
---|
8 | /** |
---|
9 | * Purpose : |
---|
10 | * Store every line from $Input_Path to HBase |
---|
11 | * |
---|
12 | * HowToUse : |
---|
13 | * Make sure Hadoop file system and HBase are running correctly. |
---|
14 | * Use Hadoop instruction to add input-text-files to $Input_Path. |
---|
15 | * ($ bin/hadoop dfs -put local_dir hdfs_dir) |
---|
16 | * Then run the program with BuildHTable.java after \ |
---|
17 | * modifying these setup parameters. |
---|
18 | * |
---|
19 | * Check Result : |
---|
20 | * View the result by hbase instruction (hql> select * from $Table_Name). |
---|
21 | * Or run WordCountFromHBase.java, then inspect http://localhost:60070 with a web browser. |
---|
22 | */ |
---|
23 | |
---|
24 | package tw.org.nchc.code; |
---|
25 | |
---|
26 | import java.io.IOException; |
---|
27 | import java.util.Iterator; |
---|
28 | |
---|
29 | import org.apache.hadoop.fs.Path; |
---|
30 | import org.apache.hadoop.hbase.io.ImmutableBytesWritable; |
---|
31 | import org.apache.hadoop.hbase.mapred.TableReduce; |
---|
32 | import org.apache.hadoop.io.LongWritable; |
---|
33 | import org.apache.hadoop.io.MapWritable; |
---|
34 | import org.apache.hadoop.io.Text; |
---|
35 | import org.apache.hadoop.mapred.JobClient; |
---|
36 | import org.apache.hadoop.mapred.JobConf; |
---|
37 | import org.apache.hadoop.mapred.OutputCollector; |
---|
38 | import org.apache.hadoop.mapred.Reporter; |
---|
39 | import org.apache.hadoop.mapred.lib.IdentityMapper; |
---|
40 | import org.apache.hadoop.mapred.lib.IdentityReducer; |
---|
41 | |
---|
42 | public class WordCountIntoHBase { |
---|
43 | |
---|
44 | /* setup parameters */ |
---|
45 | // $Input_Path. Please make sure the path is correct and contains input |
---|
46 | // files |
---|
47 | static final String Input_Path = "/user/waue/input"; |
---|
48 | |
---|
49 | // Hbase table name, the program will create it |
---|
50 | static final String Table_Name = "word_count5"; |
---|
51 | |
---|
52 | // column name, the program will create it |
---|
53 | static final String colstr = "word:text"; |
---|
54 | |
---|
55 | // constructor |
---|
56 | private WordCountIntoHBase() { |
---|
57 | } |
---|
58 | |
---|
59 | private static class ReduceClass extends TableReduce<LongWritable, Text> { |
---|
60 | // set (column_family:column_qualify) |
---|
61 | private static final Text col = new Text(WordCountIntoHBase.colstr); |
---|
62 | |
---|
63 | // this map holds the columns per row |
---|
64 | private MapWritable map = new MapWritable(); |
---|
65 | |
---|
66 | public void reduce(LongWritable key, Iterator<Text> values, |
---|
67 | OutputCollector<Text, MapWritable> output, Reporter reporter) |
---|
68 | throws IOException { |
---|
69 | // contents must be ImmutableBytesWritable |
---|
70 | ImmutableBytesWritable bytes = new ImmutableBytesWritable(values |
---|
71 | .next().getBytes()); |
---|
72 | map.clear(); |
---|
73 | // write data |
---|
74 | map.put(col, bytes); |
---|
75 | // add the row with the key as the row id |
---|
76 | output.collect(new Text(key.toString()), map); |
---|
77 | } |
---|
78 | } |
---|
79 | |
---|
80 | /** |
---|
81 | * Runs the demo. |
---|
82 | */ |
---|
83 | public static void main(String[] args) throws IOException { |
---|
84 | // parse colstr to split column family and column qualify |
---|
85 | String tmp[] = colstr.split(":"); |
---|
86 | String Column_Family = tmp[0] + ":"; |
---|
87 | String CF[] = { Column_Family }; |
---|
88 | // check whether create table or not , we don't admit \ |
---|
89 | // the same name but different structure |
---|
90 | BuildHTable build_table = new BuildHTable(Table_Name, CF); |
---|
91 | if (!build_table.checkTableExist(Table_Name)) { |
---|
92 | if (!build_table.createTable()) { |
---|
93 | System.out.println("create table error !"); |
---|
94 | } |
---|
95 | } else { |
---|
96 | System.out.println("Table \"" + Table_Name |
---|
97 | + "\" has already existed !"); |
---|
98 | } |
---|
99 | int mapTasks = 1; |
---|
100 | int reduceTasks = 1; |
---|
101 | JobConf conf = new JobConf(WordCountIntoHBase.class); |
---|
102 | conf.setJobName(Table_Name); |
---|
103 | |
---|
104 | // must initialize the TableReduce before running job |
---|
105 | TableReduce.initJob(Table_Name, ReduceClass.class, conf); |
---|
106 | conf.setNumMapTasks(mapTasks); |
---|
107 | conf.setNumReduceTasks(reduceTasks); |
---|
108 | conf.setInputPath(new Path(Input_Path)); |
---|
109 | conf.setMapperClass(IdentityMapper.class); |
---|
110 | conf.setCombinerClass(IdentityReducer.class); |
---|
111 | conf.setReducerClass(ReduceClass.class); |
---|
112 | |
---|
113 | JobClient.runJob(conf); |
---|
114 | } |
---|
115 | } |
---|