Context Navigation

source: sample/WordCount.java @ 18

Last change on this file since 18 was 18, checked in by waue, 17 years ago
upgrade 0.16 to 0.17
File size: 3.7 KB

Rev	Line
[9]	1	/**
	2	* Program: WordCount.java
	3	* Editor: Waue Chen
	4	* From : NCHC. Taiwan
[18]	5	* Last Update Date: 07/02/2008
	6	* Upgrade to 0.17
[7]	7	*/
[9]	8
	9	/**
	10	* Purpose :
	11	* Count the occurrences of each word in the input text files and store the result on the Hadoop file system
	12	*
	13	* HowToUse :
	14	* Make sure Hadoop file system is running correctly.
	15	* Put text file on the directory "/local_src/input"
	16	* You can use the following command to upload "/local_src/input" to the HDFS input dir
	17	* $ bin/hadoop dfs -put /local_src/input input
	18	* Then modify the $filepath parameter in the constructor to be correct and run this code.
	19	*
	20	*
	21	* Check Result:
	22	* Inspect http://localhost:50070 with a web browser
	23	*/
[8]	24	package tw.org.nchc.code;
[7]	25
	26	import java.io.IOException;
	27	import java.util.Iterator;
	28	import java.util.StringTokenizer;
	29
	30	import org.apache.hadoop.fs.FileSystem;
	31	import org.apache.hadoop.fs.Path;
	32	import org.apache.hadoop.io.IntWritable;
	33	import org.apache.hadoop.io.LongWritable;
	34	import org.apache.hadoop.io.Text;
	35	import org.apache.hadoop.mapred.JobClient;
	36	import org.apache.hadoop.mapred.JobConf;
	37	import org.apache.hadoop.mapred.MapReduceBase;
	38	import org.apache.hadoop.mapred.Mapper;
	39	import org.apache.hadoop.mapred.OutputCollector;
	40	import org.apache.hadoop.mapred.Reducer;
	41	import org.apache.hadoop.mapred.Reporter;
	42
	43	public class WordCount {
[9]	44	private String filepath;
[18]	45
[9]	46	private String outputPath;
[18]	47
	48	public WordCount() {
[9]	49	filepath = "/user/waue/input/";
	50	outputPath = "counts1";
	51	}
[18]	52
	53	public WordCount(String path, String output) {
[9]	54	filepath = path;
	55	outputPath = output;
	56	}
[18]	57
[7]	58	// mapper: emits (token, 1) for every word occurrence
	59	private static class MapClass extends MapReduceBase implements
	60	Mapper<LongWritable, Text, Text, IntWritable> {
	61
	62	// reuse objects to save overhead of object creation
	63	private final static IntWritable one = new IntWritable(1);
[18]	64
[7]	65	private Text word = new Text();
	66
	67	public void map(LongWritable key, Text value,
	68	OutputCollector<Text, IntWritable> output, Reporter reporter)
	69	throws IOException {
	70	String line = ((Text) value).toString();
	71	StringTokenizer itr = new StringTokenizer(line);
	72	while (itr.hasMoreTokens()) {
	73	word.set(itr.nextToken());
	74	output.collect(word, one);
	75	}
	76	}
	77	}
	78
	79	// reducer: sums up all the counts
	80	private static class ReduceClass extends MapReduceBase implements
	81	Reducer<Text, IntWritable, Text, IntWritable> {
	82
	83	// reuse objects
	84	private final static IntWritable SumValue = new IntWritable();
	85
	86	public void reduce(Text key, Iterator<IntWritable> values,
	87	OutputCollector<Text, IntWritable> output, Reporter reporter)
	88	throws IOException {
	89	// sum up values
	90	int sum = 0;
	91	while (values.hasNext()) {
	92	sum += values.next().get();
	93	}
	94	SumValue.set(sum);
	95	output.collect(key, SumValue);
	96	}
	97	}
	98
	99	/**
	100	* Runs the demo.
	101	*/
	102	public static void main(String[] args) throws IOException {
[9]	103	WordCount wc = new WordCount();
[18]	104
[9]	105	int mapTasks = 1;
[7]	106	int reduceTasks = 1;
	107	JobConf conf = new JobConf(WordCount.class);
	108	conf.setJobName("wordcount");
	109
	110	conf.setNumMapTasks(mapTasks);
	111	conf.setNumReduceTasks(reduceTasks);
[18]	112	// 0.16
	113	// conf.setInputPath(new Path(wc.filepath));
	114	Convert.setInputPath(conf, new Path(wc.filepath));
[7]	115	conf.setOutputKeyClass(Text.class);
	116	conf.setOutputValueClass(IntWritable.class);
[18]	117	// 0.16
	118	// conf.setOutputPath(new Path(wc.outputPath));
	119	Convert.setOutputPath(conf, new Path(wc.outputPath));
[7]	120
	121	conf.setMapperClass(MapClass.class);
	122	conf.setCombinerClass(ReduceClass.class);
	123	conf.setReducerClass(ReduceClass.class);
[18]	124
[7]	125	// Delete the output directory if it exists already
[9]	126	Path outputDir = new Path(wc.outputPath);
[18]	127	// 0.16
	128	FileSystem.get(conf).delete(outputDir,true);
[7]	129
	130	JobClient.runJob(conf);
	131	}
	132	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: sample/WordCount.java @ 18

Download in other formats: