Index: sample/hadoop-0.16/tw/org/nchc/code/AccessLogParser.java
===================================================================
--- sample/hadoop-0.16/tw/org/nchc/code/AccessLogParser.java	(revision 30)
+++ sample/hadoop-0.16/tw/org/nchc/code/AccessLogParser.java	(revision 30)
@@ -0,0 +1,135 @@
+package tw.org.nchc.code;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Locale;
+import java.util.StringTokenizer;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Parses a single Apache "combined" format access_log line into its
+ * component fields. Lines that do not match the format, or whose client
+ * field is not a dotted-quad IP address, make the constructor throw a
+ * ParseException instead of silently leaving every field null.
+ */
+public class AccessLogParser {
+  private String ip;
+  private String protocol;
+  private String method;
+  private String url;
+  private String code;
+  private String byteSize;
+  private String referrer;
+  private String agent;
+  private long timestamp;
+
+  // Combined log format:
+  // host ident authuser [date] "request" status bytes "referrer" "agent"
+  // Compiled once; java.util.regex.Pattern is thread-safe and shareable.
+  private static final Pattern p = Pattern
+      .compile("([^ ]*) ([^ ]*) ([^ ]*) \\[([^]]*)\\] \"([^\"]*)\"" +
+               " ([^ ]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\".*");
+
+  /**
+   * Parses one raw access_log line.
+   *
+   * @param line one line of an Apache combined-format access log
+   * @throws ParseException if the line does not match the combined
+   *         format, the client field is not an IP address, the request
+   *         field is truncated, or the timestamp cannot be parsed
+   */
+  public AccessLogParser(String line) throws ParseException {
+    Matcher matcher = p.matcher(line);
+    if (!matcher.matches()) {
+      throw new ParseException("unparseable log line: " + line, 0);
+    }
+    // IP address of the client requesting the web page.
+    this.ip = matcher.group(1);
+    if (!isIpAddress(ip)) {
+      throw new ParseException("client field is not an IP address: " + ip, 0);
+    }
+    // SimpleDateFormat is not thread-safe, so build a fresh one per line.
+    SimpleDateFormat sdf =
+        new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss Z", Locale.US);
+    this.timestamp = sdf.parse(matcher.group(4)).getTime();
+    // The request field is "METHOD URL PROTOCOL"; guard against
+    // truncated requests before indexing.
+    String[] http = matcher.group(5).split(" ");
+    if (http.length < 3) {
+      throw new ParseException("malformed request: " + matcher.group(5), 0);
+    }
+    this.method = http[0];
+    this.url = http[1];
+    this.protocol = http[2];
+    this.code = matcher.group(6);
+    this.byteSize = matcher.group(7);
+    this.referrer = matcher.group(8);
+    this.agent = matcher.group(9);
+  }
+
+  /**
+   * Returns true if inputString is a dotted-quad IPv4 address: exactly
+   * four '.'-separated decimal chunks, each in the range 0..255.
+   */
+  public static boolean isIpAddress(String inputString) {
+    StringTokenizer tokenizer = new StringTokenizer(inputString, ".");
+    if (tokenizer.countTokens() != 4) {
+      return false;
+    }
+    try {
+      for (int i = 0; i < 4; i++) {
+        String t = tokenizer.nextToken();
+        int chunk = Integer.parseInt(t);
+        // (chunk & 255) == chunk exactly when 0 <= chunk <= 255.
+        if ((chunk & 255) != chunk) {
+          return false;
+        }
+      }
+    } catch (NumberFormatException e) {
+      return false;
+    }
+    // StringTokenizer skips empty tokens, so ".." must be rejected
+    // explicitly.
+    if (inputString.indexOf("..") >= 0) {
+      return false;
+    }
+    return true;
+  }
+
+  public String getIp() {
+    return ip;
+  }
+
+  public String getProtocol() {
+    return protocol;
+  }
+
+  public String getMethod() {
+    return method;
+  }
+
+  public String getUrl() {
+    return url;
+  }
+
+  public String getCode() {
+    return code;
+  }
+
+  public String getByteSize() {
+    return byteSize;
+  }
+
+  public String getReferrer() {
+    return referrer;
+  }
+
+  public String getAgent() {
+    return agent;
+  }
+
+  public long getTimestamp() {
+    return timestamp;
+  }
+}
Index: sample/hadoop-0.16/tw/org/nchc/code/LogFetcher.java
===================================================================
--- sample/hadoop-0.16/tw/org/nchc/code/LogFetcher.java	(revision 30)
+++ sample/hadoop-0.16/tw/org/nchc/code/LogFetcher.java	(revision 30)
@@ -0,0 +1,227 @@
+/**
+ * Program: LogFetcher.java
+ * Editor: Waue Chen
+ * From :  NCHC. Taiwan
+ * Last Update Date: 07/02/2008
+ */
+/**
+ * Copyright 2007 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package tw.org.nchc.code;
+
+import java.io.IOException;
+import java.text.ParseException;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseAdmin;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HTable;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapred.ClusterStatus;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+/**
+ * Map-only job: parses Apache access_log lines with AccessLogParser and
+ * stores each request into an HBase table keyed by the client IP.
+ * TODO: FgnStatLog, Error_log, Access_log (Default, W3CExtended,
+ * IISw3cExtended)
+ */
+public class LogFetcher {
+	static HBaseConfiguration conf = new HBaseConfiguration();
+
+	/** Job-conf key under which the target table name is passed. */
+	public static final String TABLE = "table.name";
+
+	static String tableName;
+
+	// Shared per-JVM HTable handle, lazily opened by the first map() call.
+	static HTable table = null;
+
+	/** Set to true to use the hard-coded table/dir when run from an IDE. */
+	static boolean eclipseRun = false;
+
+	/**
+	 * Mapper: one access_log line in, one HBase row update out. Nothing
+	 * is emitted to the MapReduce output; all writes go to HBase.
+	 */
+	public static class MapClass extends MapReduceBase implements
+			Mapper<WritableComparable, Text, Text, Writable> {
+
+		@Override
+		public void configure(JobConf job) {
+			tableName = job.get(TABLE, "");
+		}
+
+		public void map(WritableComparable key, Text value,
+				OutputCollector<Text, Writable> output, Reporter reporter)
+				throws IOException {
+			try {
+				AccessLogParser log = new AccessLogParser(value.toString());
+				if (table == null)
+					table = new HTable(conf, new Text(tableName));
+				// The row key is the client IP; one lock covers all puts.
+				long lockId = table.startUpdate(new Text(log.getIp()));
+				table.put(lockId, new Text("http:protocol"), log.getProtocol()
+						.getBytes());
+				table.put(lockId, new Text("http:method"), log.getMethod()
+						.getBytes());
+				table.put(lockId, new Text("http:code"), log.getCode()
+						.getBytes());
+				table.put(lockId, new Text("http:bytesize"), log.getByteSize()
+						.getBytes());
+				table.put(lockId, new Text("http:agent"), log.getAgent()
+						.getBytes());
+				table.put(lockId, new Text("url:" + log.getUrl()), log
+						.getReferrer().getBytes());
+				table.put(lockId, new Text("referrer:" + log.getReferrer()),
+						log.getUrl().getBytes());
+				// Use the request time as the stored cell timestamp.
+				table.commit(lockId, log.getTimestamp());
+			} catch (ParseException e) {
+				// Bad line: report it and continue with the next record.
+				e.printStackTrace();
+			} catch (Exception e) {
+				e.printStackTrace();
+			}
+		}
+	}
+
+	/**
+	 * Replacement for the deprecated FileSystem.listPaths: returns the
+	 * paths of the direct children of path.
+	 */
+	static public Path[] listPaths(FileSystem fsm, Path path)
+			throws IOException {
+		FileStatus[] fss = fsm.listStatus(path);
+		int length = fss.length;
+		Path[] pi = new Path[length];
+		for (int i = 0; i < length; i++) {
+			pi[i] = fss[i].getPath();
+		}
+		return pi;
+	}
+
+	/**
+	 * Runs the map-only fetch job.
+	 *
+	 * @param table name of the HBase table to write into
+	 * @param dir   an access_log file, or a directory scanned one level
+	 *              deep for access_log files
+	 * @throws IOException if the job or a filesystem operation fails
+	 */
+	public static void runMapReduce(String table, String dir)
+			throws IOException {
+		Path tempDir = new Path("log/temp");
+		Path inputDir = new Path(dir);
+		FileSystem fs = FileSystem.get(conf);
+		JobConf jobConf = new JobConf(conf, LogFetcher.class);
+		jobConf.setJobName("apache log fetcher");
+		jobConf.set(TABLE, table);
+		if (fs.isFile(inputDir)) {
+			jobConf.setInputPath(inputDir);
+		} else {
+			// Only list the directory when the input is not a single file.
+			Path[] in = listPaths(fs, inputDir);
+			for (int i = 0; i < in.length; i++) {
+				if (fs.isFile(in[i])) {
+					jobConf.addInputPath(in[i]);
+				} else {
+					Path[] sub = listPaths(fs, in[i]);
+					for (int j = 0; j < sub.length; j++) {
+						if (fs.isFile(sub[j])) {
+							jobConf.addInputPath(sub[j]);
+						}
+					}
+				}
+			}
+		}
+		// The map output is unused; tempDir only satisfies the framework
+		// and is removed once the job completes.
+		jobConf.setOutputPath(tempDir);
+
+		jobConf.setMapperClass(MapClass.class);
+
+		JobClient client = new JobClient(jobConf);
+		ClusterStatus cluster = client.getClusterStatus();
+		jobConf.setNumMapTasks(cluster.getMapTasks());
+		jobConf.setNumReduceTasks(0);
+
+		JobClient.runJob(jobConf);
+
+		fs.delete(tempDir);
+		fs.close();
+	}
+
+	/**
+	 * Creates the HBase table with the http:, url: and referrer: column
+	 * families unless it already exists. The method name keeps the
+	 * original "creatTable" spelling for caller compatibility.
+	 */
+	public static void creatTable(String table) throws IOException {
+		HBaseAdmin admin = new HBaseAdmin(conf);
+		if (!admin.tableExists(new Text(table))) {
+			System.out.println("1. " + table
+					+ " table creating ... please wait");
+			HTableDescriptor tableDesc = new HTableDescriptor(table);
+			tableDesc.addFamily(new HColumnDescriptor("http:"));
+			tableDesc.addFamily(new HColumnDescriptor("url:"));
+			tableDesc.addFamily(new HColumnDescriptor("referrer:"));
+			admin.createTable(tableDesc);
+		} else {
+			System.out.println("1. " + table + " table already exists.");
+		}
+		System.out.println("2. access_log files fetching using map/reduce");
+	}
+
+	/**
+	 * Entry point. Usage:
+	 * logfetcher &lt;access_log file or directory&gt; &lt;table_name&gt;
+	 */
+	public static void main(String[] args) throws IOException {
+		String table_name = "log";
+		String dir = "apache-log";
+
+		if (eclipseRun) {
+			// Hard-coded defaults for in-IDE runs.
+			table_name = "log";
+			dir = "apache-log";
+		} else if (args.length < 2) {
+			System.out
+					.println("Usage: logfetcher <access_log file or directory> <table_name>");
+			System.exit(1);
+		} else {
+			table_name = args[1];
+			dir = args[0];
+		}
+		creatTable(table_name);
+		runMapReduce(table_name, dir);
+	}
+
+}
