| Version 4 (modified by waue, 17 years ago) (diff) | 
|---|
目的
This program parses your Apache access logs and stores the parsed fields in HBase.
如何使用
- 1. Upload the Apache logs (/var/log/apache2/access.log*) to HDFS (default: /user/waue/apache-log):
 
$ bin/hadoop dfs -put /var/log/apache2/ apache-log
- 2. The parameter "dir" in main is the directory that contains the logs.
 
- 3. You should manually filter out log lines that do not follow the expected format (for example, lines whose first field is not an IPv4 address),
ex: ::1 - - [29/Jun/2008:07:35:15 +0800] "GET / HTTP/1.0" 200 729 "...
 
結果
1 執行以下指令
hql > select * from apache-log;
2 結果
+-------------------------+-------------------------+-------------------------+
| Row                     | Column                  | Cell                    |
+-------------------------+-------------------------+-------------------------+
| 118.170.101.250         | http:agent              | Mozilla/4.0 (compatible;|
|                         |                         | MSIE 4.01; Windows 95)  |
+-------------------------+-------------------------+-------------------------+
| 118.170.101.250         | http:bytesize           | 318                     |
+-------------------------+-------------------------+-------------------------+
..........(skip)........
+-------------------------+-------------------------+-------------------------+
| 87.65.93.58             | http:method             | OPTIONS                 |
+-------------------------+-------------------------+-------------------------+
| 87.65.93.58             | http:protocol           | HTTP/1.1                |
+-------------------------+-------------------------+-------------------------+
| 87.65.93.58             | referrer:-              | *                       |
+-------------------------+-------------------------+-------------------------+
| 87.65.93.58             | url:*                   | -                       |
+-------------------------+-------------------------+-------------------------+
31 row(s) in set. (0.58 sec)
LogParser.java
package tw.org.nchc.code;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Locale;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Parses a single access-log line and exposes its fields through getters.
 *
 * <p>A line is expected to consist of three space-separated fields, a
 * bracketed timestamp, a quoted request, two more space-separated fields
 * (stored as status code and byte size) and two quoted fields (stored as
 * referrer and user agent). Example:
 *
 * <pre>
 * 1.2.3.4 - - [29/Jun/2008:07:35:15 +0800] "GET / HTTP/1.0" 200 729 "-" "Mozilla/4.0"
 * </pre>
 *
 * <p>Parsing is best-effort:
 * <ul>
 *   <li>If the line does not match the expected layout, no field is set
 *       (object getters return {@code null}, {@link #getTimestamp()} returns 0).</li>
 *   <li>If the first field is not a dotted-quad IPv4 address (for example
 *       {@code ::1}), only {@link #getIp()} is set.</li>
 * </ul>
 */
public class LogParser {
  /** Minimum number of space-separated parts in the quoted request: method, URL, protocol. */
  private static final int REQUEST_PARTS = 3;

  /** Timestamp layout inside the square brackets, e.g. {@code 29/Jun/2008:07:35:15 +0800}. */
  private static final String TIMESTAMP_FORMAT = "dd/MMM/yyyy:HH:mm:ss Z";

  /** Compiled once; groups 1, 4, 5, 6, 7, 8 and 9 are read by the constructor. */
  private static final Pattern LOG_PATTERN = Pattern
      .compile("([^ ]*) ([^ ]*) ([^ ]*) \\[([^]]*)\\] \"([^\"]*)\""
          + " ([^ ]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\".*");

  private String ip;
  private String protocol;
  private String method;
  private String url;
  private String code;
  private String byteSize;
  private String referrer;
  private String agent;
  private long timestamp;

  /**
   * Parses {@code line} and populates the fields of this object.
   *
   * @param line one line of the access log
   * @throws ParseException if the line matches the overall layout and starts
   *         with an IPv4 address, but its timestamp cannot be parsed or its
   *         quoted request has fewer than three space-separated parts
   * @throws Exception kept in the signature for compatibility with existing callers
   */
  public LogParser(String line) throws ParseException, Exception {
    Matcher matcher = LOG_PATTERN.matcher(line);
    if (!matcher.matches()) {
      // Not a recognizable log line: leave every field unset.
      return;
    }

    // First field of the line: the host that issued the request.
    this.ip = matcher.group(1);
    if (!isIpAddress(this.ip)) {
      // Non-IPv4 hosts (e.g. "::1") are not parsed any further.
      return;
    }

    // A new formatter per call: SimpleDateFormat must not be shared across threads.
    SimpleDateFormat sdf = new SimpleDateFormat(TIMESTAMP_FORMAT, Locale.US);
    this.timestamp = sdf.parse(matcher.group(4)).getTime();

    String request = matcher.group(5);
    String[] http = request.split(" ");
    if (http.length < REQUEST_PARTS) {
      // Requests such as "-" would otherwise fail with an
      // ArrayIndexOutOfBoundsException when indexed below.
      throw new ParseException(
          "Malformed request \"" + request + "\": expected \"METHOD URL PROTOCOL\"",
          matcher.start(5));
    }
    this.method = http[0];
    this.url = http[1];
    this.protocol = http[2];

    this.code = matcher.group(6);
    this.byteSize = matcher.group(7);
    this.referrer = matcher.group(8);
    this.agent = matcher.group(9);
  }

  /**
   * Tells whether {@code inputString} is a dotted-quad IPv4 address: exactly
   * four dot-separated chunks, each made only of ASCII digits and with a
   * numeric value between 0 and 255.
   *
   * @param inputString candidate address; may be {@code null}
   * @return {@code true} if the string is a valid dotted-quad address,
   *         {@code false} otherwise (including for {@code null})
   */
  public static boolean isIpAddress(String inputString) {
    if (inputString == null) {
      return false;
    }
    // Limit -1 keeps leading/trailing empty chunks so "1.2.3.4." is rejected.
    String[] chunks = inputString.split("\\.", -1);
    if (chunks.length != 4) {
      return false;
    }
    for (String chunk : chunks) {
      if (!isOctet(chunk)) {
        return false;
      }
    }
    return true;
  }

  /** Returns true if {@code chunk} is a non-empty run of ASCII digits whose value is at most 255. */
  private static boolean isOctet(String chunk) {
    if (chunk.isEmpty()) {
      return false;
    }
    // Check digits explicitly: Integer.parseInt would also accept a sign.
    for (int i = 0; i < chunk.length(); i++) {
      char c = chunk.charAt(i);
      if (c < '0' || c > '9') {
        return false;
      }
    }
    try {
      return Integer.parseInt(chunk) <= 255;
    } catch (NumberFormatException e) {
      // Digit run too long to fit in an int.
      return false;
    }
  }

  /** @return the first field of the log line, or {@code null} if the line did not match */
  public String getIp() {
    return ip;
  }

  /** @return the third part of the quoted request (e.g. {@code HTTP/1.1}), or {@code null} if not parsed */
  public String getProtocol() {
    return protocol;
  }

  /** @return the first part of the quoted request (e.g. {@code GET}), or {@code null} if not parsed */
  public String getMethod() {
    return method;
  }

  /** @return the second part of the quoted request, or {@code null} if not parsed */
  public String getUrl() {
    return url;
  }

  /** @return the field following the quoted request (status code), or {@code null} if not parsed */
  public String getCode() {
    return code;
  }

  /** @return the field following the status code, as text, or {@code null} if not parsed */
  public String getByteSize() {
    return byteSize;
  }

  /** @return the first quoted field after the byte size, or {@code null} if not parsed */
  public String getReferrer() {
    return referrer;
  }

  /** @return the second quoted field after the byte size, or {@code null} if not parsed */
  public String getAgent() {
    return agent;
  }

  /** @return the bracketed timestamp as milliseconds since the epoch, or 0 if not parsed */
  public long getTimestamp() {
    return timestamp;
  }
}
