| Version 3 (modified by waue, 17 years ago) (diff) |
|---|
目的
This program parses your Apache access logs and stores the parsed fields in HBase.
如何使用
- 1. Upload the Apache logs ( /var/log/apache2/access.log* ) to HDFS (default: /user/waue/apache-log):
$ bin/hadoop dfs -put /var/log/apache2/ apache-log
- 2. The "dir" parameter in main must point to the directory that contains the logs.
- 3. You should manually filter out log lines that cause exceptions, for example:
{{{
::1 - - [29/Jun/2008:07:35:15 +0800] "GET / HTTP/1.0" 200 729 "...
}}}
結果
1 執行以下指令
hql > select * from apache-log;
2 結果
+-------------------------+-------------------------+-------------------------+
| Row                     | Column                  | Cell                    |
+-------------------------+-------------------------+-------------------------+
| 118.170.101.250         | http:agent              | Mozilla/4.0 (compatible;|
|                         |                         | MSIE 4.01; Windows 95)  |
+-------------------------+-------------------------+-------------------------+
| 118.170.101.250         | http:bytesize           | 318                     |
+-------------------------+-------------------------+-------------------------+
..........(skip)........
+-------------------------+-------------------------+-------------------------+
| 87.65.93.58             | http:method             | OPTIONS                 |
+-------------------------+-------------------------+-------------------------+
| 87.65.93.58             | http:protocol           | HTTP/1.1                |
+-------------------------+-------------------------+-------------------------+
| 87.65.93.58             | referrer:-              | *                       |
+-------------------------+-------------------------+-------------------------+
| 87.65.93.58             | url:*                   | -                       |
+-------------------------+-------------------------+-------------------------+
31 row(s) in set. (0.58 sec)
LogParser.java
package tw.org.nchc.code;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Locale;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Parses one line of an Apache access log (combined log format) into its
 * individual fields.
 *
 * <p>Parsing outcome, as seen through the getters:
 * <ul>
 *   <li>If the line does not match the expected layout, every String field
 *       stays {@code null} and the timestamp stays {@code 0}.</li>
 *   <li>If the line matches but the client host is not a dotted-quad IPv4
 *       address (for example {@code ::1}), only {@link #getIp()} is set.</li>
 *   <li>Otherwise all fields are populated.</li>
 * </ul>
 */
public class LogParser {
    /**
     * Layout of a log line: host, identity, user, [date], "request",
     * status, bytes, "referrer", "agent", followed by anything.
     */
    private static final Pattern LOG_LINE = Pattern
            .compile("([^ ]*) ([^ ]*) ([^ ]*) \\[([^]]*)\\] \"([^\"]*)\"" +
                    " ([^ ]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\".*");

    /** Number of dot-separated chunks in an IPv4 address. */
    private static final int IPV4_CHUNKS = 4;

    /** Largest value a single IPv4 chunk may hold. */
    private static final int MAX_CHUNK = 255;

    private String ip;
    private String protocol;
    private String method;
    private String url;
    private String code;
    private String byteSize;
    private String referrer;
    private String agent;
    private long timestamp;

    /**
     * Parses a single access-log line.
     *
     * @param line the raw log line
     * @throws ParseException if the date field cannot be parsed, or if the
     *         quoted request field does not contain at least the three
     *         space-separated parts "METHOD URL PROTOCOL" (for example a
     *         request logged as {@code "-"})
     * @throws Exception kept in the signature for source compatibility
     *         with existing callers
     */
    public LogParser(String line) throws ParseException, Exception {
        Matcher matcher = LOG_LINE.matcher(line);
        if (!matcher.matches()) {
            return;
        }

        // IP address of the client requesting the web page.
        this.ip = matcher.group(1);
        if (!isIpAddress(ip)) {
            return;
        }

        // A fresh formatter per call: SimpleDateFormat is not thread-safe.
        SimpleDateFormat sdf = new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss Z", Locale.US);
        this.timestamp = sdf.parse(matcher.group(4)).getTime();

        String request = matcher.group(5);
        String[] http = request.split(" ");
        if (http.length < 3) {
            // Report a malformed request through the declared exception
            // instead of letting the array access below blow up.
            throw new ParseException("Malformed request line \"" + request
                    + "\": expected \"METHOD URL PROTOCOL\"", matcher.start(5));
        }
        this.method = http[0];
        this.url = http[1];
        this.protocol = http[2];

        this.code = matcher.group(6);
        this.byteSize = matcher.group(7);
        this.referrer = matcher.group(8);
        this.agent = matcher.group(9);
    }

    /**
     * Tells whether the given text is a dotted-quad IPv4 address: exactly
     * four chunks separated by single dots, each made only of the digits
     * 0-9 and holding a value between 0 and 255.
     *
     * @param inputString the text to check; may be {@code null}
     * @return {@code true} if the text is a dotted-quad IPv4 address,
     *         {@code false} otherwise (including for {@code null})
     */
    public static boolean isIpAddress(String inputString) {
        if (inputString == null) {
            return false;
        }
        // Limit -1 keeps empty leading/trailing chunks, so "1.2.3.4." and
        // ".1.2.3.4" produce five chunks and are rejected.
        String[] chunks = inputString.split("\\.", -1);
        if (chunks.length != IPV4_CHUNKS) {
            return false;
        }
        for (String chunk : chunks) {
            if (!isChunk(chunk)) {
                return false;
            }
        }
        return true;
    }

    /** Checks that the text is a non-empty run of digits 0-9 valued at most 255. */
    private static boolean isChunk(String text) {
        int length = text.length();
        if (length == 0) {
            return false;
        }
        int value = 0;
        for (int i = 0; i < length; i++) {
            char c = text.charAt(i);
            if (c < '0' || c > '9') {
                // Rejects signs, whitespace and any other non-digit.
                return false;
            }
            value = value * 10 + (c - '0');
            if (value > MAX_CHUNK) {
                // Bail out early; this also keeps value from overflowing.
                return false;
            }
        }
        return true;
    }

    /** @return the client host field, or {@code null} if the line did not match */
    public String getIp() {
        return ip;
    }

    /** @return the third part of the request field (e.g. {@code HTTP/1.1}), or {@code null} if not parsed */
    public String getProtocol() {
        return protocol;
    }

    /** @return the first part of the request field (e.g. {@code GET}), or {@code null} if not parsed */
    public String getMethod() {
        return method;
    }

    /** @return the second part of the request field, or {@code null} if not parsed */
    public String getUrl() {
        return url;
    }

    /** @return the status-code field as text, or {@code null} if not parsed */
    public String getCode() {
        return code;
    }

    /** @return the response-size field as text, or {@code null} if not parsed */
    public String getByteSize() {
        return byteSize;
    }

    /** @return the quoted referrer field, or {@code null} if not parsed */
    public String getReferrer() {
        return referrer;
    }

    /** @return the quoted user-agent field, or {@code null} if not parsed */
    public String getAgent() {
        return agent;
    }

    /** @return the request time in milliseconds since the epoch, or {@code 0} if not parsed */
    public long getTimestamp() {
        return timestamp;
    }
}
