| | 29 | |
| | 30 | || 說明 || 路徑 || 擁有者身份 || |
| | 31 | || nutchez 家目錄 || /opt/nutchez/ || nutchuser || |
| | 32 | || nutch 家目錄 || /opt/nutchez/nutch || nutchuser || |
| | 33 | || nutch 工作目錄 || /var/nutchez/nutch-nutchuser || nutchuser || |
| | 34 | || nutch 日誌檔 || /var/nutchez/logs || nutchuser || |
| | 35 | || nutch 設定檔 || /opt/nutchez/nutch/conf || nutchuser || |
| | 36 | || tomcat 家目錄 || /opt/nutchez/tomcat || nutchuser || |
| | 37 | || nutchez 使用者目錄 || /home/nutchuser/nutchez/ || nutchuser || |
| | 38 | || nutchez 索引資料庫 || /home/nutchuser/nutchez/search/ || 由 nutch 完成 crawl 後產生 || |
| | 39 | |
| | 40 | * 修改 /opt/nutchez/nutch/conf/ 的 hadoop-site.xml |
| | 41 | {{{ |
| | 42 | #!xml |
| | 43 | <configuration> |
| | 44 | <property> |
| | 45 | <name>fs.default.name</name> |
| | 46 | <value>hdfs://secuse.nchc.org.tw:9000</value> |
| | 47 | </property> |
| | 48 | <property> |
| | 49 | <name>mapred.job.tracker</name> |
| | 50 | <value>secuse.nchc.org.tw:9001</value> |
| | 51 | </property> |
| | 52 | <property> |
| | 53 | <name>hadoop.tmp.dir</name> |
| | 54 | <value>/var/nutchez/nutch-nutchuser</value> |
| | 55 | </property> |
| | 56 | </configuration> |
| | 57 | }}} |
| | 58 | |
| | 59 | * 修改 tomcat 的 port 設定 => /opt/nutchez/tomcat/conf/ 的 server.xml |
| | 60 | |
| | 61 | {{{ |
| | 62 | #!xml |
| | 63 | <Connector port="8080" protocol="HTTP/1.1" |
| | 64 | connectionTimeout="20000" |
| | 65 | redirectPort="8443" URIEncoding="UTF-8" |
| | 66 | useBodyEncodingForURI="true" /> |
| | 67 | }}} |
| | 68 | |
| | 69 | * 指定搜尋結果所使用的索引目錄 (searcher.dir) => /opt/nutchez/tomcat/webapps/ROOT/WEB-INF/classes/ 的 nutch-site.xml |
| | 70 | |
| | 71 | {{{ |
| | 72 | #!xml |
| | 73 | <configuration> |
| | 74 | <property> |
| | 75 | <name>searcher.dir</name> |
| | 76 | <value>/home/nutchuser/nutchez/search</value> |
| | 77 | </property> |
| | 78 | </configuration> |
| | 79 | }}} |
| | 80 | |
| | 81 | * /opt/nutchez/nutch/bin/nutch 執行檔有修改,設定了下列變數: |
| | 82 | |
| | 83 | {{{ |
| | 84 | #!sh |
| | 85 | NUTCH_HOME=/opt/nutchez/nutch |
| | 86 | NUTCH_CONF_DIR=/opt/nutchez/nutch/conf |
| | 87 | NUTCH_LOG_DIR=/var/nutchez/logs |
| | 88 | }}} |
| | 89 | |
| | 90 | * 使用改版 nutchez 的 hadoop 時,仍然需要先執行 format,再執行 start-all.sh |
| | 91 | |
| | 92 | |