1 | #!/bin/bash |
---|
2 | source install_lang |
---|
3 | ####### garbage here ############# |
---|
#######################################
# Prints the message stored in $Good.
# Globals:   Good (read) - presumably provided by the sourced install_lang
#            file; verify against that file.
# Outputs:   the value of $Good followed by a newline, on stdout
#######################################
mainFunction() {
  # printf rather than echo: echo would treat a value such as "-n" or "-e"
  # as an option and print nothing.
  printf '%s\n' "$Good"
}
---|
#######################################
# Prints the message stored in $Bra_Bra_Bra.
# Globals:   Bra_Bra_Bra (read) - presumably provided by the sourced
#            install_lang file; verify against that file.
# Outputs:   the value of $Bra_Bra_Bra followed by a newline, on stdout
#######################################
braBraBra() {
  # printf rather than echo: echo would treat a value such as "-n" or "-e"
  # as an option and print nothing.
  printf '%s\n' "$Bra_Bra_Bra"
}
---|
12 | ####### garbage end ############### |
---|
13 | |
---|
14 | |
---|
15 | |
---|
16 | ####### fafa code here ########### |
---|
17 | |
---|
# Assumed setup:
#   /home/nutchuser/NutchEZ_source holds 3 files:
#     install.sh, nutch-1.0.tar.gz, apache-tomcat-6.0.18.tar.gz
#   The installation path is /opt/NutchEZ

Install_source=/home/nutchuser/NutchEZ_source
NutchEZ_HOME=/opt/NutchEZ
# IPv4 address of eth0, taken from the "inet addr:<ip>" field that
# /sbin/ifconfig prints; left empty when no such field is present.
MasterIP_Address=$(/sbin/ifconfig eth0 \
  | sed -n 's/^.*inet addr:\([^ ]*\).*$/\1/p')
---|
26 | |
---|
27 | |
---|
#######################################
# Asks for the administrator e-mail address and the master DNS name.
# Globals:   Admin_email (written), MasterDNS (written)
# Inputs:    two lines read from stdin
#######################################
set_install_information () {
  # -r keeps backslashes literal and stops a trailing backslash from
  # joining the next input line onto this answer.
  read -r -p "Please enter administrator's e-mail address: " Admin_email
  read -r -p "Please enter the Master DNS: " MasterDNS
}
---|
32 | |
---|
#######################################
# Prints the install information collected so far.
# Globals:   Admin_email (read), MasterDNS (read)
# Outputs:   two lines on stdout
#######################################
show_info () {
  printf '%s\n' \
    "Administrator's e-mail address is $Admin_email." \
    "The master DNS is: $MasterDNS"
}
---|
37 | |
---|
#######################################
# Asks the user to confirm the install information.
# Globals:   confirm (written) - the raw answer; the prompt offers
#            "1.Yes 2.No" but the value is not validated here.
# Inputs:    one line read from stdin
#######################################
confirm_install_information () {
  # -r keeps backslashes in the answer literal.
  read -r -p "Please confirm your install infomation: 1.Yes 2.No " confirm
}
---|
41 | |
---|
#######################################
# Unpacks /opt/nutch-1.0.tar.gz, moves the result to $NutchEZ_HOME, hands
# ownership to nutchuser and writes the Nutch/Hadoop configuration.
# Globals:   NutchEZ_HOME (read)
# Returns:   non-zero as soon as one step fails, so later steps never run
#            against a half-installed tree; otherwise the status of
#            set_Nutch_conf.
# Side effects: leaves the current directory at /opt or wherever
#            set_Nutch_conf leaves it.
#######################################
Install_Nutch () {
  cd /opt || return 1
  # Use "tar zxvf" instead for a verbose listing while debugging.
  tar zxf /opt/nutch-1.0.tar.gz || return 1
  # Move to the same path that chown and the conf writers use.
  mv /opt/nutch-1.0 "$NutchEZ_HOME" || return 1
  chown -R nutchuser:nutchuser "$NutchEZ_HOME" || return 1
  set_Nutch_conf
}
---|
50 | |
---|
#######################################
# Runs the four configuration writers in order: hadoop-env, hadoop-site,
# nutch-site and crawl-urlfilter.
# Returns:   the status of the first step that fails (remaining steps are
#            skipped), or 0 when all succeed.
#######################################
set_Nutch_conf () {
  set_hadoop-env || return
  # "haoop" is how the function is spelled at its definition in this file.
  set_haoop-site || return
  set_nutch-site || return
  set_crawl-urlfilter
}
---|
57 | |
---|
#######################################
# Appends the Hadoop environment settings to
# $NutchEZ_HOME/conf/hadoop-env.sh.
# Globals:   NutchEZ_HOME (read)
# Outputs:   a progress line on stdout
# Returns:   1 when the conf directory cannot be entered (nothing is
#            written in that case), otherwise the status of the append.
# Side effects: changes the current directory to $NutchEZ_HOME/conf;
#            appends, so running it twice duplicates the lines.
#######################################
set_hadoop-env () {
  echo "set $NutchEZ_HOME/conf/hadoop-env.sh"
  # Without this check a failed cd would append to hadoop-env.sh in
  # whatever directory the caller happened to be in.
  cd "$NutchEZ_HOME/conf/" || return 1
  # The heredoc is unquoted on purpose: $NutchEZ_HOME is expanded now so
  # the written file carries the concrete install path.
  cat >> hadoop-env.sh << EOF
export JAVA_HOME=/usr/lib/jvm/java-6-sun
export HADOOP_HOME=$NutchEZ_HOME
export HADOOP_LOG_DIR=/tmp/NutchEZ/logs
export HADOOP_SLAVES=$NutchEZ_HOME/conf/slaves
EOF
}
---|
69 | |
---|
#######################################
# Writes $NutchEZ_HOME/conf/hadoop-site.xml, replacing any existing file,
# with fs.default.name and mapred.job.tracker pointing at $MasterDNS
# (ports 9000 and 9001).
# Globals:   NutchEZ_HOME (read), MasterDNS (read)
# Outputs:   a progress line on stdout
# Returns:   1 when the conf directory cannot be entered (nothing is
#            written in that case), otherwise the status of the write.
# Side effects: changes the current directory to $NutchEZ_HOME/conf.
#######################################
set_haoop-site () {
  echo "set $NutchEZ_HOME/conf/hadoop-site.xml"
  # Do not write the file into some unrelated directory if cd fails.
  cd "$NutchEZ_HOME/conf/" || return 1
  cat > hadoop-site.xml << EOF
<configuration>
<property>
<name>fs.default.name</name>
<value>$MasterDNS:9000</value>
<description> The name of the default file system. Either the literal string "local" or a host:port for NDFS. </description>
</property>
<property>
<name>mapred.job.tracker</name>
<value>$MasterDNS:9001</value>
<description> The host and port that the MapReduce job tracker runs at. If "local", then jobs are run in-process as a single map and reduce task. </description>
</property>
</configuration>
EOF
}
---|
89 | |
---|
#######################################
# Writes $NutchEZ_HOME/conf/nutch-site.xml, replacing any existing file,
# with the http.agent.* properties (name, description, url, email).
# Globals:   NutchEZ_HOME (read), MasterDNS (read), Admin_email (read)
# Outputs:   a progress line on stdout
# Returns:   1 when the conf directory cannot be entered (nothing is
#            written in that case), otherwise the status of the write.
# Side effects: changes the current directory to $NutchEZ_HOME/conf.
#
# NOTE(review): a second function with this same name is defined further
# down in this file. Bash keeps the most recent definition, so once the
# whole file has been read this one is no longer the one that runs -
# confirm which of the two is intended.
#######################################
set_nutch-site () {
  echo "set $NutchEZ_HOME/conf/nutch-site.xml"
  # Do not write the file into some unrelated directory if cd fails.
  cd "$NutchEZ_HOME/conf/" || return 1
  cat > nutch-site.xml << EOF
<configuration>
<property>
<name>http.agent.name</name>
<value>nutchuser</value>
<description>HTTP 'User-Agent' request header. </description>
</property>
<property>
<name>http.agent.description</name>
<value>MyTest</value>
<description>Further description</description>
</property>
<property>
<name>http.agent.url</name>
<value>$MasterDNS</value>
<description>A URL to advertise in the User-Agent header. </description>
</property>
<property>
<name>http.agent.email</name>
<value>$Admin_email</value>
<description>An email address
</description>
</property>
</configuration>
EOF
}
---|
119 | |
---|
120 | |
---|
#######################################
# Helper for set_crawl-urlfilter: replaces the line that follows the first
# line containing a marker string.
# Arguments: $1 - file to edit in place
#            $2 - marker, matched as a fixed string
#            $3.. - replacement line(s) in final file order, written in
#                   GNU sed "a" text form (a literal backslash is "\\")
# Returns:   1 (file untouched) when the marker is absent or an edit fails
#######################################
_urlfilter_replace_next_line () {
  local file=$1 marker=$2
  shift 2
  local line_no i
  # -m 1 guarantees a single line number even if the marker repeats.
  line_no=$(grep -n -m 1 -F -- "$marker" "$file" | cut -d: -f1)
  if [[ -z "$line_no" ]]; then
    # With an empty line number sed would delete line 1 and append the
    # replacement after every line of the file, so bail out instead.
    printf 'set_crawl-urlfilter: marker not found in %s: %s\n' \
      "$file" "$marker" >&2
    return 1
  fi
  sed -i "$((line_no + 1))d" "$file" || return 1
  # Every append lands directly below the marker line, so add the
  # replacement lines last-to-first to end up in the requested order.
  for (( i = $#; i >= 1; i-- )); do
    sed -i "${line_no}a ${!i}" "$file" || return 1
  done
}

#######################################
# Rewrites four rules in $NutchEZ_HOME/conf/crawl-urlfilter.txt. For each
# known comment line, the rule on the following line is replaced:
#   - skipped schemes become ftp and mailto
#   - the list of skipped file suffixes is replaced
#   - the set of skipped URL characters becomes * ! @
#   - the final "skip everything" rule becomes "accept everything"
# Globals:   NutchEZ_HOME (read)
# Outputs:   a progress line on stdout; a diagnostic on stderr for every
#            marker that cannot be found
# Returns:   0 when all four rules were rewritten, 1 otherwise (the
#            remaining rules are still attempted)
#######################################
set_crawl-urlfilter () {
  local filter_file="$NutchEZ_HOME/conf/crawl-urlfilter.txt"
  local status=0

  echo "set $NutchEZ_HOME/conf/crawl-urlfilter.txt"

  _urlfilter_replace_next_line "$filter_file" \
    'skip file:, ftp:, & mailto: urls' \
    '-^(ftp|mailto):' || status=1

  # "\\." reaches the file as "\." because sed unescapes appended text.
  _urlfilter_replace_next_line "$filter_file" \
    'skip image and other suffixes we can' \
    '-\\.(gif|GIF|jpg|JPG|png|PNG|ico|ICO|css|sit|eps|wmf|mpg|xls|gz|rpm|tgz|mov|MOV|exe|jpeg|JPEG|bmp|BMP)$' \
    || status=1

  _urlfilter_replace_next_line "$filter_file" \
    'skip URLs containing certain characters as probable queries, etc.' \
    '-[*!@]' || status=1

  _urlfilter_replace_next_line "$filter_file" \
    'skip everything else' \
    '# accecpt anything else' \
    '+.*' || status=1

  return "$status"
}
---|
143 | |
---|
#######################################
# Runs "hadoop namenode -format" using the hadoop launcher under
# $NutchEZ_HOME/bin.
# Globals:   NutchEZ_HOME (read)
# Outputs:   a progress line on stdout, then whatever the command prints
# Returns:   the exit status of the hadoop command
#######################################
format_HDFS () {
  echo "format HDFS..."
  # Quoted so an install path containing spaces is still a single word.
  "$NutchEZ_HOME/bin/hadoop" namenode -format
}
---|
148 | |
---|
#######################################
# Runs $NutchEZ_HOME/bin/start-all.sh.
# Globals:   NutchEZ_HOME (read)
# Outputs:   a progress line on stdout, then whatever the script prints
# Returns:   the exit status of start-all.sh
#######################################
start_up_NutchEZ () {
  echo "start up NutchEZ..."
  # Quoted so an install path containing spaces is still a single word.
  "$NutchEZ_HOME/bin/start-all.sh"
}
---|
153 | |
---|
#######################################
# Installs Tomcat under $NutchEZ_HOME/tomcat and deploys the Nutch web
# application as its ROOT webapp:
#   1. unpack /opt/apache-tomcat-6.0.18.tar.gz and move it into place
#   2. unpack $NutchEZ_HOME/nutch-1.0.war into a scratch "web" directory
#   3. keep the stock ROOT webapp as ROOT-ori and install "web" as ROOT
#   4. create $NutchEZ_HOME/search and run set_server and set_nutch-site
# Globals:   NutchEZ_HOME (read)
# Returns:   1 as soon as a filesystem step fails; otherwise the status
#            of set_nutch-site.
# Side effects: leaves the current directory at $NutchEZ_HOME (or
#            wherever the set_* helpers leave it).
#######################################
Install_Tomcat () {
  cd /opt/ || return 1
  tar zxf apache-tomcat-6.0.18.tar.gz || return 1
  mv apache-tomcat-6.0.18 "$NutchEZ_HOME/tomcat" || return 1
  cd "$NutchEZ_HOME" || return 1
  chown -R nutchuser:nutchuser "$NutchEZ_HOME" || return 1

  mkdir -p web || return 1
  # jar always extracts into the current directory; anything after the
  # archive name selects entries inside it, it is not a destination. Run
  # it from inside web/ (in a subshell, so our own directory is kept).
  ( cd web && jar -xvf ../nutch-1.0.war ) || return 1

  mv tomcat/webapps/ROOT tomcat/webapps/ROOT-ori || return 1
  mv web tomcat/webapps/ROOT || return 1
  mkdir -p search || return 1

  set_server
  set_nutch-site
}
---|
171 | |
---|
172 | |
---|
#######################################
# Edits $NutchEZ_HOME/tomcat/conf/server.xml: finds the comment line
#   <!-- A "Connector" using the shared thread pool-->
# removes the six lines that follow it and inserts an HTTP connector on
# port 8080 with URIEncoding="UTF-8" and useBodyEncodingForURI="true".
# Globals:   NutchEZ_HOME (read)
# Outputs:   the path of the file on stdout; a diagnostic on stderr when
#            the marker comment is missing
# Returns:   1 (file untouched) when the marker is missing or the delete
#            fails, otherwise the status of the final sed
#######################################
set_server () {
  local server_xml="$NutchEZ_HOME/tomcat/conf/server.xml"
  local line_no connector

  echo "$NutchEZ_HOME/tomcat/conf/server.xml"

  # -m 1 guarantees a single line number even if the comment repeats.
  line_no=$(grep -n -m 1 -F -- \
    '<!-- A "Connector" using the shared thread pool-->' "$server_xml" \
    | cut -d: -f1)
  if [[ -z "$line_no" ]]; then
    # With an empty line number the sed calls below would delete lines
    # 1-6 and append the connector after every line of the file.
    printf 'set_server: marker comment not found in %s\n' "$server_xml" >&2
    return 1
  fi

  sed -i "$((line_no + 1)),$((line_no + 6))d" "$server_xml" || return 1

  # Text for sed's "a" command: a trailing backslash continues the
  # appended text on the next line.
  connector='<Connector port="8080" protocol="HTTP/1.1"\
connectionTimeout="20000"\
redirectPort="8443" URIEncoding="UTF-8"\
useBodyEncodingForURI="true" />\
'
  sed -i "${line_no}a ${connector}" "$server_xml"
}
---|
184 | |
---|
#######################################
# Inserts the http.agent.* properties (name, description, url, email)
# into $NutchEZ_HOME/conf/nutch-site.xml, directly below the first
# <configuration> line.
# Globals:   NutchEZ_HOME (read), MasterDNS (read), Admin_email (read)
# Outputs:   a path on stdout; a diagnostic on stderr when the file has
#            no <configuration> line
# Returns:   1 (file untouched) when no <configuration> line is found,
#            otherwise the status of sed
#
# NOTE(review): an earlier function in this file has the same name; being
# defined later, this is the definition in effect once the whole file has
# been read - confirm which of the two is intended.
# NOTE(review): the printed path is the Tomcat webapp copy of
# nutch-site.xml, but the file edited is $NutchEZ_HOME/conf/nutch-site.xml
# - confirm which file is meant.
#######################################
set_nutch-site () {
  local site_xml="$NutchEZ_HOME/conf/nutch-site.xml"
  local line_no

  echo "$NutchEZ_HOME/tomcat/webapps/ROOT/WEB-INF/classes/nutch-site.xml"

  # Find the line number at which the settings are inserted.
  line_no=$(grep -n -m 1 -F -- '<configuration>' "$site_xml" | cut -d: -f1)
  if [[ -z "$line_no" ]]; then
    # With an empty line number sed would insert the block after every
    # line of the file.
    printf 'set_nutch-site: no <configuration> line in %s\n' "$site_xml" >&2
    return 1
  fi

  # Insert the settings. The block is fed on stdin and read by sed's "r"
  # command, so the values need no sed or shell escaping and the quotes
  # around 'User-Agent' reach the file.
  sed -i "${line_no}r /dev/stdin" "$site_xml" << EOF
<property>
<name>http.agent.name</name>
<value>waue</value>
<description>HTTP 'User-Agent' request header. </description>
</property>
<property>
<name>http.agent.description</name>
<value>MyTest</value>
<description>Further description</description>
</property>
<property>
<name>http.agent.url</name>
<value>$MasterDNS</value>
<description>A URL to advertise in the User-Agent header. </description>
</property>
<property>
<name>http.agent.email</name>
<value>$Admin_email</value>
<description>An email address
</description>
</property>
EOF
}
---|
215 | |
---|
216 | |
---|
#######################################
# Runs $NutchEZ_HOME/tomcat/bin/startup.sh.
# Globals:   NutchEZ_HOME (read)
# Outputs:   a progress line on stdout, then whatever the script prints
# Returns:   the exit status of startup.sh
#######################################
start_up_tomcat () {
  echo "start up tomcat..."
  # Quoted so an install path containing spaces is still a single word.
  "$NutchEZ_HOME/tomcat/bin/startup.sh"
}
---|