source: nutchez-0.2/src/test/install_func.sh @ 130

Last change on this file since 130 was 130, checked in by shunfa, 14 years ago

modify install, install_func

  • Property svn:executable set to *
File size: 6.4 KB
Line 
1#!/bin/bash
2source install_lang
3####### garbage here #############
# Print the message stored in $Good on its own line.
# Globals: Good (read) — not defined in this file; presumably supplied by the
#          sourced install_lang file (TODO confirm).
mainFunction() {
  echo "${Good}"
}
# Print the message stored in $Bra_Bra_Bra on its own line.
# Globals: Bra_Bra_Bra (read) — not defined in this file; presumably supplied
#          by the sourced install_lang file (TODO confirm).
braBraBra() {
  echo "${Bra_Bra_Bra}"
}
12####### garbage end ###############
13
14
15
16####### fafa code here ###########
17
# Assumed parameters:
# /home/nutchuser/NutchEZ_source contains 3 files:
# install.sh, nutch-1.0.tar.gz, apache-tomcat-6.0.18.tar.gz
# The installation path is /opt/NutchEZ
22
# Directory that holds the installer sources.
Install_source="/home/nutchuser/NutchEZ_source"

# Installation prefix used by every function in this file.
NutchEZ_HOME="/opt/NutchEZ"

# Address of eth0, scraped from the "inet addr:<ip>  Bcast:..." line printed
# by ifconfig: drop everything up to "addr:", drop "Bcast" and what follows,
# then drop everything from the first remaining space.
MasterIP_Address=$(
  /sbin/ifconfig eth0 \
    | grep 'inet addr' \
    | sed -e 's/^.*addr://g' -e 's/Bcast.*$//g' -e 's/ .*// '
)
26
27
# Interactively ask for the installation settings.
# Globals written: Admin_email - administrator's e-mail address
#                  MasterDNS   - DNS name of the master node
# Input is read from stdin, one line per question.
set_install_information () {
  # -r keeps backslashes in the typed value literal instead of treating them
  # as escape / line-continuation characters.
  read -r -p "Please enter administrator's e-mail address:  " Admin_email
  read -r -p "Please enter the Master DNS:  " MasterDNS
}
32
# Echo back the values collected by the installer prompts, one per line.
# Globals read: Admin_email, MasterDNS
show_info () {
  printf '%s\n' \
    "Administrator's e-mail address is $Admin_email." \
    "The master DNS is: $MasterDNS"
}
37
# Ask the user to confirm the entered settings.
# Globals written: confirm - the raw answer typed by the user (the prompt
#                  offers "1" for yes and "2" for no; no validation is done
#                  here).
confirm_install_information () {
  # -r: take the answer literally, without backslash processing.
  read -r -p "Please confirm your install infomation: 1.Yes 2.No  " confirm
}
41
# Unpack Nutch under /opt, rename it to /opt/NutchEZ, hand the tree over to
# nutchuser and then write the configuration files.
# Globals read: NutchEZ_HOME
# Returns: non-zero as soon as one of the filesystem steps fails, so later
#          steps never run against a half-finished tree; otherwise the status
#          of set_Nutch_conf.
# Side effect: leaves the current directory at /opt.
Install_Nutch () {
  cd /opt || {
    echo "Install_Nutch: cannot cd to /opt" >&2
    return 1
  }
  # Use "tar zxvf" instead for a verbose file listing.
  tar zxf /opt/nutch-1.0.tar.gz || {
    echo "Install_Nutch: cannot extract /opt/nutch-1.0.tar.gz" >&2
    return 1
  }
  # Relative target: resolves to /opt/NutchEZ because of the cd above.
  mv /opt/nutch-1.0 NutchEZ || {
    echo "Install_Nutch: cannot rename /opt/nutch-1.0 to NutchEZ" >&2
    return 1
  }
  chown -R nutchuser:nutchuser "$NutchEZ_HOME" || {
    echo "Install_Nutch: cannot chown $NutchEZ_HOME" >&2
    return 1
  }
  set_Nutch_conf
}
50
# Run every configuration writer in turn: hadoop-env.sh, hadoop-site.xml,
# nutch-site.xml and crawl-urlfilter.txt.
# The spelling "set_haoop-site" matches the function's definition in this file.
set_Nutch_conf () {
  local step
  for step in set_hadoop-env set_haoop-site set_nutch-site set_crawl-urlfilter; do
    "$step"
  done
}
57
# Append the NutchEZ environment settings to $NutchEZ_HOME/conf/hadoop-env.sh.
# Globals read: NutchEZ_HOME
# Returns: non-zero (and writes nothing) when the conf directory cannot be
#          entered; otherwise the status of the append.
# Side effect: leaves the current directory at $NutchEZ_HOME/conf/.
set_hadoop-env () {
  echo "set $NutchEZ_HOME/conf/hadoop-env.sh"
  # Without this check a failed cd would append to a hadoop-env.sh in
  # whatever directory happened to be current.
  cd "$NutchEZ_HOME/conf/" || {
    echo "set_hadoop-env: cannot cd to $NutchEZ_HOME/conf/" >&2
    return 1
  }
  # ">>" on purpose: the existing hadoop-env.sh is kept and extended.
  cat >> hadoop-env.sh << EOF
export JAVA_HOME=/usr/lib/jvm/java-6-sun
export HADOOP_HOME=/opt/NutchEZ
export HADOOP_LOG_DIR=/tmp/NutchEZ/logs
export HADOOP_SLAVES=/opt/NutchEZ/conf/slaves
EOF
}
69
# Write $NutchEZ_HOME/conf/hadoop-site.xml from scratch, pointing the default
# file system (port 9000) and the job tracker (port 9001) at $MasterDNS.
# Globals read: NutchEZ_HOME, MasterDNS
# Returns: non-zero (and writes nothing) when the conf directory cannot be
#          entered; otherwise the status of the write.
# Side effect: leaves the current directory at $NutchEZ_HOME/conf/.
# (The function name keeps its historical spelling because callers use it.)
set_haoop-site () {
  echo "set $NutchEZ_HOME/conf/hadoop-site.xml"
  # Without this check a failed cd would overwrite a hadoop-site.xml in
  # whatever directory happened to be current.
  cd "$NutchEZ_HOME/conf/" || {
    echo "set_haoop-site: cannot cd to $NutchEZ_HOME/conf/" >&2
    return 1
  }
  # Unquoted EOF: $MasterDNS is expanded while the file is written.
  cat > hadoop-site.xml << EOF
<configuration>
<property>
    <name>fs.default.name</name>
    <value>$MasterDNS:9000</value>
    <description> The name of the default file system. Either the literal string "local" or a host:port for NDFS. </description>
</property>
<property>
    <name>mapred.job.tracker</name>
    <value>$MasterDNS:9001</value>
    <description> The host and port that the MapReduce job tracker runs at. If "local", then jobs are run in-process as a single map and reduce task. </description>
</property>
</configuration>
EOF
}
89
# Write $NutchEZ_HOME/conf/nutch-site.xml from scratch with the crawler's
# HTTP agent identity (name, description, URL and contact e-mail).
# Globals read: NutchEZ_HOME, MasterDNS, Admin_email
# Returns: non-zero (and writes nothing) when the conf directory cannot be
#          entered; otherwise the status of the write.
# Side effect: leaves the current directory at $NutchEZ_HOME/conf/.
# NOTE(review): a second function named set_nutch-site is defined further
# down in this file; when the whole file is read, that later definition
# replaces this one.
set_nutch-site () {
  echo "set $NutchEZ_HOME/conf/nutch-site.xml"
  # Without this check a failed cd would overwrite a nutch-site.xml in
  # whatever directory happened to be current.
  cd "$NutchEZ_HOME/conf/" || {
    echo "set_nutch-site: cannot cd to $NutchEZ_HOME/conf/" >&2
    return 1
  }
  # Unquoted EOF: $MasterDNS and $Admin_email are expanded on write.
  # The last property is named http.agent.email; it previously used the
  # value of $MasterDNS as the property name.
  cat > nutch-site.xml << EOF
<configuration>
<property>
  <name>http.agent.name</name>
  <value>nutchuser</value>
  <description>HTTP 'User-Agent' request header. </description>
</property>
<property>
  <name>http.agent.description</name>
  <value>MyTest</value>
  <description>Further description</description>
</property>
<property>
  <name>http.agent.url</name>
  <value>$MasterDNS</value>
  <description>A URL to advertise in the User-Agent header. </description>
</property>
<property>
  <name>http.agent.email</name>
  <value>$Admin_email</value>
  <description>An email address
  </description>
</property>
</configuration>
EOF
}
119
120
# Helper: in FILE, find the first line containing the literal MARKER, delete
# the line directly below it, and append each TEXT right after the marker.
# Arguments: $1 - file to edit in place
#            $2 - marker text (matched as a fixed string)
#            $3.. - lines to append, written as sed "a" text (so a literal
#                   backslash must be given as two backslashes). Every TEXT
#                   is inserted directly below the marker, so a later TEXT
#                   ends up above an earlier one.
# Returns: 1 without touching FILE when the marker is missing; with an empty
#          line number sed would otherwise delete line 1 and append the text
#          after every line of the file.
_crawl_urlfilter_swap_rule () {
  local file=$1 marker=$2
  shift 2
  local line_no text

  line_no=$(grep -n -m 1 -F -- "$marker" "$file" | cut -d: -f1)
  if [[ -z "$line_no" ]]; then
    echo "set_crawl-urlfilter: marker not found in $file: $marker" >&2
    return 1
  fi

  sed -i "$((line_no + 1))d" "$file" || return 1
  for text in "$@"; do
    sed -i "${line_no}a ${text}" "$file" || return 1
  done
}

# Rewrite four rules of $NutchEZ_HOME/conf/crawl-urlfilter.txt. Each rule is
# the line directly below one of the stock comment markers:
#   - stop skipping file: URLs (only ftp: and mailto: stay excluded),
#   - shorten the list of skipped file suffixes,
#   - skip only URLs containing *, ! or @,
#   - accept everything else instead of rejecting it.
# Globals read: NutchEZ_HOME
# Returns: 0 when all four rules were rewritten, 1 when at least one marker
#          was missing or an edit failed (the remaining rules are still
#          attempted).
set_crawl-urlfilter () {
  local filter_file="$NutchEZ_HOME/conf/crawl-urlfilter.txt"
  local rc=0

  echo "set $NutchEZ_HOME/conf/crawl-urlfilter.txt"

  _crawl_urlfilter_swap_rule "$filter_file" \
    'skip file:, ftp:, & mailto: urls' \
    '-^(ftp|mailto):' || rc=1

  # '\\.' reaches sed as two backslashes and is written to the file as '\.'.
  _crawl_urlfilter_swap_rule "$filter_file" \
    'skip image and other suffixes we can' \
    '-\\.(gif|GIF|jpg|JPG|png|PNG|ico|ICO|css|sit|eps|wmf|mpg|xls|gz|rpm|tgz|mov|MOV|exe|jpeg|JPEG|bmp|BMP)$' || rc=1

  _crawl_urlfilter_swap_rule "$filter_file" \
    'skip URLs containing certain characters as probable queries, etc.' \
    '-[*!@]' || rc=1

  # Appended in this order so the file reads: marker, comment, "+.*".
  _crawl_urlfilter_swap_rule "$filter_file" \
    'skip everything else' \
    '+.*' \
    '# accecpt anything else' || rc=1

  return "$rc"
}
143
# Format the HDFS name node using the hadoop launcher under $NutchEZ_HOME.
# Globals read: NutchEZ_HOME
# Returns: the exit status of "hadoop namenode -format".
format_HDFS () {
  echo "format HDFS..."
  # Quoted so an install path containing spaces is still one command word.
  "$NutchEZ_HOME/bin/hadoop" namenode -format
}
148
# Start the services by running bin/start-all.sh under $NutchEZ_HOME.
# Globals read: NutchEZ_HOME
# Returns: the exit status of start-all.sh.
start_up_NutchEZ (){
  echo "start up NutchEZ..."
  # Quoted so an install path containing spaces is still one command word.
  "$NutchEZ_HOME/bin/start-all.sh"
}
153
# Install Tomcat under $NutchEZ_HOME/tomcat and deploy the Nutch web
# application as its ROOT webapp.
# Steps: unpack the Tomcat tarball in /opt, move it to $NutchEZ_HOME/tomcat,
# unpack nutch-1.0.war into a scratch "web" directory, swap it in as
# webapps/ROOT (the stock ROOT is kept as ROOT-ori), create the "search"
# directory, give the whole tree to nutchuser, then adjust server.xml and
# nutch-site.xml.
# Globals read: NutchEZ_HOME
# Returns: non-zero as soon as one of the filesystem steps fails; otherwise
#          the status of set_nutch-site.
# Side effect: leaves the current directory at $NutchEZ_HOME.
Install_Tomcat () {
  cd /opt/ || return 1
  tar zxf apache-tomcat-6.0.18.tar.gz || return 1
  mv apache-tomcat-6.0.18 "$NutchEZ_HOME" || return 1
  cd "$NutchEZ_HOME" || return 1
  mv apache-tomcat-6.0.18 tomcat || return 1

  mkdir "$NutchEZ_HOME/web" || return 1
  # "jar x" always extracts into the current directory; any argument after
  # the archive names a member to extract, not a destination. Extract from
  # inside web/ (in a subshell so the caller's directory is unaffected).
  ( cd "$NutchEZ_HOME/web" && jar -xvf "$NutchEZ_HOME/nutch-1.0.war" ) || return 1

  mv "$NutchEZ_HOME/tomcat/webapps/ROOT" "$NutchEZ_HOME/tomcat/webapps/ROOT-ori" || return 1
  mv "$NutchEZ_HOME/web" "$NutchEZ_HOME/tomcat/webapps/ROOT" || return 1
  mkdir "$NutchEZ_HOME/search" || return 1

  # Done after the layout is complete so the unpacked webapp and the search
  # directory are owned by nutchuser as well.
  chown -R nutchuser:nutchuser "$NutchEZ_HOME" || return 1

  set_server
  set_nutch-site
}
171
172
# Edit $NutchEZ_HOME/tomcat/conf/server.xml: locate the comment line
# '<!-- A "Connector" using the shared thread pool-->', delete the six lines
# below it and append an HTTP Connector on port 8080 with UTF-8 URI encoding
# in their place.
# Globals read: NutchEZ_HOME
# Returns: 1 without touching the file when the marker comment is missing;
#          with an empty line number sed would otherwise delete lines 1-6
#          and append the Connector after every line of the file.
set_server () {
  local server_xml="$NutchEZ_HOME/tomcat/conf/server.xml"
  local line_no

  echo "$NutchEZ_HOME/tomcat/conf/server.xml"

  # First match only, compared as a fixed string.
  line_no=$(grep -n -m 1 -F -- '<!-- A "Connector" using the shared thread pool-->' "$server_xml" | cut -d: -f1)
  if [[ -z "$line_no" ]]; then
    echo "set_server: Connector marker not found in $server_xml" >&2
    return 1
  fi

  sed -i "$((line_no + 1)),$((line_no + 6))d" "$server_xml" || return 1
  # Each trailing backslash continues the appended text on the next line.
  sed -i "${line_no}"'a    <Connector port="8080" protocol="HTTP/1.1"\
               connectionTimeout="20000"\
               redirectPort="8443" URIEncoding="UTF-8"\
               useBodyEncodingForURI="true" />\
' "$server_xml"
}
184
# Insert the crawler's HTTP agent properties (name, description, URL and
# e-mail) directly below the <configuration> line of
# $NutchEZ_HOME/conf/nutch-site.xml.
# Globals read: NutchEZ_HOME, MasterDNS, Admin_email
# Returns: 1 without touching the file when no <configuration> line exists;
#          with an empty line number sed would otherwise append the whole
#          block after every line of the file.
# NOTE(review): the path echoed below names the copy inside the Tomcat
# webapp, while the file actually edited is the one under conf/ — confirm
# which of the two is intended.
# NOTE(review): this is the second function named set_nutch-site in this
# file; being defined later, it is the one in effect once the whole file has
# been read.
set_nutch-site () {
  local site_xml="$NutchEZ_HOME/conf/nutch-site.xml"
  local line_no

  echo "$NutchEZ_HOME/tomcat/webapps/ROOT/WEB-INF/classes/nutch-site.xml"

  # Line number of the first line containing the opening <configuration> tag.
  line_no=$(grep -n -m 1 -F -- '<configuration>' "$site_xml" | cut -d: -f1)
  if [[ -z "$line_no" ]]; then
    echo "set_nutch-site: <configuration> not found in $site_xml" >&2
    return 1
  fi

  # Append the properties. The two variables are spliced in double-quoted so
  # a value containing whitespace stays inside the single sed script.
  sed -i "${line_no}"'a <property>\
  <name>http.agent.name</name>\
  <value>waue</value>\
  <description>HTTP User-Agent request header. </description>\
</property>\
<property>\
  <name>http.agent.description</name>\
  <value>MyTest</value>\
  <description>Further description</description>\
</property>\
<property>\
  <name>http.agent.url</name>\
  <value>'"$MasterDNS"'</value>\
  <description>A URL to advertise in the User-Agent header. </description>\
</property>\
<property>\
  <name>http.agent.email</name>\
  <value>'"$Admin_email"'</value>\
  <description>An email address\
  </description>\
</property>\
' "$site_xml"
}
215
216
# Start Tomcat by running tomcat/bin/startup.sh under $NutchEZ_HOME.
# Globals read: NutchEZ_HOME
# Returns: the exit status of startup.sh.
start_up_tomcat () {
  echo "start up tomcat..."
  # Quoted so an install path containing spaces is still one command word.
  "$NutchEZ_HOME/tomcat/bin/startup.sh"
}
Note: See TracBrowser for help on using the repository browser.