Index: /nutchez-0.2/src/test/install
===================================================================
--- /nutchez-0.2/src/test/install	(revision 130)
+++ /nutchez-0.2/src/test/install	(revision 131)
@@ -24,9 +24,6 @@
     # make_ssh_key
 
-    Install_Nutch
-    Install_Tomcat
-    
+    Install_Nutch    
     # make_client_install
-    
     format_HDFS
     start_up_NutchEZ
Index: /nutchez-0.2/src/test/install_func.sh
===================================================================
--- /nutchez-0.2/src/test/install_func.sh	(revision 130)
+++ /nutchez-0.2/src/test/install_func.sh	(revision 131)
@@ -38,4 +38,201 @@
 confirm_install_information () {
   read -p "Please confirm your install infomation: 1.Yes 2.No  " confirm
+}
+
+set_Nutch_conf () {  # Run the four configuration setters below, in this order.
+  set_hadoop-env
+  set_haoop-site  # (sic) spelled the same way as the function's definition
+  set_nutch-site
+  set_crawl-urlfilter
+}
+
+# Append the NutchEZ exports to $NutchEZ_HOME/conf/hadoop-env.sh (">>": running it again appends them again).
+set_hadoop-env () {  # the heredoc is unquoted, so $NutchEZ_HOME is expanded while the file is written
+  echo "set $NutchEZ_HOME/conf/hadoop-env.sh"
+  cd "$NutchEZ_HOME/conf/" || return 1  # otherwise the exports land in a hadoop-env.sh in the current directory
+  cat >> hadoop-env.sh << EOF
+export JAVA_HOME=/usr/lib/jvm/java-6-sun
+export HADOOP_HOME=$NutchEZ_HOME
+export HADOOP_LOG_DIR=/tmp/NutchEZ/logs
+export HADOOP_SLAVES=$NutchEZ_HOME/conf/slaves
+export HADOOP_CONF_DIR=$NutchEZ_HOME/conf
+export HADOOP_PID_DIR=/tmp/hadoop/pid
+export NUTCH_HOME=$NutchEZ_HOME
+export NUTCH_CONF_DIR=$NutchEZ_HOME/conf
+EOF
+}
+
+# Overwrite $NutchEZ_HOME/conf/hadoop-site.xml with the $MasterDNS file-system (9000) and job-tracker (9001) addresses.
+set_haoop-site () {  # (sic) name kept as is: set_Nutch_conf calls it with this spelling
+  echo "set $NutchEZ_HOME/conf/hadoop-site.xml"
+  cd "$NutchEZ_HOME/conf/" || return 1  # otherwise hadoop-site.xml is written into the current directory
+  cat > hadoop-site.xml << EOF
+<configuration>
+<property>
+    <name>fs.default.name</name>
+    <value>$MasterDNS:9000</value>
+    <description> The name of the default file system. Either the literal string "local" or a host:port for NDFS. </description>
+</property>
+<property>
+    <name>mapred.job.tracker</name>
+    <value>$MasterDNS:9001</value>
+    <description> The host and port that the MapReduce job tracker runs at. If "local", then jobs are run in-process as a single map and reduce task. </description>
+</property>
+</configuration>
+EOF
+}
+
+set_nutch-site () {  # Overwrite conf/nutch-site.xml with the http.agent.* properties (name, description, url, email).
+  echo "set $NutchEZ_HOME/conf/nutch-site.xml"
+  cd "$NutchEZ_HOME/conf/" || return 1  # otherwise nutch-site.xml is written into the current directory
+  cat > nutch-site.xml << EOF
+<configuration>
+<property>
+  <name>http.agent.name</name>
+  <value>nutchuser</value>
+  <description>HTTP 'User-Agent' request header. </description>
+</property>
+<property>
+  <name>http.agent.description</name>
+  <value>MyTest</value>
+  <description>Further description</description>
+</property>
+<property>
+  <name>http.agent.url</name>
+  <value>$MasterDNS</value>
+  <description>A URL to advertise in the User-Agent header. </description>
+</property>
+<property>
+  <name>http.agent.email</name>
+  <value>$Admin_email</value>
+  <description>An email address
+  </description>
+</property>
+</configuration>
+EOF
+}
+
+
+set_crawl-urlfilter () {  # For each marker comment in conf/crawl-urlfilter.txt: delete the line after it, then append the new rule there.
+  echo "set $NutchEZ_HOME/conf/set_crawl-urlfilter.txt"
+  Line_NO=$(grep -n -m 1 'skip file:, ftp:, & mailto: urls' "$NutchEZ_HOME/conf/crawl-urlfilter.txt" | sed 's/:.*//')
+  [ -n "$Line_NO" ] || { echo "set_crawl-urlfilter: ftp/mailto marker not found" >&2; return 1; }  # an empty number would delete line 1 and append after every line
+  sed -i "$((Line_NO+1))d" "$NutchEZ_HOME/conf/crawl-urlfilter.txt"
+  sed -i "$Line_NO"'a -^(ftp|mailto):' "$NutchEZ_HOME/conf/crawl-urlfilter.txt"
+
+  Line_NO=$(grep -n -m 1 'skip image and other suffixes we can' "$NutchEZ_HOME/conf/crawl-urlfilter.txt" | sed 's/:.*//')
+  [ -n "$Line_NO" ] || { echo "set_crawl-urlfilter: suffix marker not found" >&2; return 1; }
+  sed -i "$((Line_NO+1))d" "$NutchEZ_HOME/conf/crawl-urlfilter.txt"
+  sed -i "$Line_NO"'a -\\.(gif|GIF|jpg|JPG|png|PNG|ico|ICO|css|sit|eps|wmf|mpg|xls|gz|rpm|tgz|mov|MOV|exe|jpeg|JPEG|bmp|BMP)$' "$NutchEZ_HOME/conf/crawl-urlfilter.txt"
+
+  Line_NO=$(grep -n -m 1 'skip URLs containing certain characters as probable queries, etc.' "$NutchEZ_HOME/conf/crawl-urlfilter.txt" | sed 's/:.*//')
+  [ -n "$Line_NO" ] || { echo "set_crawl-urlfilter: query-characters marker not found" >&2; return 1; }
+  sed -i "$((Line_NO+1))d" "$NutchEZ_HOME/conf/crawl-urlfilter.txt"
+  sed -i "$Line_NO"'a -[*!@]' "$NutchEZ_HOME/conf/crawl-urlfilter.txt"
+
+  Line_NO=$(grep -n -m 1 'skip everything else' "$NutchEZ_HOME/conf/crawl-urlfilter.txt" | sed 's/:.*//')
+  [ -n "$Line_NO" ] || { echo "set_crawl-urlfilter: skip-everything-else marker not found" >&2; return 1; }
+  sed -i "$((Line_NO+1))d" "$NutchEZ_HOME/conf/crawl-urlfilter.txt"
+  sed -i -e "$Line_NO"'a # accecpt anything else' -e "$Line_NO"'a +.*' "$NutchEZ_HOME/conf/crawl-urlfilter.txt"  # one pass; result after the marker: comment line, then "+.*"
+}
+
+format_HDFS () {  # Run "hadoop namenode -format" with the launcher under $NutchEZ_HOME/bin.
+  echo "format HDFS..."
+  "$NutchEZ_HOME/bin/hadoop" namenode -format  # quoted so an install path containing spaces stays one word
+}
+
+start_up_NutchEZ (){  # Run start-all.sh from $NutchEZ_HOME/bin.
+  echo "start up NutchEZ..."
+  "$NutchEZ_HOME/bin/start-all.sh"  # quoted so an install path containing spaces stays one word
+}
+
+set_server () {  # In tomcat/conf/server.xml: delete the six lines after the shared-thread-pool marker comment and append a UTF-8 Connector there.
+  echo "$NutchEZ_HOME/tomcat/conf/server.xml"
+  Line_NO=$(grep -n -m 1 '<!-- A "Connector" using the shared thread pool-->' "$NutchEZ_HOME/tomcat/conf/server.xml" | sed 's/:.*//')
+  [ -n "$Line_NO" ] || { echo "set_server: Connector marker not found in server.xml" >&2; return 1; }  # an empty number would delete lines 1-6 and append after every line
+  sed -i "$((Line_NO+1)),$((Line_NO+6))d" "$NutchEZ_HOME/tomcat/conf/server.xml"
+  sed -i "$Line_NO"'a    <Connector port="8080" protocol="HTTP/1.1"\
+               connectionTimeout="20000"\
+               redirectPort="8443" URIEncoding="UTF-8"\
+               useBodyEncodingForURI="true" />\
+' "$NutchEZ_HOME/tomcat/conf/server.xml"
+}
+
+
+set_nutch-site2 () {  # Insert the search web app's Nutch properties right after <configuration> in the ROOT web app's nutch-site.xml.
+  echo "$NutchEZ_HOME/tomcat/webapps/ROOT/WEB-INF/classes/nutch-site.xml"
+
+  # Find the line number of the <configuration> tag; the settings are inserted after it
+  line_NO=$(grep -n -m 1 '<configuration>' "$NutchEZ_HOME/tomcat/webapps/ROOT/WEB-INF/classes/nutch-site.xml" | sed 's/:.*//')
+  [ -n "$line_NO" ] || { echo "set_nutch-site2: <configuration> not found in nutch-site.xml" >&2; return 1; }
+  # Insert the settings; every line of the appended text, blank ones included, must end in "\" or sed ends the text there
+  sed -i "$line_NO"'a  <property>\
+  <name>http.agent.name</name>\
+  <value>nutch</value>\
+  <description>HTTP 'User-Agent' request header. </description> \
+</property>\
+<property>\
+  <name>http.agent.description</name>\
+  <value>MyTest</value>\
+  <description>Further description</description> \
+</property>\
+<property>\
+  <name>http.agent.url</name> \
+  <value>localhost</value> \
+  <description>A URL to advertise in the User-Agent header. </description> \
+</property>\
+<property>\
+  <name>http.agent.email</name>\
+  <value>'"$Admin_email"'</value> \
+  <description>An email address \
+  </description> \
+</property>\
+<property>\
+  <name>plugin.folders</name>\
+  <value>'"$NutchEZ_HOME"'/plugins</value>\
+  <description>Directories where nutch plugins are located. </description>\
+</property>\
+<property>\
+  <name>plugin.includes</name>\
+  <value>protocol-(http|httpclient)|urlfilter-regex|parse-(text|html|js|ext|msexcel|mspowerpoint|msword|oo|pdf|rss|swf|zip)|index-(more|basic|anchor)|query-(more|basic|site|url)|response-(json|xml)|summary-basic|scoring-opic|urlnormalizer-(pass|regex|basic)</value>\
+  <description> Regular expression naming plugin directory names</description>\
+ </property>\
+ <property>\
+  <name>parse.plugin.file</name>\
+  <value>parse-plugins.xml</value>\
+  <description>The name of the file that defines the associations between\
+  content-types and parsers.</description>\
+ </property>\
+ <property>\
+   <name>db.max.outlinks.per.page</name>\
+   <value>-1</value>\
+   <description> </description>\
+ </property> \
+ <property>\
+   <name>http.content.limit</name> \
+   <value>-1</value>\
+ </property>\
+<property>\
+  <name>indexer.mergeFactor</name>\
+  <value>500</value>\
+  <description>The factor that determines the frequency of Lucene segment\
+  merges. This must not be less than 2, higher values increase indexing\
+  speed but lead to increased RAM usage, and increase the number of\
+  open file handles (which may lead to "Too many open files" errors).\
+  NOTE: the "segments" here have nothing to do with Nutch segments, they\
+  are a low-level data unit used by Lucene.\
+  </description>\
+</property>\
+\
+<property>\
+  <name>indexer.minMergeDocs</name>\
+  <value>500</value>\
+  <description>This number determines the minimum number of Lucene\
+  Documents buffered in memory between Lucene segment merges. Larger\
+  values increase indexing speed and increase RAM usage.\
+  </description>\
+</property>\
+
+' "$NutchEZ_HOME/tomcat/webapps/ROOT/WEB-INF/classes/nutch-site.xml"
 }
 
@@ -49,169 +246,21 @@
 }
 
-set_Nutch_conf () {
-  set_hadoop-env
-  set_haoop-site
-  set_nutch-site
-  set_crawl-urlfilter
-}
-
-# set $NutchEZ_HOME/conf/hadoop-env.sh
-set_hadoop-env () {
-  echo "set $NutchEZ_HOME/conf/hadoop-env.sh"
-  cd $NutchEZ_HOME/conf/
-  cat >> hadoop-env.sh << EOF
-export JAVA_HOME=/usr/lib/jvm/java-6-sun
-export HADOOP_HOME=/opt/NutchEZ
-export HADOOP_LOG_DIR=/tmp/NutchEZ/logs
-export HADOOP_SLAVES=/opt/NutchEZ/conf/slaves
-EOF
-}
-
-# set $NutchEZ_HOME/conf/hadoop-site.xml
-set_haoop-site () {
-  echo "set $NutchEZ_HOME/conf/hadoop-site.xml"
-  cd $NutchEZ_HOME/conf/
-  cat > hadoop-site.xml << EOF
-<configuration>
-<property>
-    <name>fs.default.name</name>
-    <value>$MasterDNS:9000</value>
-    <description> The name of the default file system. Either the literal string "local" or a host:port for NDFS. </description>
-</property>
-<property>
-    <name>mapred.job.tracker</name>
-    <value>$MasterDNS:9001</value>
-    <description> The host and port that the MapReduce job tracker runs at. If "local", then jobs are run in-process as a single map and reduce task. </description>
-</property>
-</configuration>
-EOF
-}
-
-set_nutch-site () {
-  echo "set $NutchEZ_HOME/conf/nutch-site.xml"
-  cd $NutchEZ_HOME/conf/
-  cat > nutch-site.xml << EOF
-<configuration>
-<property>
-  <name>http.agent.name</name>
-  <value>nutchuser</value>
-  <description>HTTP 'User-Agent' request header. </description>
-</property>
-<property>
-  <name>http.agent.description</name>
-  <value>MyTest</value>
-  <description>Further description</description>
-</property>
-<property>
-  <name>http.agent.url</name>
-  <value>$MasterDNS</value>
-  <description>A URL to advertise in the User-Agent header. </description>
-</property>
-<property>
-  <name>$MasterDNS</name>
-  <value>$Admin_email</value>
-  <description>An email address
-  </description>
-</property>
-</configuration>
-EOF
-}
-
-
-set_crawl-urlfilter () {
-  echo "set $NutchEZ_HOME/conf/set_crawl-urlfilter.txt"
-  Line_NO=`cat $NutchEZ_HOME'/conf/crawl-urlfilter.txt' | grep -n 'skip file:, ftp:, & mailto: urls' | sed 's/:.*//g'`
-  sed -i ''$((Line_NO+1))'d' $NutchEZ_HOME/conf/crawl-urlfilter.txt
-  sed -i ''$Line_NO'a -^(ftp|mailto):' $NutchEZ_HOME/conf/crawl-urlfilter.txt
-
-
-  Line_NO=`cat $NutchEZ_HOME'/conf/crawl-urlfilter.txt' | grep -n 'skip image and other suffixes we can' | sed 's/:.*//g'`
-  sed -i ''$((Line_NO+1))'d' $NutchEZ_HOME/conf/crawl-urlfilter.txt
-  sed -i ''$Line_NO'a -\\.(gif|GIF|jpg|JPG|png|PNG|ico|ICO|css|sit|eps|wmf|mpg|xls|gz|rpm|tgz|mov|MOV|exe|jpeg|JPEG|bmp|BMP)$' $NutchEZ_HOME/conf/crawl-urlfilter.txt
-
-
-  Line_NO=`cat $NutchEZ_HOME'/conf/crawl-urlfilter.txt' | grep -n 'skip URLs containing certain characters as probable queries, etc.' | sed 's/:.*//g'`
-  sed -i ''$((Line_NO+1))'d' $NutchEZ_HOME/conf/crawl-urlfilter.txt
-  sed -i ''$Line_NO'a -[*!@]' $NutchEZ_HOME/conf/crawl-urlfilter.txt
-
-
-  Line_NO=`cat $NutchEZ_HOME'/conf/crawl-urlfilter.txt' | grep -n 'skip everything else' | sed 's/:.*//g'`
-  sed -i ''$((Line_NO+1))'d' $NutchEZ_HOME/conf/crawl-urlfilter.txt
-  sed -i ''$Line_NO'a +.*' $NutchEZ_HOME/conf/crawl-urlfilter.txt
-  sed -i ''$Line_NO'a # accecpt anything else' $NutchEZ_HOME/conf/crawl-urlfilter.txt
-}
-
-format_HDFS () {
-  echo "format HDFS..."
-  $NutchEZ_HOME/bin/hadoop namenode -format
-}
-
-start_up_NutchEZ (){
-  echo "start up NutchEZ..."
-  $NutchEZ_HOME/bin/start-all.sh
-}
-
 # install tomcat
 Install_Tomcat () {
   cd /opt/
-#  tar zxf apache-tomcat-6.0.18.tar.gz
+#  tar zxvf apache-tomcat-6.0.18.tar.gz
   tar zxf apache-tomcat-6.0.18.tar.gz
   mv apache-tomcat-6.0.18 $NutchEZ_HOME
   cd $NutchEZ_HOME
   mv  apache-tomcat-6.0.18 tomcat
+  mkdir web
+  # mkdir $NutchEZ_HOME/search
   chown -R nutchuser:nutchuser $NutchEZ_HOME
-  mkdir $NutchEZ_HOME/web
-  jar -xvf $NutchEZ_HOME/nutch-1.0.war $NutchEZ_HOME/web
+  (cd web && jar -xvf ../nutch-1.0.war)  # jar x unpacks into the current directory; a trailing name selects archive entries, it is not a target directory
   mv $NutchEZ_HOME/tomcat/webapps/ROOT $NutchEZ_HOME/tomcat/webapps/ROOT-ori
   mv $NutchEZ_HOME/web $NutchEZ_HOME/tomcat/webapps/ROOT
-  mkdir $NutchEZ_HOME/search
   set_server
-  set_nutch-site
-}
-
-
-set_server () {
-  echo "$NutchEZ_HOME/tomcat/conf/server.xml"
-  Line_NO=`cat $NutchEZ_HOME'/tomcat/conf/server.xml' | grep -n '<!-- A "Connector" using the shared thread pool-->' | sed 's/:.*//g'`
-
-  sed -i ''$((Line_NO+1))','$((Line_NO+6))'d' $NutchEZ_HOME/tomcat/conf/server.xml
-  sed -i ''$Line_NO'a    <Connector port="8080" protocol="HTTP/1.1"\
-               connectionTimeout="20000"\
-               redirectPort="8443" URIEncoding="UTF-8"\
-               useBodyEncodingForURI="true" />\
-' $NutchEZ_HOME/tomcat/conf/server.xml
-}
-
-set_nutch-site () {
-  echo "$NutchEZ_HOME/tomcat/webapps/ROOT/WEB-INF/classes/nutch-site.xml"
-  
-  # 搜尋加入設定的行號位址
-  line_NO=`cat $NutchEZ_HOME'/conf/nutch-site.xml' | grep -n '<'configuration'>' | sed 's/:.*//g'`
-  
-  # 加入設定檔
-  sed -i ''$line_NO'a <property>\
-  <name>http.agent.name</name>\
-  <value>waue</value>\
-  <description>HTTP 'User-Agent' request header. </description>\
-</property>\
-<property>\
-  <name>http.agent.description</name>\
-  <value>MyTest</value>\
-  <description>Further description</description>\
-</property>\
-<property>\
-  <name>http.agent.url</name>\
-  <value>'$MasterDNS'</value>\
-  <description>A URL to advertise in the User-Agent header. </description>\
-</property>\
-<property>\
-  <name>http.agent.email</name>\
-  <value>'$Admin_email'</value>\
-  <description>An email address\
-  </description>\
-</property>\
-' $NutchEZ_HOME/conf/nutch-site.xml
-}
-
+  #set_nutch-site2
+}
 
 start_up_tomcat () {
