Changeset 69 for nutchez-0.1/bin
- Timestamp:
- May 27, 2009, 5:01:46 PM (16 years ago)
- Location:
- nutchez-0.1/bin
- Files:
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
nutchez-0.1/bin/nutchez
r68 r69 4 4 # Description: Eazily use for Nutch 5 5 # . 6 export NUTCH_CONF_DIR=~/.nutchez7 NUTCHEZ_SCRIPT_PATH="${NUTCHEZ_SCRIPT_PATH:-/opt/nutch}"8 6 9 . ./nutchez-func.sh 7 # begining .. 8 bin=`dirname "$0"` 9 bin=`cd "$bin"; pwd` 10 10 11 12 # root ? 13 #check_if_root 14 15 # show url lists 16 17 setup_nutchez 11 . "$bin"/nutchez-func.sh 12 init_nutchez 18 13 19 14 CHECK=0 -
nutchez-0.1/bin/nutchez-func.sh
r68 r69 5 5 # . 6 6 7 . /etc/nutch/hadoop-env.sh || . $NUTCHEZ_SCRIPT_PATH/conf/hadoop-env.sh8 7 9 8 : ${DIALOG=dialog} 10 9 11 # display more for debug 12 VERB=0 10 # set 1 to display more for debug, 11 VERB=1 12 13 init_nutchez () { 14 if ! [ -e ~/.nutchez ] ;then 15 # copy from /etc/nutch 16 mkdir ~/.nutchez 17 cp -rf /etc/nutch/* ~/.nutchez 18 mkdir ~/.nutchez/log 19 chown -R $LOGNAME:$LOGNAME ~/.nutchez 20 fi 21 export NUTCH_CONF_DIR=~/.nutchez 22 export HADOOP_CONF_DIR=~/.nutchez 23 export HADOOP_LOG_DIR=~/.nutchez/log 24 . ~/.nutchez/hadoop-env.sh || . /etc/nutch/hadoop-env.sh 25 } 13 26 14 27 echo_vb () { … … 38 51 echo_vb "7. chang tmp as txt" 39 52 rm ~/.nutchez/sav/n.*.txt 40 mv /tmp/n.url .tmp ~/.nutchez/sav/41 mv /tmp/n.robot.tmp ~/.nutchez/sav/ 42 mv /tmp/n.crawler.tmp ~/.nutchez/sav/ 43 mv /tmp/n.tomcat.tmp ~/.nutchez/sav/ 53 mv /tmp/n.urls.tmp ~/.nutchez/sav/n.urls.txt 54 mv /tmp/n.robot.tmp ~/.nutchez/sav/n.robot.txt 55 mv /tmp/n.crawler.tmp ~/.nutchez/sav/n.crawler.txt 56 mv /tmp/n.tomcat.tmp ~/.nutchez/sav/n.tomcat.txt 44 57 } 45 58 … … 47 60 echo_vb "7. delete tmp" 48 61 rm /tmp/n.*.tmp 49 }50 51 init_nutchez () {52 if ! [ -e ~/.nutchez ] ;then53 # copy from /etc/nutch54 cp -rf /etc/nutch/* ~/.nutchez55 chown -R $LOGNAME:$LOGNAME ~/.nutchez56 fi57 62 } 58 63 … … 71 76 if [ -e ~/.nutchez/nutch-site.xml ] ; then 72 77 # set nutch-site.xml 73 sed -i e "s/>user</>$ROBOT</" ~/.nutchez/nutch-site.xml78 sed -i -e "4s/<value>[a-zA-Z0-9]*</<value>$ROBOT</" ~/.nutchez/nutch-site.xml 74 79 fi 75 80 … … 86 91 fi 87 92 # change explorer port 88 sed -ie "s/8080/>$PORT</" ~/.nutchez/tomcat/conf/server.xml 93 sed -i -e "s/<Connector port=\"[0-9]*\"/<Connector port=\"$PORT\"/" ~/.nutchez/tomcat/conf/server.xml 94 fi 89 95 } 90 96 … … 93 99 # show urls : ok =0 ,cancel = 1 94 100 echo_vb "2. show_urls !" 95 test_file ~/.nutchez/sav/n.url .txt101 test_file ~/.nutchez/sav/n.urls.txt 96 102 echo_vb "2.1 test_file ~/.nutchez/sav return : $?" 
97 103 # dialog begin 98 dialog --editbox ~/.nutchez/sav/n.url .txt 16 51 2>/tmp/n.url.tmp104 dialog --editbox ~/.nutchez/sav/n.urls.txt 16 51 2>/tmp/n.urls.tmp 99 105 RET=$? 100 echo_vb "2.1 cat url: `cat /tmp/n.url .tmp`"106 echo_vb "2.1 cat url: `cat /tmp/n.urls.tmp`" 101 107 return $RET 102 108 } … … 126 132 final_confirm () { 127 133 echo_vb "6. final_confirm : start =0 , back =1 " 128 tempfile=`tempfile 2>/dev/null` || tempfile=/tmp/n.finalcheck.tmp 129 #trap "rm -f $tempfile" 0 1 2 5 15 134 tempfile=/tmp/n.finalcheck.tmp 130 135 131 136 echo " \n 1. The url list is : \n " > $tempfile 132 cat /tmp/n.url .tmp >> $tempfile137 cat /tmp/n.urls.tmp >> $tempfile 133 138 echo " \n 2. The robot name is : \n" >> $tempfile 134 139 cat /tmp/n.robot.tmp >> $tempfile … … 142 147 #read READ 143 148 $DIALOG --title "Check It !!" --clear \ 144 --yesno "$MSG" 16 51149 --yesno "$MSG" 26 51 145 150 RET=$? 146 151 echo_vb "final return = $RET" … … 152 157 start_crawl () { 153 158 154 ROBOT=`cat ~/.nutchez/sav `155 URLS=`cat ~/.nutchez/sav `156 DEPTH=`cat ~/.nutchez/sav `157 PORT=`cat ~/.nutchez/sav `159 ROBOT=`cat ~/.nutchez/sav/n.robot.txt` 160 URLS=`cat ~/.nutchez/sav/n.urls.txt` 161 DEPTH=`cat ~/.nutchez/sav/n.crawler.txt` 162 PORT=`cat ~/.nutchez/sav/n.tomcat.txt` 158 163 159 164 echo_vb "7. start_crawl" 160 setup_nutch 165 setup_nutchez 161 166 install_tomcat 162 # /opt/nutch/bin/nutch crawl ~/.nutchez/urls -dir ~/.nutchez/search -depth $DEPTH 163 echo "/opt/nutch/bin/nutch crawl ~/.nutchez/urls -dir ~/.nutchez/search -depth $DEPTH" 167 echo_vb "/opt/nutch/bin/nutch crawl ~/.nutchez/urls -dir ~/.nutchez/search -depth $DEPTH" 168 echo_vb "$NUTCH_CONF_DIR" 169 /opt/nutch/bin/nutch crawl ~/.nutchez/urls -dir ~/.nutchez/search -depth $DEPTH 164 170 } 165 171 166 172 start_tomcat () { 167 173 echo_vb "8. 
start_tomcat " 168 /opt/nutch/tomcat/bin/startup.sh 174 echo_vb "/opt/nutch/tomcat/bin/startup.sh" 175 if [ -e /tmp/search ] 176 rm -rf /tmp/search 177 fi 178 ln -sf ~/.nutchez/search/ /tmp/ 179 ~/.nutchez/tomcat/bin/shutdown.sh 180 ~/.nutchez/tomcat/bin/startup.sh 169 181 } 170 182 … … 173 185 FIREFOX=`which firefox` 174 186 RET=$? 175 if [ RET == 0 ];then187 if [ $RET == 0 ];then 176 188 $FIREFOX -D 0.0 http://localhost:$PORT 177 else 178 $DIALOG --msgbox "Congratulations! \n you can explore the url: \n http://localhost:8080" 0 0 189 RET=$? 190 fi 191 if ! [ $RET == 0 ];then 192 $DIALOG --msgbox "Congratulations! \n you can explore the url: \n http://localhost:$PORT" 0 0 179 193 fi 180 194 }
Note: See TracChangeset for help on using the changeset viewer.