Changes between Version 25 and Version 26 of waue/Hadoop_DRBL


Ignore:
Timestamp:
Mar 19, 2009, 5:40:45 PM (15 years ago)
Author:
waue
Comment:

--

Legend:

Unmodified
Added
Removed
Modified
  • waue/Hadoop_DRBL

    v25 v26  
    99= 零、環境說明 =
    1010
    11 環境中共有台機器,一台為drbl server,也是hadoop的namenode,其他節點則client 與datanode,如下:
     11環境中共有十三台機器,一台為drbl server,也是hadoop的namenode,其他節點則client 與datanode,如下:
    1212|| 名稱 || ip || drbl用途 || hadoop 用途 ||
    1313|| hadoop || 192.168.1.254 || drbl server || namenode ||
    14 || hadoop || 192.168.1.2 || drbl server || namenode ||
    15 || hadoop || 192.168.1.3 || drbl client || datanode ||
    16 || hadoop || 192.168.1.4 || drbl client || datanode ||
    17 || hadoop || 192.168.1.5 || drbl client || datanode ||
    18 || hadoop || 192.168.1.6 || drbl client || datanode ||
    19 || hadoop || 192.168.1.7 || drbl client || datanode ||
     14|| hadoop || 192.168.1.1~12 || drbl client || datanode ||
     15
    2016
    2117介紹drbl server環境如下:
     
    5753$ wget http://ftp.twaren.net/Unix/Web/apache/hadoop/core/hadoop-0.18.3/hadoop-0.18.3.tar.gz
    5854$ tar zxvf hadoop-0.18.3.tar.gz
    59 hadoop:/opt# ln -sf hadoop-0.18.3 hadoop
     55
     56}}}
     57
     58== 1.4 設定使用者 ==
     59{{{
     60$ su -
     61$ addgroup hdfsgrp
     62$ adduser --ingroup hdfsgrp hdfsadm
     63$ chown -R hdfsadm:hdfsgrp /opt/hadoop-0.18.3
     64$ chmod -R 775 /opt/hadoop-0.18.3
     65$ su - hdfsadm
     66$ cd /opt/hadoop
     67$ ln -sf hadoop-0.18.3 hadoop
    6068}}}
    6169
     
    6472 * 在 /etc/bash.bashrc 的最末加入 以下資訊
    6573{{{
     74#!sh
    6675PATH=$PATH:/opt/drbl/bin:/opt/drbl/sbin
    6776export JAVA_HOME=/usr/lib/jvm/java-6-sun
    6877export HADOOP_HOME=/opt/hadoop/
    6978}}}
    70 
     79 * 載入設定值
     80{{{
     81$ source /etc/bash.bashrc
     82}}}
    7183 * 編輯 /etc/hosts 把下面內容貼在最後
    7284{{{
     
    90102--- hadoop-0.18.3/conf/hadoop-env.sh.org
    91103+++ hadoop-0.18.3/conf/hadoop-env.sh
    92 @@ -6,7 +6,9 @@
     104@@ -6,7 +6,10 @@
    93105 # remote nodes.
    94106 # The java implementation to use.  Required.
    95107-# export JAVA_HOME=/usr/lib/j2sdk1.5-sun
    96108+export JAVA_HOME=/usr/lib/jvm/java-6-sun
    97 +export HADOOP_HOME=/opt/hadoop-0.18.3
     109+export HADOOP_HOME=/opt/hadoop
    98110+export HADOOP_CONF_DIR=$HADOOP_HOME/conf
    99 +export HADOOP_LOG_DIR=/root/hadoop/logs
     111+export HADOOP_LOG_DIR=/home/hdfsadm/hdfs/logs
     112+export HADOOP_PID_DIR=/home/hdfsadm/hdfs/pids
     113
    100114 # Extra Java CLASSPATH elements.  Optional.
    101115 # export HADOOP_CLASSPATH=
     
    128142+    </description>
    129143+  </property>
     144+  <property>
     145+    <name>hadoop.tmp.dir</name>
     146+    <value>/tmp/hadoop/hadoop-${user.name}</value>
     147+    <description>A base for other temporary directories.</description>
     148+  </property>
    130149 </configuration>
    131150}}}
     
    133152 * 編輯 /opt/hadoop/conf/slaves
    134153{{{
    135 hadoop102
    136 hadoop103
    137 hadoop104
    138 hadoop105
    139 hadoop106
    140 hadoop107
    141 hadoop
     154192.168.1.1
     155192.168.1.2
     156192.168.1.3
     157192.168.1.4
     158192.168.1.5
     159192.168.1.6
     160192.168.1.7
     161192.168.1.8
     162192.168.1.9
     163192.168.1.10
     164192.168.1.11
     165192.168.1.12
    142166}}}
    143167
     
    156180|                              |
    157181|    +-- [eth1] 192.168.1.254  +- to clients group 1 [ 6 clients, their IP
    158 |                              |             from 192.168.1.2 - 192.168.1.7]
     182|                              |             from 192.168.1.1 - 192.168.1.12]
    159183+------------------------------+
    160184******************************************************
    161 Total clients: 6
     185Total clients: 12
    162186******************************************************
    163187}}}
     
    172196 * 執行
    173197{{{
     198$ su -
     199$ /opt/drbl/sbin/drbl-useradd -s hdfsadm hdfsgrp
     200$ su - hdfsadm
    174201$ ssh-keygen -t rsa -b 1024 -N "" -f ~/.ssh/id_rsa
    175202$ cp ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys
     
    177204}}}
    178205
    179  * 寫個自動化 auto.shell 並執行
    180 {{{
    181 #!sh
    182 #!/bin/bash
    183 
    184 for ((i=2;i<=7;i++));
    185 do
    186  scp -r ~/.ssh/ "192.168.1.$i":~/
    187  scp /etc/ssh/ssh_config "192.168.1.$i":/etc/ssh/ssh_config
    188  ssh "192.168.1.$i" /etc/init.d/ssh restart
    189 done
    190 }}}
    191206
    192207 * 正確無誤則可免密碼登入
    193208
    194209=== 3.2.1 dsh ===
    195  
     210
    196211{{{
    197212$ sudo apt-get install dsh
    198213$ mkdir -p .dsh
    199 $ for ((i=2;i<=7;i++)); do echo "192.168.1.$i" >> .dsh/machines.list; done
    200 }}}
    201 並執行
    202 {{{
    203 $ dsh -a scp hadoop:/etc/hosts /etc/
     214$ for ((i=1;i<=12;i++)); do echo "192.168.1.$i" >> .dsh/machines.list; done
     215}}} 
     216 * 測試並執行
     217{{{
     218$ dsh -a hostname
    204219$ dsh -a source /etc/bash.bashrc
    205220}}}
     221
    206222
    207223== 3.3 啟動 Hadoop  ==
     
    257273
    258274 * http://gm2.nchc.org.tw:50030/
    259    * 網頁中可以看到node數為7則代表所有的節點都有加入
     275   * 網頁中可以看到剛剛有在工作的node數
     276{{{
     277#!html
     278<h1>gm2 Hadoop Map/Reduce Administration</h1>
     279
     280<b>State:</b> RUNNING<br>
     281
     282<b>Started:</b> Tue Mar 17 16:22:46 EDT 2009<br>
     283<b>Version:</b> 0.18.3,
     284                r736250<br>
     285<b>Compiled:</b> Thu Jan 22 23:12:08 UTC 2009 by
     286                 ndaley<br>
     287<b>Identifier:</b> 200903171622<br>                 
     288                   
     289<hr>
     290
     291<h2>Cluster Summary</h2>
     292<center>
     293<table border="2" cellpadding="5" cellspacing="2">
     294<tbody><tr><th>Maps</th><th>Reduces</th><th>Total Submissions</th><th>Nodes</th><th>Map Task Capacity</th><th>Reduce Task Capacity</th><th>Avg. Tasks/Node</th></tr>
     295<tr><td>0</td><td>0</td><td>1</td><td><a href="machines.jsp">9</a></td><td>18</td><td>18</td><td>4.00</td></tr></tbody></table>
     296
     297</center>
     298<hr>
     299
     300<h2>Running Jobs</h2>
     301<center>
     302<table border="2" cellpadding="5" cellspacing="2">
     303<tbody><tr><td colspan="9" align="center"><b>Running Jobs </b></td></tr>
     304<tr><td colspan="8" align="center"><i>none</i></td></tr>
     305</tbody></table>
     306</center>
     307
     308<hr>
     309
     310<h2>Completed Jobs</h2>
     311<center>
     312<table border="2" cellpadding="5" cellspacing="2">
     313<tbody><tr><td colspan="9" align="center"><b>Completed Jobs </b></td></tr>
     314<tr><td><b>Jobid</b></td><td><b>User</b></td><td><b>Name</b></td><td><b>Map % Complete</b></td><td><b>Map Total</b></td><td><b>Maps Completed</b></td><td><b>Reduce % Complete</b></td><td><b>Reduce Total</b></td><td><b>Reduces Completed</b></td></tr>
     315<tr><td><a href="jobdetails.jsp?jobid=job_200903171622_0001&amp;refresh=0">job_200903171622_0001</a></td><td>hdfsadm</td><td>wordcount</td><td>100.00%<table border="1" width="80"><tbody><tr><td cellspacing="0" class="perc_filled" width="100%"></td></tr></tbody></table></td><td>5</td><td>5</td><td>100.00%<table border="1" width="80"><tbody><tr><td cellspacing="0" class="perc_filled" width="100%"></td></tr></tbody></table></td><td>1</td><td> 1</td></tr>
     316
     317</tbody></table>
     318</center>
     319
     320
     321<hr>
     322
     323
     324}}}
     325
    260326 * http://gm2.nchc.org.tw:50075/browseDirectory.jsp?dir=%2Fuser%2Froot&namenodeInfoPort=50070
    261327   * 可以看到輸出結果
    262    
     328
    263329== 3.5 停止hadoop ==
    264330{{{
     
    269335{{{
    270336$ bin/stop-all.sh
    271 $ dsh -a rm -rf /root/hadoop/* /tmp/hadoop-root*
     337$ dsh -a rm -rf ~/hdfs/logs/* ~/hdfs/pids/* /tmp/hadoop/*
    272338$ bin/hadoop namenode -format
    273339$ bin/start-all.sh
     
    2813471. 在drbl系統新增帳號 huser
    282348{{{
    283 <root>$ /opt/drbl/sbin/drbl-useradd -s huser huser
    284 }}}
    285 2. 用hdfs的superuser(此篇文章為root)在hdfs上建立資料夾
    286 {{{
    287 <root>$ /opt/hadoop/bin/hadoop dfs -mkdir /user/huser
     349$ su -
     350$ /opt/drbl/sbin/drbl-useradd -s huser huser
     351}}}
     3522. 用hdfs的superuser(此篇文章為hdfsadm)在hdfs上建立資料夾
     353{{{
     354$ su - hdfsadm
     355$ /opt/hadoop/bin/hadoop dfs -mkdir /user/huser
    288356}}}
    2893573. 用superuser 設定hdfs上該資料夾的權限與擁有者
    290358{{{
    291 <root>$ /opt/hadoop/bin/hadoop dfs -chown -R huser /user/huser
    292 <root>$ /opt/hadoop/bin/hadoop dfs -chmod -R 775 /user/huser
     359$ /opt/hadoop/bin/hadoop dfs -chown -R huser /user/huser
     360$ /opt/hadoop/bin/hadoop dfs -chmod -R 775 /user/huser
    293361}}}
    2943624. 測試:用huser瀏覽或寫入檔案
     
    300368}}}
    301369
    302 == 4.1 多帳號 ==
     370== 4.2 多帳號執行 ==
    303371
    304372 * 測試兩個user: rock , waue 同時執行,沒有問題