|   | 1 | ◢ <[wiki:III140412/Lab19 實作十九]> | <[wiki:III140412 回課程大綱]> ▲ | <[wiki:III140412/Lab21 實作二十一]> ◣ | 
                  
                          |   | 2 |  | 
                  
                          |   | 3 | = 實作二十 Lab20 = | 
                  
                          |   | 4 |  | 
                  
                          |   | 5 | {{{ | 
                  
                          |   | 6 | #!html | 
                  
                          |   | 7 | <p style="text-align: center;"><big style="font-weight: bold;"><big>預設的輸入格式<br/>TextInputFormat</big></big></p> | 
                  
                          |   | 8 | }}} | 
                  
                          |   | 9 |  | 
                  
                          |   | 10 | [[PageOutline]] | 
                  
                          |   | 11 |  | 
                  
                          |   | 12 | {{{ | 
                  
                          |   | 13 | #!text | 
                  
                          |   | 14 | 請先連線至 nodeN.3du.me , N 為您的報名編號 | 
                  
                          |   | 15 | }}} | 
                  
                          |   | 16 |  | 
                  
                          |   | 17 |  * 為了觀察 !FileInputFormat 的行為,我們使用 update jar 的技巧,對 !TextInputFormat.java 做了小幅度的修改。 | 
                  
                          |   | 18 |  * 官方實作的 !TextInputFormat.java 有兩個:新版 API 位於 org.apache.hadoop.mapreduce.lib.input,舊版 API 位於 org.apache.hadoop.mapred | 
                  
                          |   | 19 | {{{ | 
                  
                          |   | 20 | lab@node1:~$ find /home/user/hadoop/src/ -name "TextInputFormat.java" | 
                  
                          |   | 21 | /home/user/hadoop/src/mapred/org/apache/hadoop/mapreduce/lib/input/TextInputFormat.java | 
                  
                          |   | 22 | /home/user/hadoop/src/mapred/org/apache/hadoop/mapred/TextInputFormat.java | 
                  
                          |   | 23 | }}} | 
                  
                          |   | 24 |  * 這裡我們修改新版的 !TextInputFormat.java | 
                  
                          |   | 25 | {{{ | 
                  
                          |   | 26 | #!diff | 
                  
                          |   | 27 | --- /home/user/hadoop/src/mapred/org/apache/hadoop/mapreduce/lib/input/TextInputFormat.java   2012-10-03 13:17:16.000000000 +0800 | 
                  
                          |   | 28 | +++ /home/user/hadoop_labs/lab011/src/TextInputFormat.java      2013-10-19 11:25:16.419320587 +0800 | 
                  
                          |   | 29 | @@ -38,11 +38,13 @@ | 
                  
                          |   | 30 |    public RecordReader<LongWritable, Text> | 
                  
                          |   | 31 |      createRecordReader(InputSplit split, | 
                  
                          |   | 32 |                         TaskAttemptContext context) { | 
                  
                          |   | 33 | +    System.err.println("TextInputFormat.createRecordReader()"); | 
                  
                          |   | 34 |      return new LineRecordReader(); | 
                  
                          |   | 35 |    } | 
                  
                          |   | 36 |  | 
                  
                          |   | 37 |    @Override | 
                  
                          |   | 39 |    protected boolean isSplitable(JobContext context, Path file) { | 
                  
                          |   | 40 | +    System.err.println("TextInputFormat.isSplitable(context," + file.toString() + ")"); | 
                  
                          |   | 41 |      CompressionCodec codec = | 
                  
                          |   | 42 |        new CompressionCodecFactory(context.getConfiguration()).getCodec(file); | 
                  
                          |   | 43 |      return codec == null; | 
                  
                          |   | 44 | }}} | 
                  
                          |   | 45 |  * 讓我們先來觀察一下執行的結果 | 
                  
                          |   | 46 | {{{ | 
                  
                          |   | 47 | unset HADOOP_CONF_DIR | 
                  
                          |   | 48 | cd ~/hadoop_labs/lab011 | 
                  
                          |   | 49 | ant | 
                  
                          |   | 50 | cd ~/hadoop_labs/lab010 | 
                  
                          |   | 51 | mkdir -p my_input | 
                  
                          |   | 52 | echo "A B C D" > my_input/input1 | 
                  
                          |   | 53 | echo "C D A B" > my_input/input2 | 
                  
                          |   | 54 | hadoop fs -put my_input my_input | 
                  
                          |   | 55 | sed -i 's#setNumReduceTasks(0)#setNumReduceTasks(1)#g' ~/hadoop_labs/lab010/src/WordCount.java | 
                  
                          |   | 56 | ant | 
                  
                          |   | 57 | hadoop jar WordCount.jar my_input my_output | 
                  
                          |   | 58 | }}} | 
                  
                          |   | 59 |  | 
                  
                          |   | 60 |  * 接著改用單機模式 (local mode) 的設定檔再執行一次,並比較兩次執行時所觀察到的訊息有何不同 | 
                  
                          |   | 61 | {{{ | 
                  
                          |   | 62 | export HADOOP_CONF_DIR=~/hadoop/conf.local/ | 
                  
                          |   | 63 | hadoop jar WordCount.jar my_input my_output | 
                  
                          |   | 64 | unset HADOOP_CONF_DIR | 
                  
                          |   | 65 | }}} | 
                  
                          |   | 66 |  | 
                  
                          |   | 67 |  * Reference: | 
                  
                          |   | 68 |   1. http://hadoop.apache.org/docs/r1.0.4/api/org/apache/hadoop/mapreduce/InputFormat.html | 
                  
                          |   | 69 |   2. http://hadoop.apache.org/docs/r1.0.4/api/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.html | 
                  
                          |   | 70 |   3. http://hadoop.apache.org/docs/r1.0.4/api/org/apache/hadoop/mapreduce/lib/input/TextInputFormat.html | 
                  
                          |   | 71 |  | 
                  
                          |   | 72 | == 實作習題 == | 
                  
                          |   | 73 |  | 
                  
                          |   | 74 |  <問題 1> 當運行於全分散式模式,請問執行任務時,您觀察到幾行 "!TextInputFormat.isSplitable" | 
                  
                          |   | 75 |  | 
                  
                          |   | 76 |  <問題 2> 當運行於全分散式模式,請問執行任務時,您觀察到幾行 "!TextInputFormat.createRecordReader" | 
                  
                          |   | 77 |  | 
                  
                          |   | 78 |  <問題 3> 當運行於單機模式,請問執行任務時,您觀察到幾行 "!TextInputFormat.isSplitable" | 
                  
                          |   | 79 |  | 
                  
                          |   | 80 |  <問題 4> 當運行於單機模式,請問執行任務時,您觀察到幾行 "!TextInputFormat.createRecordReader" |