|   | 1 | ◢ <[wiki:Hinet120814/Lab10 實作十]> | <[wiki:Hinet120814 回課程大綱]> ▲ | <[wiki:Hinet120814/Lab12 實作十二]> ◣ | 
                  
                          |   | 2 |  | 
                  
                          |   | 3 | = 實作十一 Lab 11 = | 
                  
                          |   | 4 | [[PageOutline]] | 
                  
                          |   | 5 | {{{ | 
                  
                          |   | 6 | #!html | 
                  
                          |   | 7 | <div style="text-align: center;"><big style="font-weight: bold;"><big>練習豬的拉丁語<br/>Pig Latin in Practice</big></big></div> | 
                  
                          |   | 8 | }}} | 
                  
                          |   | 9 |  | 
                  
                          |   | 10 | {{{ | 
                  
                          |   | 11 | #!text | 
                  
                          |   | 12 | 以下練習，請連線至 hadoop.nchc.org.tw 操作。底下的 hXXXX 等於您的用戶名稱。 | 
                  
                          |   | 13 | }}} | 
                  
                          |   | 14 |  | 
                  
                          |   | 15 | == Aggregation (Local Mode) == | 
                  
                          |   | 16 |  | 
                  
                          |   | 17 | {{{ | 
                  
                          |   | 18 | ~$ wget http://hadoop.nchc.org.tw/excite-small.log | 
                  
                          |   | 19 | ~$ pig -x local | 
                  
                          |   | 20 | grunt> log = LOAD 'excite-small.log' AS (user, timestamp, query); | 
                  
                          |   | 21 | grunt> grpd = GROUP log BY user; | 
                  
                          |   | 22 | grunt> cntd = FOREACH grpd GENERATE group, COUNT(log); | 
                  
                          |   | 23 | grunt> STORE cntd INTO 'lab8_out1'; | 
                  
                          |   | 24 | grunt> quit | 
                  
                          |   | 25 | ~$ head lab8_out1 | 
                  
                          |   | 26 | }}} | 
                  
                          |   | 27 |  | 
                  
                          |   | 28 | == Filter (Local Mode) == | 
                  
                          |   | 29 |  | 
                  
                          |   | 30 | {{{ | 
                  
                          |   | 31 | ~$ pig -x local | 
                  
                          |   | 32 | grunt> log = LOAD 'excite-small.log' AS (user, timestamp, query); | 
                  
                          |   | 33 | grunt> grpd = GROUP log BY user; | 
                  
                          |   | 34 | grunt> cntd = FOREACH grpd GENERATE group, COUNT(log) AS cnt; | 
                  
                          |   | 35 | grunt> fltrd = FILTER cntd BY cnt > 50; | 
                  
                          |   | 36 | grunt> STORE fltrd INTO 'lab8_out2'; | 
                  
                          |   | 37 | grunt> quit | 
                  
                          |   | 38 | ~$ head lab8_out2 | 
                  
                          |   | 39 | }}} | 
                  
                          |   | 40 |  | 
                  
                          |   | 41 | == Sorting (Local Mode) == | 
                  
                          |   | 42 |  | 
                  
                          |   | 43 | {{{ | 
                  
                          |   | 44 | ~$ pig -x local | 
                  
                          |   | 45 | grunt> log = LOAD 'excite-small.log' AS (user, timestamp, query); | 
                  
                          |   | 46 | grunt> grpd = GROUP log BY user; | 
                  
                          |   | 47 | grunt> cntd = FOREACH grpd GENERATE group, COUNT(log) AS cnt; | 
                  
                          |   | 48 | grunt> fltrd = FILTER cntd BY cnt > 50; | 
                  
                          |   | 49 | grunt> srtd = ORDER fltrd BY cnt; | 
                  
                          |   | 50 | grunt> STORE srtd INTO 'lab8_out3'; | 
                  
                          |   | 51 | grunt> quit | 
                  
                          |   | 52 | ~$ head lab8_out3 | 
                  
                          |   | 53 | }}} | 
                  
                          |   | 54 |  | 
                  
                          |   | 55 | == Connect Pig to Hadoop (Fully Distributed Mode) == | 
                  
                          |   | 56 |  | 
                  
                          |   | 57 | {{{ | 
                  
                          |   | 58 | ~$ hadoop fs -put excite-small.log . | 
                  
                          |   | 59 | ~$ pig | 
                  
                          |   | 60 | grunt> log = LOAD 'excite-small.log' AS (user, timestamp, query); | 
                  
                          |   | 61 | grunt> grpd = GROUP log BY user; | 
                  
                          |   | 62 | grunt> cntd = FOREACH grpd GENERATE group, COUNT(log); | 
                  
                          |   | 63 | grunt> STORE cntd INTO 'lab8_out1'; | 
                  
                          |   | 64 | grunt> quit | 
                  
                          |   | 65 | ~$ hadoop fs -cat lab8_out1/part-00000 | 
                  
                          |   | 66 | }}} |