|   | 1 | [[PageOutline]] | 
                  
                          |   | 2 |  | 
                  
                          |   | 3 | ◢ <[wiki:III140614/Lab10 實作十]> | <[wiki:III140614 回課程大綱]> ▲ | <[wiki:III140614/Lab12 實作十二]> ◣ | 
                  
                          |   | 4 |  | 
                  
                          |   | 5 | = 實作十一 Lab 11 = | 
                  
                          |   | 6 |  | 
                  
                          |   | 7 | {{{ | 
                  
                          |   | 8 | #!html | 
                  
                          |   | 9 | <div style="text-align: center;"><big style="font-weight: bold;"><big>練習豬的拉丁語<br/>Pig Latin in Practice</big></big></div> | 
                  
                          |   | 10 | }}} | 
                  
                          |   | 11 |  | 
                  
                          |   | 12 | {{{ | 
                  
                          |   | 13 | #!text | 
                  
                          |   | 14 | 以下練習,請連線至 hadoop.3du.me 操作。底下的 userXX 等於您的用戶名稱。 | 
                  
                          |   | 15 | }}} | 
                  
                          |   | 16 |  | 
                  
                          |   | 17 | == Aggregation (Local Mode) == | 
                  
                          |   | 18 |  | 
                  
                          |   | 19 | {{{ | 
                  
                          |   | 20 | ~$ wget http://www.hadoop.tw/excite-small.log | 
                  
                          |   | 21 | ~$ pig -x local | 
                  
                          |   | 22 | grunt> log = LOAD 'excite-small.log' AS (user, timestamp, query); | 
                  
                          |   | 23 | grunt> grpd = GROUP log BY user; | 
                  
                          |   | 24 | grunt> cntd = FOREACH grpd GENERATE group, COUNT(log); | 
                  
                          |   | 25 | grunt> STORE cntd INTO 'lab8_out1'; | 
                  
                          |   | 26 | grunt> quit | 
                  
                          |   | 27 | ~$ head lab8_out1/part-* | 
                  
                          |   | 28 | }}} | 
                  
                          |   | 29 |  | 
                  
                          |   | 30 | == Filter (Local Mode) == | 
                  
                          |   | 31 |  | 
                  
                          |   | 32 | {{{ | 
                  
                          |   | 33 | ~$ pig -x local | 
                  
                          |   | 34 | grunt> log = LOAD 'excite-small.log' AS (user, timestamp, query); | 
                  
                          |   | 35 | grunt> grpd = GROUP log BY user; | 
                  
                          |   | 36 | grunt> cntd = FOREACH grpd GENERATE group, COUNT(log) AS cnt; | 
                  
                          |   | 37 | grunt> fltrd = FILTER cntd BY cnt > 50; | 
                  
                          |   | 38 | grunt> STORE fltrd INTO 'lab8_out2'; | 
                  
                          |   | 39 | grunt> quit | 
                  
                          |   | 40 | ~$ head lab8_out2/part-* | 
                  
                          |   | 41 | }}} | 
                  
                          |   | 42 |  | 
                  
                          |   | 43 | == Sorting (Local Mode) == | 
                  
                          |   | 44 |  | 
                  
                          |   | 45 | {{{ | 
                  
                          |   | 46 | ~$ pig -x local | 
                  
                          |   | 47 | grunt> log = LOAD 'excite-small.log' AS (user, timestamp, query); | 
                  
                          |   | 48 | grunt> grpd = GROUP log BY user; | 
                  
                          |   | 49 | grunt> cntd = FOREACH grpd GENERATE group, COUNT(log) AS cnt; | 
                  
                          |   | 50 | grunt> fltrd = FILTER cntd BY cnt > 50; | 
                  
                          |   | 51 | grunt> srtd = ORDER fltrd BY cnt; | 
                  
                          |   | 52 | grunt> STORE srtd INTO 'lab8_out3'; | 
                  
                          |   | 53 | grunt> quit | 
                  
                          |   | 54 | ~$ head lab8_out3/part-* | 
                  
                          |   | 55 | }}} | 
                  
                          |   | 56 |  | 
                  
                          |   | 57 | == Connect Pig to Hadoop (Full Distributed Mode) == | 
                  
                          |   | 58 |  | 
                  
                          |   | 59 | {{{ | 
                  
                          |   | 60 | ~$ hadoop fs -put excite-small.log . | 
                  
                          |   | 61 | ~$ pig | 
                  
                          |   | 62 | grunt> log = LOAD 'excite-small.log' AS (user, timestamp, query); | 
                  
                          |   | 63 | grunt> grpd = GROUP log BY user; | 
                  
                          |   | 64 | grunt> cntd = FOREACH grpd GENERATE group, COUNT(log); | 
                  
                          |   | 65 | grunt> STORE cntd INTO 'lab8_out1'; | 
                  
                          |   | 66 | grunt> quit | 
                  
                          |   | 67 | ~$ hadoop fs -cat lab8_out1/part-* | 
                  
                          |   | 68 | }}} |