[[PageOutline]]

◢ <[wiki:NCTU110329/Lab3 實作三]> | <[wiki:NCTU110329 回課程大綱]> ▲ | <[wiki:NCTU110329/Lab5 實作五]> ◣

= 實作四 Lab 4 =

{{{
#!html
<div style="text-align: center;"><big style="font-weight: bold;"><big>HDFS Shell操作練習<br/>HDFS Shell in practice</big></big></div>
}}}

== Content 1: HDFS Shell 基本操作 ==
== Content 1: Basic HDFS Shell Commands ==

=== 1.1 瀏覽你的 HDFS 目錄 ===
=== 1.1 Browse Your HDFS Directory ===

{{{
/opt/hadoop$ bin/hadoop fs -ls
/opt/hadoop$ bin/hadoop fs -lsr
}}}
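
* A bare -ls with no path lists your HDFS home directory (see Content 3 below). A minimal sketch of the equivalent explicit form, assuming your account is hadooper as in the later examples:
{{{
# default: list the current user's HDFS home directory
/opt/hadoop$ bin/hadoop fs -ls
# equivalent explicit form (assumed user "hadooper")
/opt/hadoop$ bin/hadoop fs -ls /user/hadooper
}}}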

=== 1.2 上傳資料到 HDFS 目錄 ===
=== 1.2 Upload Files or Folders to HDFS ===

* 上傳 Upload

{{{
/opt/hadoop$ bin/hadoop fs -put conf input
}}}

* 檢查 Check

{{{
/opt/hadoop$ bin/hadoop fs -ls
/opt/hadoop$ bin/hadoop fs -ls input
}}}
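
* -put also works on single files. A minimal sketch, assuming conf/hadoop-env.sh exists locally and the input folder was created by the step above:
{{{
# upload one file into the existing HDFS folder "input"
/opt/hadoop$ bin/hadoop fs -put conf/hadoop-env.sh input
# confirm it arrived
/opt/hadoop$ bin/hadoop fs -ls input
}}}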

=== 1.3 下載 HDFS 的資料到本地目錄 ===
=== 1.3 Download Files or Folders from HDFS to Local ===

* 下載 Download

{{{
/opt/hadoop$ bin/hadoop fs -get input fromHDFS
}}}

* 檢查 Check

{{{
/opt/hadoop$ ls -al | grep fromHDFS
/opt/hadoop$ ls -al fromHDFS
}}}
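
* -get also works on single files. A minimal sketch, assuming input/masters still exists on HDFS (it is removed in the next step):
{{{
# download one file from HDFS into the local working directory
/opt/hadoop$ bin/hadoop fs -get input/masters ./masters.copy
/opt/hadoop$ cat ./masters.copy
}}}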

=== 1.4 刪除檔案 ===
=== 1.4 Remove Files or Folders ===

{{{
/opt/hadoop$ bin/hadoop fs -ls input
/opt/hadoop$ bin/hadoop fs -rm input/masters
}}}
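
* You can list the folder again to confirm the file is gone:
{{{
# "masters" should no longer appear in the listing
/opt/hadoop$ bin/hadoop fs -ls input
}}}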

=== 1.5 直接看檔案 ===
=== 1.5 Browse Files Directly ===

{{{
/opt/hadoop$ bin/hadoop fs -ls input
/opt/hadoop$ bin/hadoop fs -cat input/slaves
}}}
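
* For large files -cat prints everything, so piping into a local pager or head keeps the output manageable. A sketch, assuming input/hadoop-env.sh was uploaded earlier:
{{{
# show only the first few lines of a file stored on HDFS
/opt/hadoop$ bin/hadoop fs -cat input/hadoop-env.sh | head
}}}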

=== 1.6 更多指令操作 ===
=== 1.6 More Commands -- Help message ===

{{{
hadooper@vPro:/opt/hadoop$ bin/hadoop fs

Usage: java FsShell
[-ls <path>]
[-lsr <path>]
[-du <path>]
[-dus <path>]
[-count[-q] <path>]
[-mv <src> <dst>]
[-cp <src> <dst>]
[-rm <path>]
[-rmr <path>]
[-expunge]
[-put <localsrc> ... <dst>]
[-copyFromLocal <localsrc> ... <dst>]
[-moveFromLocal <localsrc> ... <dst>]
[-get [-ignoreCrc] [-crc] <src> <localdst>]
[-getmerge <src> <localdst> [addnl]]
[-cat <src>]
[-text <src>]
[-copyToLocal [-ignoreCrc] [-crc] <src> <localdst>]
[-moveToLocal [-crc] <src> <localdst>]
[-mkdir <path>]
[-setrep [-R] [-w] <rep> <path/file>]
[-touchz <path>]
[-test -[ezd] <path>]
[-stat [format] <path>]
[-tail [-f] <file>]
[-chmod [-R] <MODE[,MODE]... | OCTALMODE> PATH...]
[-chown [-R] [OWNER][:[GROUP]] PATH...]
[-chgrp [-R] GROUP PATH...]
[-help [cmd]]

Generic options supported are
-conf <configuration file> specify an application configuration file
-D <property=value> use value for given property
-fs <local|namenode:port> specify a namenode
-jt <local|jobtracker:port> specify a job tracker
-files <comma separated list of files> specify comma separated files to be copied to the map reduce cluster
-libjars <comma separated list of jars> specify comma separated jar files to include in the classpath.
-archives <comma separated list of archives> specify comma separated archives to be unarchived on the compute machines.
The general command line syntax is
bin/hadoop command [genericOptions] [commandOptions]
}}}
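
* The usage listing above also mentions -help, which prints a longer description of a single subcommand. For example:
{{{
# detailed help for one subcommand
/opt/hadoop$ bin/hadoop fs -help ls
}}}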

== Content 2: 使用網頁 GUI 瀏覽資訊 ==
== Content 2: Use the Web GUI to Browse HDFS ==

* [http://localhost:50030 JobTracker Web Interface]
* [http://localhost:50070 NameNode Web Interface]
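
* If you are logged in on the cluster node itself, a quick sanity check from the shell (a sketch, assuming the daemons run on this machine and curl is installed):
{{{
# the NameNode and JobTracker web UIs should answer on these ports
/opt/hadoop$ curl -sI http://localhost:50070 | head -n 1
/opt/hadoop$ curl -sI http://localhost:50030 | head -n 1
}}}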

== Content 3: 更多 HDFS Shell 的用法 ==
== Content 3: More about HDFS Shell ==

* 指令的一般形式為 bin/hadoop fs <args>,下面則列出各種 <args> 的用法[[BR]]The general form is bin/hadoop fs <args>; the sections below show the usage of each argument.
* 以下操作預設的目錄在 /user/<username>/ 下[[BR]]By default, these commands operate relative to your HDFS home directory /user/<username>/.
{{{
$ bin/hadoop fs -ls input
Found 4 items
-rw-r--r-- 2 hadooper supergroup 115045564 2009-04-02 11:51 /user/hadooper/input/1.txt
-rw-r--r-- 2 hadooper supergroup 987864 2009-04-02 11:51 /user/hadooper/input/2.txt
-rw-r--r-- 2 hadooper supergroup 1573048 2009-04-02 11:51 /user/hadooper/input/3.txt
-rw-r--r-- 2 hadooper supergroup 25844527 2009-04-02 11:51 /user/hadooper/input/4.txt
}}}
* 完整的路徑則是 '''hdfs://node:port/path''',如:[[BR]]Alternatively, you can give a full __''absolute path''__ of the form '''hdfs://node:port/path''', for example:
{{{
$ bin/hadoop fs -ls hdfs://gm1.nchc.org.tw:9000/user/hadooper/input
Found 4 items
-rw-r--r-- 2 hadooper supergroup 115045564 2009-04-02 11:51 /user/hadooper/input/1.txt
-rw-r--r-- 2 hadooper supergroup 987864 2009-04-02 11:51 /user/hadooper/input/2.txt
-rw-r--r-- 2 hadooper supergroup 1573048 2009-04-02 11:51 /user/hadooper/input/3.txt
-rw-r--r-- 2 hadooper supergroup 25844527 2009-04-02 11:51 /user/hadooper/input/4.txt
}}}
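
* The namenode can also be chosen per invocation with the -fs generic option shown in the help message above (a sketch; gm1.nchc.org.tw:9000 is the namenode used in these examples):
{{{
# equivalent to the absolute-path form, using the -fs generic option
$ bin/hadoop fs -fs hdfs://gm1.nchc.org.tw:9000 -ls /user/hadooper/input
}}}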

=== -cat ===

* 將路徑指定文件的內容輸出到 STDOUT [[BR]] Print the content of the given file to STDOUT
{{{
$ bin/hadoop fs -cat quota/hadoop-env.sh
}}}

=== -chgrp ===

* 改變文件所屬的組 [[BR]] Change the '''group''' of the given file or folder
{{{
$ bin/hadoop fs -chgrp -R hadooper own
}}}
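
* -ls shows the group column, so you can confirm the change (a sketch, assuming the own folder from this example exists):
{{{
# the group of "own" and its contents should now be hadooper
$ bin/hadoop fs -lsr own
}}}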

=== -chmod ===

* 改變文件的權限 [[BR]] Change the '''read and write permissions''' of the given file or folder
{{{
$ bin/hadoop fs -chmod -R 755 own
}}}

=== -chown ===

* 改變文件的擁有者 [[BR]] Change the '''owner''' of the given file or folder
{{{
$ bin/hadoop fs -chown -R hadooper own
}}}

=== -copyFromLocal, -put ===

* 從 local 放檔案到 hdfs [[BR]] Both commands copy the given file or folder from local to HDFS
{{{
$ bin/hadoop fs -put input dfs_input
}}}

=== -copyToLocal, -get ===

* 把 hdfs 上的檔案下載到 local [[BR]] Both commands copy the given file or folder from HDFS to local
{{{
$ bin/hadoop fs -get dfs_input input1
}}}

=== -cp ===

* 將文件從 hdfs 原本路徑複製到 hdfs 目標路徑 [[BR]] Copy the given file or folder from an HDFS source path to an HDFS target path
{{{
$ bin/hadoop fs -cp own hadooper
}}}

=== -du ===

* 顯示目錄中所有文件的大小 [[BR]] Display the size of each file in the given folder
{{{
$ bin/hadoop fs -du input

Found 4 items
115045564 hdfs://gm1.nchc.org.tw:9000/user/hadooper/input/1.txt
987864 hdfs://gm1.nchc.org.tw:9000/user/hadooper/input/2.txt
1573048 hdfs://gm1.nchc.org.tw:9000/user/hadooper/input/3.txt
25844527 hdfs://gm1.nchc.org.tw:9000/user/hadooper/input/4.txt
}}}

=== -dus ===

* 顯示該目錄/文件的總大小 [[BR]] Display the total size of the given folder
{{{
$ bin/hadoop fs -dus input

hdfs://gm1.nchc.org.tw:9000/user/hadooper/input 143451003
}}}

=== -expunge ===

* 清空垃圾桶 [[BR]] Empty the trash
{{{
$ bin/hadoop fs -expunge
}}}

=== -getmerge ===

* 將來源目錄 <src> 下所有的文件都集合到本地端一個 <localdst> 檔案內 [[BR]] Merge all files under the HDFS source folder <src> into a single local file <localdst>
{{{
$ bin/hadoop fs -getmerge <src> <localdst>
}}}
{{{
$ mkdir in1
$ echo "this is one; " >> in1/input
$ echo "this is two; " >> in1/input2
$ bin/hadoop fs -put in1 in1
$ bin/hadoop fs -getmerge in1 merge.txt
$ cat ./merge.txt
}}}

=== -ls ===

* 列出文件或目錄的資訊 [[BR]] List files and folders
* 每一行的欄位依序為:<權限> <副本數> <用戶ID> <組ID> <文件大小> <修改日期> <修改時間> <路徑名稱>(參考上面 Content 3 的輸出範例)[[BR]] Each entry is printed as <permission> <replication> <user id> <group id> <size> <modified date> <modified time> <path name> (see the sample output in Content 3 above)
{{{
$ bin/hadoop fs -ls
}}}

=== -lsr ===

* ls 命令的遞迴版本 [[BR]] Recursive version of the ls command
{{{
$ bin/hadoop fs -lsr /
}}}

=== -mkdir ===

* 建立資料夾 [[BR]] Create directories
{{{
$ bin/hadoop fs -mkdir a b c
}}}

=== -moveFromLocal ===

* 將 local 端的資料夾剪下移動到 hdfs 上 [[BR]] Move local files or folders to HDFS (the local copy is deleted afterwards)
{{{
$ bin/hadoop fs -moveFromLocal in1 in2
}}}
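
* After -moveFromLocal the local source is gone while the HDFS copy exists; you can check both sides:
{{{
# the local folder was moved away, so this should report "No such file or directory"
$ ls in1
# the data now lives on HDFS under in2
$ bin/hadoop fs -lsr in2
}}}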

=== -mv ===

* 更改資料的名稱 [[BR]] Move or rename a file or folder within HDFS
{{{
$ bin/hadoop fs -mv in2 in3
}}}

=== -rm ===

* 刪除指定的檔案(不可資料夾)[[BR]] Remove the given files (not folders)
{{{
$ bin/hadoop fs -rm in1/input
}}}

=== -rmr ===

* 遞迴刪除資料夾(包含在內的所有檔案) [[BR]] Recursively remove the given folder and all files within it
{{{
$ bin/hadoop fs -rmr in1
}}}

=== -setrep ===

* 設定副本係數 [[BR]] Set the replication factor of the given files or folder
{{{
$ bin/hadoop fs -setrep [-R] [-w] <rep> <path/file>
}}}
{{{
$ bin/hadoop fs -setrep -w 2 -R input
Replication 2 set: hdfs://gm1.nchc.org.tw:9000/user/hadooper/input/1.txt
Replication 2 set: hdfs://gm1.nchc.org.tw:9000/user/hadooper/input/2.txt
Replication 2 set: hdfs://gm1.nchc.org.tw:9000/user/hadooper/input/3.txt
Replication 2 set: hdfs://gm1.nchc.org.tw:9000/user/hadooper/input/4.txt
Waiting for hdfs://gm1.nchc.org.tw:9000/user/hadooper/input/1.txt ... done
Waiting for hdfs://gm1.nchc.org.tw:9000/user/hadooper/input/2.txt ... done
Waiting for hdfs://gm1.nchc.org.tw:9000/user/hadooper/input/3.txt ... done
Waiting for hdfs://gm1.nchc.org.tw:9000/user/hadooper/input/4.txt ... done
}}}

=== -stat ===

* 印出該路徑的修改時間資訊 [[BR]] Print the modification time of the given path
{{{
$ bin/hadoop fs -stat input
2009-04-02 03:51:29
}}}

=== -tail ===

* 將文件的最後 1K 內容輸出 [[BR]] Display the last 1 KB of the given file
* 用法 Usage
{{{
bin/hadoop fs -tail [-f] 檔案 (-f 參數用來持續顯示檔案後來被 append 上的內容)
bin/hadoop fs -tail [-f] <path/file> (-f keeps printing new content as the file grows)
}}}
{{{
$ bin/hadoop fs -tail input/1.txt
}}}

=== -test ===

* 測試檔案或目錄 [[BR]] Test files or folders
* -e 檢查文件是否存在(1=存在, 0=否)[[BR]] -e : check if the file or folder exists ( 1 = exists, 0 = not )
* -z 檢查文件是否為空(1=空, 0=不為空)[[BR]] -z : check if the file is empty ( 1 = empty, 0 = not empty )
* -d 檢查是否為目錄(1=是, 0=否)[[BR]] -d : check if the given path is a folder ( 1 = folder, 0 = not )
* 要用 echo $? 來看回傳值是 0 或 1 [[BR]] Use '''echo $?''' to read the return value
* 用法 Usage
{{{
$ bin/hadoop fs -test -[ezd] URI
}}}

{{{
$ bin/hadoop fs -test -e /user/hadooper/input/5.txt
$ bin/hadoop fs -test -z /user/hadooper/input/5.txt
test: File does not exist: /user/hadooper/input/5.txt
$ bin/hadoop fs -test -d /user/hadooper/input/5.txt

test: File does not exist: /user/hadooper/input/5.txt
}}}

=== -text ===

* 將檔案(如壓縮檔、TextRecordInputStream)以純文字格式輸出 [[BR]] Output the given file (e.g. gzip-compressed files or TextRecordInputStream) as plain text to STDOUT
{{{
$ hadoop fs -text <src>
}}}
{{{
$ hadoop fs -text macadr-eth1.txt.gz
00:1b:fc:61:75:b1
00:1b:fc:58:9c:23
}}}
* ps : 目前沒支援 zip 的函式庫 [[BR]] Note: zip archives are not supported yet, so their output remains garbled:
{{{
$ bin/hadoop fs -text b/a.txt.zip
PK
���:��H{
a.txtUT b��Ib��IUx��sssss
test
PK
���:��H{
��a.txtUTb��IUxPK@C
}}}

=== -touchz ===

* 建立一個空文件 [[BR]] Create an empty file
{{{
$ bin/hadoop fs -touchz b/kk
$ bin/hadoop fs -test -z b/kk
$ echo $?
1
$ bin/hadoop fs -test -z b/a.txt.zip
$ echo $?
0
}}}
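
* You can confirm the empty file was created (a sketch; "b" is the folder used in the -text example above):
{{{
# kk should appear in the listing with size 0
$ bin/hadoop fs -ls b
}}}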