wiki:waue/2010/0402

Version 6 (modified by waue, 9 years ago) (diff)

--

hadoop + Hbase + thrift + php
程式碼解析

零、前言

  • thrift 是一套中介函式庫,讓 java 以外的程式語言也能直接存取 hbase
  • 此篇介紹的是如何用php透過 thrift 對 hbase 操作
hadoop 安裝目錄: /opt/hadoop
hbase 安裝目錄: /opt/hbase
網頁根目錄: /var/www/
hbase 的 php 程式碼目錄: /var/www/hbase
thrift 的 php 函式庫目錄: /var/www/hbase/thrift
  • 測試程式之前,請先確定
    • hbase , hadoop 都有正常運作中
    • $ bin/hbase thrift start 尚在執行

一、php引用thrift lib

<?
// Root directory of the Thrift PHP library. It is published through $GLOBALS
// (presumably because the library files read it themselves; TODO confirm).
$GLOBALS['THRIFT_ROOT'] = '/var/www/hbase/thrift';

// Load the Thrift core, the socket/buffered transports, the binary protocol
// and the HBase client bindings, in that order.
foreach ( array(
  '/Thrift.php',
  '/transport/TSocket.php',
  '/transport/TBufferedTransport.php',
  '/protocol/TBinaryProtocol.php',
  '/packages/Hbase/Hbase.php',
) as $thriftFile ) {
  require_once( $GLOBALS['THRIFT_ROOT'].$thriftFile );
}

// Socket to the HBase Thrift server (started with "bin/hbase thrift start").
$socket = new TSocket( 'secuse.nchc.org.tw', 9090 );
// Ten seconds for sends: too long for production, acceptable for a demo.
$socket->setSendTimeout( 10000 );
// Twenty seconds for receives.
$socket->setRecvTimeout( 20000 );

// Stack the layers: socket -> buffered transport -> binary protocol -> client.
$transport = new TBufferedTransport( $socket );
$protocol = new TBinaryProtocol( $transport );
$client = new HbaseClient( $protocol );

// Open the connection; every later snippet uses $client over this transport.
$transport->open();
?>
........
其他html 碼
<?
// 或是下面提到的各式讀寫操作

?>
.......
<?
// Close the Thrift transport that was opened at the top of the page.
$transport->close();
?>
  • 所有的程式碼都必須包含這些引入函式庫、開啟關閉socket 的敘述

二、各種對hbase的操作

2.1 列出hbase 裡的所有 table

<?
// Print the name of every table known to HBase, in sorted order.
// Requires $client, the connected HbaseClient created by the bootstrap code.
echo( "listing tables...\n" );
$tables = $client->getTableNames();
sort( $tables );
foreach ( $tables as $name ) {
  echo( "  found: {$name}\n" );
}
?>

2.2 刪除table

<?
// Delete one table by name.
// Requires $client, the connected HbaseClient created by the bootstrap code.
$name = "hbase table name";

// If the table is currently enabled, disable it first, then delete it.
if ($client->isTableEnabled( $name )) {
  echo( "    disabling table: {$name}\n");
  $client->disableTable( $name );
}
echo( "    deleting table: {$name}\n" );
$client->deleteTable( $name );
?>

2.3 新增table

  • 我們先定義columns 的物件結構如下
    <?
    // Column family "entry:", configured with maxVersions = 10.
    $entryFamily = new ColumnDescriptor( array(
      'name' => 'entry:',
      'maxVersions' => 10
    ) );
    // Column family "unused:", with no further options set.
    $unusedFamily = new ColumnDescriptor( array(
      'name' => 'unused:'
    ) );
    // The descriptor list that is later handed to createTable().
    $columns = array( $entryFamily, $unusedFamily );
    ?>
    
  • 將剛剛的column 放到table 內
    <?
    // Create table $t from the ColumnDescriptor list held in $columns.
    // Requires $client (connected HbaseClient) and $columns to be set already.
    $t = "table name";
    echo "creating table: {$t}\n";

    try {
      $client->createTable( $t, $columns );
    }
    catch ( AlreadyExists $ae ) {
      // An existing table is reported as a warning instead of aborting.
      echo "WARN: {$ae->message}\n";
    }
    
    ?>
    

2.4 列出 table內的家族成員 family

<?
// Print each column family of table $t with its maxVersions setting.
// Requires $client, the connected HbaseClient created by the bootstrap code.
$t = "table name";
echo "column families in {$t}:\n";

// Fetch the descriptors and sort them (keys preserved) before printing.
$descriptors = $client->getColumnDescriptors( $t );
asort( $descriptors );

foreach ( $descriptors as $col ) {
  echo "  column: {$col->name}, maxVer: {$col->maxVersions}\n";
}
?>

2.5 寫入資料

<?
// Write one value into column "entry:foo" of a single row.
// Requires $client, the connected HbaseClient created by the bootstrap code.
$t = "table name";
$row = "row name";
// ASCII prefix followed by raw bytes given as hex escapes (two 3-byte UTF-8 characters).
$valid = "foobar-\xE7\x94\x9F\xE3\x83\x93";

// One Mutation per column to write; here only a single column is set.
$mutations = array(
  new Mutation( array(
    'column' => 'entry:foo',
    'value' => $valid
  ) ),
);

$client->mutateRow( $t, $row, $mutations );
?>

2.6 讀取資料

get 取得一個 column value

  • get 取得一個 column value 的用法
<?
// Read the cell(s) stored at one table / row / "family:column" position.
// Requires $client, the connected HbaseClient created by the bootstrap code.
$table_name = 't1';
$row_name = '1';
$fam_col_name = 'f1:c1';

// get() returns an array; each element $v is a TCell exposing ->value and ->timestamp.
$arr = $client->get($table_name, $row_name , $fam_col_name);
foreach ( $arr as $v ) {
  echo ("value = {$v->value} , <br>  ");
  echo ("timestamp = {$v->timestamp}  <br>");
}
?>

getRow 取得一整個row

  • getRow($tableName, $row) 用法
    <?
    // Fetch one whole row and print every cell in it.
    // Requires $client, the connected HbaseClient created by the bootstrap code.

    // printTRowResult() is not defined anywhere on this page. Supply a minimal
    // fallback so the snippet can run on its own; if the helper has already
    // been defined elsewhere, that definition is used unchanged.
    if ( !function_exists( 'printTRowResult' ) ) {
      function printTRowResult( $TRowResult ) {
        echo (" row = {$TRowResult->row} ; <br> ");
        // ->columns maps "family:column" names to TCell objects.
        foreach ( $TRowResult->columns as $family_column=>$Tcell ) {
          echo ("family:column = $family_column ");
          echo (" value = {$Tcell->value} ");
          echo (" timestamp = {$Tcell->timestamp}  <br>");
        }
      }
    }

    $table_name = "table name";
    $row_name = "row name";

    // getRow() returns an array of TRowResult objects; the array keys are not used.
    $arr = $client->getRow($table_name, $row_name);
    foreach ( $arr as $TRowResult ) {
        printTRowResult($TRowResult);
    }
    ?>
    

scan 一整個table

<?
// Scan a whole table and print every cell of every row.
// Requires $client, the connected HbaseClient created by the bootstrap code.
$table_name = 't1';
$start_row = ""; // empty start row: begin at the first row of the table
$family = array( "f1","f2","f3" );

// scannerOpen() hands back a scanner id (an incrementing number, per the
// original author's note) that identifies this scan in later calls.
$scanner = $client->scannerOpen( $table_name, $start_row , $family );

// scannerGet() fetches only one row per call, so keep calling it in a loop.
for (;;) {
    // $rows is an array of TRowResult objects.
    $rows = $client->scannerGet( $scanner );

    // Nothing returned means there is no more data: leave the loop.
    if ($rows == null) {
        break;
    }

    foreach ( $rows as $TRowResult ) {
      echo (" row = {$TRowResult->row} ; <br> ");
      // ->columns maps "family:column" names to TCell objects.
      $cells = $TRowResult->columns;
      foreach ( $cells as $family_column=>$Tcell ) {
          echo ("family:column = $family_column ");
          echo (" value = {$Tcell->value} ");
          echo (" timestamp = {$Tcell->timestamp}  <br>");
      }
    }
}

echo( "<br> ----------------- " );
echo( "<br> Scanner finished <br>" );
// Release the scanner now that the scan is complete.
$client->scannerClose( $scanner );
?>

補充

TCell

<?
/**
 * A single cell: a value together with its timestamp.
 *
 * Looks like an excerpt of the Thrift-generated struct (only the fields and
 * the constructor are shown here).
 */
class TCell {
  /** Field spec (field id => name and Thrift type); built once, on first construction. */
  public static $_TSPEC;

  /** Cell content; null until set. */
  public $value = null;
  /** Cell timestamp; null until set. */
  public $timestamp = null;

  /**
   * @param array|null $vals Optional initial values, keyed by 'value' and/or
   *                         'timestamp'. Anything that is not an array is ignored.
   */
  public function __construct($vals=null) {
    if (!isset(self::$_TSPEC)) {
      self::$_TSPEC = array(
        1 => array( 'var' => 'value', 'type' => TType::STRING ),
        2 => array( 'var' => 'timestamp', 'type' => TType::I64 ),
      );
    }

    if (!is_array($vals)) {
      return;
    }

    // Copy over only the known fields that are present and non-null.
    foreach (array('value', 'timestamp') as $field) {
      if (isset($vals[$field])) {
        $this->$field = $vals[$field];
      }
    }
  }
}
?>

TRowResult

<?
/**
 * One row of a result: the row key plus a map of its columns.
 *
 * Looks like an excerpt of the Thrift-generated struct (only the fields and
 * the constructor are shown here).
 */
class TRowResult {
  /** Field spec (field id => name and Thrift type); built once, on first construction. */
  public static $_TSPEC;

  /** Row key; null until set. */
  public $row = null;
  /** Columns of the row; per the spec, a map of string keys to TCell structs. */
  public $columns = null;

  /**
   * @param array|null $vals Optional initial values, keyed by 'row' and/or
   *                         'columns'. Anything that is not an array is ignored.
   */
  public function __construct($vals=null) {
    if (!isset(self::$_TSPEC)) {
      $rowSpec = array(
        'var' => 'row',
        'type' => TType::STRING,
      );
      $columnsSpec = array(
        'var' => 'columns',
        'type' => TType::MAP,
        'ktype' => TType::STRING,
        'vtype' => TType::STRUCT,
        'key' => array( 'type' => TType::STRING ),
        'val' => array( 'type' => TType::STRUCT, 'class' => 'TCell' ),
      );
      self::$_TSPEC = array( 1 => $rowSpec, 2 => $columnsSpec );
    }

    if (!is_array($vals)) {
      return;
    }

    // Copy over only the known fields that are present and non-null.
    foreach (array('row', 'columns') as $field) {
      if (isset($vals[$field])) {
        $this->$field = $vals[$field];
      }
    }
  }
}
?>