|
1 | 1 | ## Create spark sql table map to existing hbase (only single column mapping to hbase rowkey is supported)
|
2 | 2 | (1) Create table in hbase, populate data
|
3 | 3 | ```
|
4 |
| -create 'hbase10k', 'f' |
5 |
| -for i in '1'..'10000' do for j in '1'..'2' do put 'hbase10k', "row#{i}", "f:c#{j}", "#{i}#{j}" end end |
6 |
| -``` |
| 4 | +
|
7 | 5 |
|
8 | 6 | (2) Map hbase table with sparksql table
|
9 | 7 | ```
|
10 |
| -CREATE TABLE spark10k(rowkey STRING, a INTEGER, b INTEGER, PRIMARY KEY (rowkey)) MAPPED BY (hbase10k, COLS=[a=f.c1, b=f.c2]); |
| 8 | +CREATE TABLE sales1m(id STRING, product STRING, region STRING, sales INTEGER, quantity INTEGER, PRIMARY KEY (id, product, region)) MAPPED BY (hbase_sales1m, COLS=[sales=f.sales, quantity=f.quantity]); |
| 9 | +CREATE TABLE sales1m_onekey(id STRING, product STRING, region STRING, sales INTEGER, quantity INTEGER, PRIMARY KEY (id)) MAPPED BY (hbase_sales1m_onekey, COLS=[product=f.product, region=f.region, sales=f.sales, quantity=f.quantity]); |
| 10 | + |
| 11 | +CREATE TABLE sales10m(id STRING, product STRING, region STRING, sales INTEGER, quantity INTEGER, PRIMARY KEY (id, product, region)) MAPPED BY (hbase_sales10m, COLS=[sales=f.sales, quantity=f.quantity]); |
| 12 | +CREATE TABLE sales10m_onekey(id STRING, product STRING, region STRING, sales INTEGER, quantity INTEGER, PRIMARY KEY (id)) MAPPED BY (hbase_sales10m_onekey, COLS=[product=f.product, region=f.region, sales=f.sales, quantity=f.quantity]); |
| 13 | +``` |
| 14 | +
|
| 15 | +(4) Load data : |
11 | 16 | ```
|
| 17 | +LOAD DATA INPATH './examples/sales1m.csv' INTO TABLE sales1m FIELDS TERMINATED BY "," ; |
| 18 | +LOAD DATA INPATH './examples/sales1m.csv' INTO TABLE sales1m_onekey FIELDS TERMINATED BY "," ; |
| 19 | + |
| 20 | +LOAD DATA INPATH './examples/sales10m.csv' INTO TABLE sales10m FIELDS TERMINATED BY "," ; |
| 21 | +LOAD DATA INPATH './examples/sales10m.csv' INTO TABLE sales10m_onekey FIELDS TERMINATED BY "," ; |
| 22 | +``` |
| 23 | +
|
12 | 24 |
|
13 | 25 | (3) Query:
|
14 | 26 | ```
|
15 | 27 | // test count *
|
16 |
| - (1) select count(*) from spark10k |
| 28 | + (1) select count(*) from sales1m |
17 | 29 |
|
18 | 30 | // test group by
|
19 |
| - (2) select avg(a), b from spark10k group by b |
| 31 | + (2) select product, region, avg(sales) from sales1m where product="product4" group by product, region; |
20 | 32 | ```
|
0 commit comments