improve example doc

jackylk · jackylk · commit 220e7e9dee7b · 2015-07-21T11:56:14.000-07:00
diff --git a/examples/spark-hbase-demo1.md b/examples/spark-hbase-demo1.md
@@ -1,28 +1,34 @@
-## Create and query SparkSQL table map to HBase (support multiple columns mapping to hbase rowkey)
-(1)TableName :
+## Create and query a SparkSQL table mapped to an HBase table (multiple columns map to the HBase table rowkey)
+In this example, we create a new SparkSQL table and map it to a new HBase table with multiple columns in the rowkey.
+
+Steps:
+(1) Create table in hbase-sql shell:
+```
+$SPARK_HBASE_Home/bin/hbase-sql
+CREATE TABLE teacher1k(grade int, class int, subject string, teacher_name string, teacher_age int, PRIMARY KEY (grade, class, subject)) MAPPED BY (hbase1k, COLS=[teacher_name=teacher.name, teacher_age=teacher.age]);
+```
+
+This command will create the following tables:
+Tables :
   spark :  teacher1k
   hbase :  hbase1k
   
-(2)Fields :
+Fields :
   [grade,int]
   [class,int]
   [subject,string]
   [teacher_name,string]
   [teacher_age,int]
 
-  keyCols : grade,class,subject
-
-(3) Create table:
-```
-CREATE TABLE teacher1k(grade int, class int, subject string, teacher_name string, teacher_age int, PRIMARY KEY (grade, class, subject)) MAPPED BY (hbase1k, COLS=[teacher_name=teacher.name, teacher_age=teacher.age]);
-```
-
-(4) Load data :
+  key columns : grade,class,subject
+  non-key columns : teacher_name, teacher_age
+  
+(2) Load data from a csv data file:
 ```
 LOAD DATA INPATH './examples/teacher1k.csv' INTO TABLE teacher1k FIELDS TERMINATED BY "," ;
 ```
 
-(5) Query :
+(3) Query :
 ```
     // test where
     (1) select teacher_name,teacher_age from teacher1k where teacher_age > 25;
@@ -33,7 +39,7 @@ LOAD DATA INPATH './examples/teacher1k.csv' INTO TABLE teacher1k FIELDS TERMINAT
     // test subquery
     (3) select t1.teacher_name,t1.teacher_age from (select * from teacher1k where teacher_name like 'teacher_2_3%') t1 where t1.teacher_age < 25
 
-    //test group
+    //test group by
     (4) select teacher_name, sum(teacher_age) from teacher1k where grade=1 group by teacher_name
 
     //test join
diff --git a/examples/spark-hbase-demo2.md b/examples/spark-hbase-demo2.md
@@ -1,12 +1,17 @@
-## Create spark sql table map to existing hbase (only single column mapping to hbase rowkey is supported)
-(1) Create table in hbase, populate data
+## Create a SparkSQL table mapped to an existing HBase table
+In this example, we create a SparkSQL table and map it to an existing HBase table (a single column maps to the HBase rowkey).
+
+Steps:
+(1) Create table and populate data in HBase shell
 ```
+$HBase_Home/bin/hbase shell
 create 'hbase10k', 'f'
 for i in '1'..'10000' do for j in '1'..'2' do put 'hbase10k', "row#{i}", "f:c#{j}", "#{i}#{j}" end end
 ```   
 
-(2) Map hbase table with sparksql table
+(2) Map hbase table with sparksql table in hbase-sql shell
 ```
+$SPARK_HBASE_Home/bin/hbase-sql
 CREATE TABLE spark10k(rowkey STRING, a INTEGER, b INTEGER, PRIMARY KEY (rowkey)) MAPPED BY (hbase10k, COLS=[a=f.c1, b=f.c2]);
 ```
 
diff --git a/examples/spark-hbase-demo3.md b/examples/spark-hbase-demo3.md
@@ -1,27 +1,19 @@
-## Create spark sql table map to existing hbase (only single column mapping to hbase rowkey is supported)
-(1) Create table in hbase, populate data
-```
-
+## Similar to demo 1, but with a larger sample file
+In this example, we create a new SparkSQL table and map it to a new HBase table with multiple columns in the rowkey.
 
-(2) Map hbase table with sparksql table
+(2) Create table in SparkSQL and in HBase 
 ```
+$SPARK_HBASE_HOME/bin/hbase-sql
 CREATE TABLE sales1m(id STRING, product STRING, region STRING, sales INTEGER, quantity INTEGER, PRIMARY KEY (id, product, region)) MAPPED BY (hbase_sales1m, COLS=[sales=f.sales, quantity=f.quantity]);
 CREATE TABLE sales1m_onekey(id STRING, product STRING, region STRING, sales INTEGER, quantity INTEGER, PRIMARY KEY (id)) MAPPED BY (hbase_sales1m_onekey, COLS=[product=f.product, region=f.region, sales=f.sales, quantity=f.quantity]);
-
-CREATE TABLE sales10m(id STRING, product STRING, region STRING, sales INTEGER, quantity INTEGER, PRIMARY KEY (id, product, region)) MAPPED BY (hbase_sales10m, COLS=[sales=f.sales, quantity=f.quantity]);
-CREATE TABLE sales10m_onekey(id STRING, product STRING, region STRING, sales INTEGER, quantity INTEGER, PRIMARY KEY (id)) MAPPED BY (hbase_sales10m_onekey, COLS=[product=f.product, region=f.region, sales=f.sales, quantity=f.quantity]);
 ```
 
 (4) Load data :
 ```
 LOAD DATA INPATH './examples/sales1m.csv' INTO TABLE sales1m FIELDS TERMINATED BY "," ;
 LOAD DATA INPATH './examples/sales1m.csv' INTO TABLE sales1m_onekey FIELDS TERMINATED BY "," ;
-
-LOAD DATA INPATH './examples/sales10m.csv' INTO TABLE sales10m FIELDS TERMINATED BY "," ;
-LOAD DATA INPATH './examples/sales10m.csv' INTO TABLE sales10m_onekey FIELDS TERMINATED BY "," ;
 ```
 
-
 (3) Query:
 ```
    // test count *