Sqoop Cammand

Download as txt, pdf, or txt
Download as txt, pdf, or txt
You are on page 1of 8

-----------------------------------------------------

MYSQL COMMANDS
-----------------------------------------------------

How to install mysql:


-----------------------------------------------------
sudo apt-get install mysql-server mysql-client

How to un-install mysql:


-----------------------------------------------------
sudo apt-get autoremove mysql-server mysql-client

How to start mysql:


-----------------------------------------------------
sudo service mysql start

How to stop mysql:


-----------------------------------------------------
sudo service mysql stop

How to connect to mysql:


-----------------------------------------------------
mysql -u root -p

Export the data from `mysql database` to `sql file`


-----------------------------------------------------
mysqldump -u root -p deepak > /home/deepak/work/sqoop_inputs/deepak.sql
mysqldump -u root -p sqoop > /home/deepak/work/sqoop_inputs/sqoop.sql

Import the data from `sql file` to `mysql database`


-----------------------------------------------------
create database deepak;
create database sqoop;

mysql -u root -p deepak < /home/deepak/work/sqoop_inputs/deepak.sql


mysql -u root -p sqoop < /home/deepak/work/sqoop_inputs/sqoop.sql

-----------------------------------------------------
SQOOP COMMANDS
-----------------------------------------------------

VERIFY MYSQL DATA USING `SQOOP`


-----------------------------------------------------
sqoop list-databases \
--connect "jdbc:mysql://localhost:3306" \
--username root \
--password hadoop

sqoop list-tables \
--connect "jdbc:mysql://localhost:3306/deepak" \
--username root \
--password hadoop
sqoop eval \
--connect "jdbc:mysql://localhost:3306/deepak" \
--username root \
--password hadoop \
--query "select count(1) from order_items"

sqoop eval \
--options-file /home/deepak/work/sqoop_inputs/connection_details.txt \
--query "show tables"

IMPORT DATA FROM `RDBMS` TO `HDFS`


-----------------------------------------------------
sqoop import-all-tables \
--connect "jdbc:mysql://localhost:3306/deepak" \
--username=root \
--password=hadoop \
--warehouse-dir=/sqoop/import-all-data/deepak.db \
--exclude-tables customers,order_items,orders,products

sqoop import \
--connect "jdbc:mysql://localhost:3306/sqoop" \
--username=root \
--password=hadoop \
--table pet \
--as-textfile \
--target-dir=/sqoop/import-data/sqoop.db/pet_m \
-m 1

sqoop import \
--connect "jdbc:mysql://localhost:3306/sqoop" \
--username=root \
--password=hadoop \
--table pet \
--as-sequencefile \
--target-dir=/sqoop/import-data/sqoop.db/pet_split_by \
--split-by name

SQOOP SUPPORTS DIFFERENT STORAGE OPTIONS


-----------------------------------------------------
--as-textfile
--as-sequencefile
--as-avrodatafile
--as-parquetfile

IMPORT DATA USING `boundary-query`, `columns` and `delete-target-dir`


-----------------------------------------------------
sqoop import \
--connect "jdbc:mysql://localhost:3306/sqoop" \
--username=root \
--password=hadoop \
--table student \
--target-dir /sqoop/boundary-data/sqoop.db/student \
--delete-target-dir \
--boundary-query "select 1, 10 from student limit 1" \
--columns id,name,class \
-m 1

IMPORT DATA USING `boundary-query`, `append`,


`fields-terminated-by`, `lines-terminated-by`
-----------------------------------------------------
sqoop import \
--connect "jdbc:mysql://localhost:3306/sqoop" \
--username=root \
--password=hadoop \
--table student \
--target-dir /sqoop/boundary-data/sqoop.db/student \
--boundary-query "select 11, 20 from student limit 1" \
--append \
--fields-terminated-by '|' \
--lines-terminated-by '\n' \
--num-mappers 1 \
--outdir java_files

IMPORT DATA USING `split-by`, `append`,


`fields-terminated-by`, `lines-terminated-by`
-----------------------------------------------------
sqoop import \
--connect "jdbc:mysql://localhost:3306/sqoop" \
--username=root \
--password=hadoop \
--table student \
--target-dir /sqoop/boundary-data/sqoop.db/student \
--append \
--fields-terminated-by ',' \
--lines-terminated-by '\n' \
--split-by id \
--outdir java_files

IMPORT DATA USING `query`, `split-by`,


-----------------------------------------------------
sqoop import \
--connect "jdbc:mysql://localhost:3306/deepak" \
--username=root \
--password=hadoop \
--query="select * from orders join order_items on orders.order_id =
order_items.order_item_order_id where \$CONDITIONS" \
--target-dir /sqoop/join-data/deepak.db/order_join \
--split-by order_id \
--num-mappers 2

IMPORT DATA USING `where`, `num-mappers`, `delete-target-dir`,


`fields-terminated-by`, `lines-terminated-by`
-----------------------------------------------------
sqoop import \
--connect "jdbc:mysql://localhost:3306/sqoop" \
--username=root \
--password=hadoop \
--table student \
--target-dir /sqoop/data/sqoop.db/student \
--delete-target-dir \
--fields-terminated-by '|' \
--lines-terminated-by '\n' \
--num-mappers 1 \
--where "id <= 10" \
--outdir java_files

IMPORT DATA USING `incremental`, `last-value`, `query`,


`append`, `fields-terminated-by`, `lines-terminated-by`
-----------------------------------------------------
sqoop import \
--connect "jdbc:mysql://localhost:3306/sqoop" \
--username=root \
--password=hadoop \
--target-dir /sqoop/data/sqoop.db/student \
--query="select * from student where id <= 20 and \$CONDITIONS" \
--append \
--num-mappers 1 \
--fields-terminated-by '|' \
--lines-terminated-by '\n' \
--check-column "id" \
--incremental append \
--last-value 10 \
--outdir java_files

`SQOOP JOB` EXAMPES


-----------------------------------------------------
sqoop job --list

sqoop job --show myjob1

sqoop job --exec myjob1

sqoop job --delete myjob1

HOW TO CREATE `SQOOP JOB`


-----------------------------------------------------
sqoop job --create myjob1 \
-- list-databases \
--connect "jdbc:mysql://localhost:3306" \
--username root \
--password hadoop

sqoop job --create myjob2 \


-- list-tables \
--connect "jdbc:mysql://localhost:3306/deepak" \
--username root \
--password hadoop

-----------------------------------------------------
Connect to mysql and create database for reporting database
-----------------------------------------------------
mysql -u root -p

// create new table with schema & data


create table deepak.departments1 as select * from deepak.departments;

// create new table with schema


create table deepak.departments2 like deepak.departments;

exit;

-----------------------------------------------------
EXPORT DATA FROM `HDFS` TO `RDBMS TABLE`
-----------------------------------------------------
sqoop import \
--connect "jdbc:mysql://localhost:3306/deepak" \
--username=root \
--password=hadoop \
--table departments1 \
--target-dir=/sqoop/deepak.db/departments \
--delete-target-dir \
--fields-terminated-by ',' \
--lines-terminated-by '\n' \
--as-textfile \
--num-mappers 1

sqoop export \
--connect "jdbc:mysql://localhost:3306/deepak" \
--username root \
--password hadoop \
--table departments2 \
--export-dir /sqoop/deepak.db/departments \
--input-fields-terminated-by ',' \
--input-lines-terminated-by '\n' \
--num-mappers 1 \
--batch \
--outdir java_files

-----------------------------------------------------
HOW TO WRITE `eval` & `merge` QUERIES USING SQOOP
-----------------------------------------------------

1. UPDATE DATA IN `RDBMS TABLE`


-----------------------------------------------------
sqoop eval --connect "jdbc:mysql://localhost:3306/deepak" \
--username root \
--password hadoop \
--query "update departments1 set department_name='updated' where department_id =
7"

2. INSERT DATA IN `RDBMS TABLE`


-----------------------------------------------------
sqoop eval --connect "jdbc:mysql://localhost:3306/deepak" \
--username root \
--password hadoop \
--query "insert into departments1 values (8, 'inserted')"

3. READ DATA FROM `RDBMS TABLE`


-----------------------------------------------------
sqoop eval --c onnect "jdbc:mysql://localhost:3306/deepak" \
--username root \
--password hadoop \
--query "select * from departments1"

4. IMPORT DATA FROM `RDBMS TABLE` TO `HDFS`


-----------------------------------------------------
sqoop import \
--connect "jdbc:mysql://localhost:3306/deepak" \
--username=root \
--password=hadoop \
--table departments1 \
--as-textfile \
--num-mappers 1 \
--target-dir=/sqoop/deepak.db/departments_delta \
--where "department_id >= 7" \
--bindir java_files \
--outdir java_files

5. IMPORT DATA FROM `RDBMS TABLE` TO `HDFS`


-----------------------------------------------------
sqoop merge --merge-key department_id \
--new-data /sqoop/deepak.db/departments_delta \
--onto /sqoop/deepak.db/departments \
--target-dir /sqoop/deepak.db/departments_stage \
--class-name departments1 \
--jar-file <get_it_from_last_import>

sqoop merge --merge-key department_id \


--new-data /sqoop/deepak.db/departments_delta \
--onto /sqoop/deepak.db/departments \
--target-dir /sqoop/deepak.db/departments_stage \
--class-name departments1 \
--jar-file java_files/departments1.jar

6. DELETE DATA FROM `HDFS`


-----------------------------------------------------
hadoop fs -rm -R /sqoop/deepak.db/departments_delta

7. EXPORT DATA FROM `HDFS` TO `RDBMS TABLE` with 'update-mode'


-----------------------------------------------------
sqoop export \
--connect "jdbc:mysql://localhost:3306/deepak" \
--username root \
--password hadoop \
--table departments2 \
--export-dir /sqoop/deepak.db/departments_stage \
--update-key department_id \
--update-mode updateonly \
--outdir java_files \
--batch \
--num-mappers 1

sqoop export \
--connect "jdbc:mysql://localhost:3306/deepak" \
--username root \
--password hadoop \
--table departments2 \
--export-dir /sqoop/deepak.db/departments_stage \
--update-key department_id \
--update-mode allowinsert \
--outdir java_files \
--batch \
--num-mappers 1

Integrate SQOOP with HIVE


------------------------------------------------
copy 'hive-site.xml' file from '$HIVE_HOME/conf' folder to '$SQOOP_HOME/conf'
folder

IMPORT ALL TABLES FROM `RDBMS` TO `HIVE` WITH COMPRESSION


----------------------------------------------------------------------------
sqoop import-all-tables \
--connect "jdbc:mysql://localhost:3306/sqoop" \
--username=root \
--password=hadoop \
--hive-import \
--hive-overwrite \
--create-hive-table \
--warehouse-dir=/sqoop/hive_data/sqoop.db \
--compress \
--compression-codec org.apache.hadoop.io.compress.BZip2Codec \
--num-mappers 1 \
--outdir java_files

VERIFY ALL TABLES IN HIVE 'default' DATABASE


-----------------------------------------------------
USE default;

SHOW TABLES;

SELECT * FROM pet;

IMPORT DATA FROM `RDBMS` TO `HIVE`


-----------------------------------------------------
sqoop import \
--connect "jdbc:mysql://localhost:3306/deepak" \
--username=root \
--password=hadoop \
--table orders \
--fields-terminated-by '|' \
--lines-terminated-by '\n' \
--hive-import \
--hive-overwrite \
--hive-table myorders1 \
--outdir java_files
sqoop import \
--connect "jdbc:mysql://localhost:3306/deepak" \
--username=root \
--password=hadoop \
--table orders \
--fields-terminated-by '|' \
--lines-terminated-by '\n' \
--hive-import \
--create-hive-table \
--hive-table myorders2 \
--outdir java_files

IMPORT DATA FROM `RDBMS` TO `HBASE`


-----------------------------------------------------
sqoop import \
--connect "jdbc:mysql://localhost:3306/sqoop" \
--username=root \
--password=hadoop \
--table student \
--hbase-create-table \
--hbase-table student \
--column-family cf \
--hbase-row-key id \
--outdir java_files \
-m 1

(Note: hadoop-2.6.0, hbase-1.1.2, sqoop-1.4.6 are not compatible)

(Note: hadoop-2.6.0, hbase-0.98.4, sqoop-1.4.6 are compatible)

/*
sqoop export \
--connect "jdbc:mysql://localhost:3306/file" \
--username root \
--password hortonworks1 \
--table Employee\
--export-dir /file/em \
--input-fields-terminated-by ',' \
--input-lines-terminated-by '\n' \
--num-mappers 1
*/

You might also like