Commit b9e1d04

Author: xudong.yang

    add xlearning train script

1 parent 3ddf50c

6 files changed: 201 additions, 0 deletions


DIN/xlearning.sh

Lines changed: 33 additions & 0 deletions
#!/bin/sh
set -e -x
export HADOOP_HOME=/data2/hadoop-2.6.0-cdh5.4.0

# Clear the checkpoint and export directories for this business date.
hadoop fs -rm -r -f hdfs://bigdata/tmp/bbdin/${bizdate}
hadoop fs -rm -r -f hdfs://bigdata/tmp/bbdin-output/${bizdate}

# Build comma-separated lists of the 7 most recent sample/eval partitions.
train_data_file=`hadoop fs -ls /user/xudong.yang/mainstream/samples | tail -n 7 | awk -F" " '{print $8}' | xargs | sed -e 's/ /,/g'`
test_data_file=`hadoop fs -ls /user/xudong.yang/mainstream/eval | tail -n 7 | awk -F" " '{print $8}' | xargs | sed -e 's/ /,/g'`

/opt/beibei/xlearning/bin/xl-submit \
  --app-type "tensorflow" \
  --input-strategy "Placeholder" \
  --app-name "mainstream_din_model" \
  --board-logdir hdfs://bigdata/tmp/bbdin/${bizdate} \
  --input ${train_data_file}#train_data \
  --input ${test_data_file}#eval_data \
  --files hdfs://bigdata/user/xudong.yang/din/train_bb.py,hdfs://bigdata/user/xudong.yang/din/bb_input_fn.py,hdfs://bigdata/user/xudong.yang/din/deep_interest_network.py \
  --launch-cmd "python train_bb.py --learning_rate=0.01 --attention_hidden_units=16 --shuffle_buffer_size=25600 --save_checkpoints_steps=10000 --train_steps=200000 --batch_size=256 \
    --dropout_rate=0.5 --optimizer=Adagrad --train_data train_data --eval_data eval_data --model_dir hdfs://bigdata/tmp/bbdin/${bizdate} --output_model hdfs://bigdata/tmp/bbdin-output/${bizdate}" \
  --worker-memory 12G \
  --worker-num 10 \
  --worker-cores 8 \
  --ps-memory 10G \
  --ps-num 1 \
  --ps-cores 8 \
  --queue default

# Publish the latest exported model to the serving directory.
#hadoop fs -mkdir /user/xudong.yang/mainstream/bbdin_output/
model_fold=`hadoop fs -ls hdfs://bigdata/tmp/bbdin-output/${bizdate} | tail -n 1 | awk -F" " '{print $8}' | xargs | sed -e 's/ /,/g'`
hadoop fs -rm -r -f /user/xudong.yang/mainstream/bbdin_output/*
hadoop fs -cp ${model_fold}/* /user/xudong.yang/mainstream/bbdin_output
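
The train_data_file assignment above is the idiom every script in this commit uses to feed xl-submit: hadoop fs -ls lists the partition directories, tail -n 7 keeps the seven newest, awk extracts the path column, and xargs plus sed join the paths into the comma-separated list that --input expects. A minimal sketch of the join step, runnable without a cluster (the paths are hypothetical):

# hadoop fs -ls prints one line per entry with the path in the 8th field, e.g.
#   drwxr-xr-x   - xudong.yang hadoop 0 2019-01-01 02:00 /user/xudong.yang/mainstream/samples/pt=20190101
# After awk picks out that field, the join works like this:
printf '%s\n' /data/pt=20190101 /data/pt=20190102 | xargs | sed -e 's/ /,/g'
# -> /data/pt=20190101,/data/pt=20190102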

DUPN/xlearning.sh

Lines changed: 34 additions & 0 deletions
#!/bin/sh
set -e -x
export HADOOP_HOME=/data2/hadoop-2.6.0-cdh5.4.0

# Clear the checkpoint and export directories for this business date.
hadoop fs -rm -r -f hdfs://bigdata/tmp/dupn-share/${bizdate}
hadoop fs -rm -r -f hdfs://bigdata/tmp/dupn-share-output/${bizdate}

# Build comma-separated lists of the 7 most recent sample/eval partitions.
train_data_file=`hadoop fs -ls hdfs://bigdata/user/xudong.yang/bd_mainstream/new_samples/ | tail -n 7 | awk -F" " '{print $8}' | xargs | sed -e 's/ /,/g'`
test_data_file=`hadoop fs -ls hdfs://bigdata/user/xudong.yang/bd_mainstream/new_eval/ | tail -n 7 | awk -F" " '{print $8}' | xargs | sed -e 's/ /,/g'`

/opt/beibei/xlearning/bin/xl-submit \
  --app-type "tensorflow" \
  --input-strategy "Placeholder" \
  --app-name "bd_mainstream_share_dupn_model" \
  --board-logdir hdfs://bigdata/tmp/dupn-share/${bizdate} \
  --input ${train_data_file}#train_data \
  --input ${test_data_file}#eval_data \
  --files hdfs://bigdata/user/xudong.yang/mainstream/model/dupn.py \
  --launch-cmd "python dupn.py --hidden_units=512,256 --learning_rate=0.005 --shuffle_buffer_size=10000 --save_checkpoints_steps=10000 --train_steps=100000 --batch_size=256 \
    --train_data train_data --eval_data eval_data --model_dir hdfs://bigdata/tmp/dupn-share/${bizdate} --output_model hdfs://bigdata/tmp/dupn-share-output/${bizdate}" \
  --worker-memory 15G \
  --worker-num 5 \
  --worker-cores 8 \
  --ps-memory 5G \
  --ps-num 1 \
  --ps-cores 5 \
  --queue default

# Create the serving directory on first run, then publish the new model.
set +e
hadoop fs -test -e /user/xudong.yang/mainstream/dupn_share_output/
[ $? -ne 0 ] && hadoop fs -mkdir /user/xudong.yang/mainstream/dupn_share_output/
model_fold=`hadoop fs -ls hdfs://bigdata/tmp/dupn-share-output/${bizdate} | awk -F" " '{print $8}' | xargs | sed -e 's/ /,/g'`
hadoop fs -rm -r -f /user/xudong.yang/mainstream/dupn_share_output/*
hadoop fs -cp ${model_fold}/* /user/xudong.yang/mainstream/dupn_share_output
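
The set +e / hadoop fs -test -e pair above creates the serving directory only on the first run, without letting the non-zero exit status of the test abort a script running under set -e. A shorter equivalent, assuming the cluster's Hadoop 2.x shell (which supports the -p flag):

# -p makes mkdir succeed whether or not the directory already exists
hadoop fs -mkdir -p /user/xudong.yang/mainstream/dupn_share_output/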

base-model/xlearning.sh

Lines changed: 33 additions & 0 deletions
#!/bin/sh

set -e -x
export GRPC_VERBOSITY="DEBUG"
export HADOOP_HOME=/data2/hadoop-2.6.0-cdh5.4.0

# Clear the checkpoint and export directories for this business date.
hadoop fs -rm -r -f hdfs://bigdata/tmp/base-model/${bizdate}
hadoop fs -rm -r -f hdfs://bigdata/tmp/base-model-output/${bizdate}
#hadoop fs -rm -f hdfs://bigdata/user/xudong.yang/mainstream/samples/pt=${bizdate}/_SUCCESS

# Build comma-separated lists of the 7 most recent sample/eval partitions.
train_data_file=`hadoop fs -ls /user/xudong.yang/mainstream/samples | tail -n 7 | awk -F" " '{print $8}' | xargs | sed -e 's/ /,/g'`
test_data_file=`hadoop fs -ls /user/xudong.yang/mainstream/eval | tail -n 7 | awk -F" " '{print $8}' | xargs | sed -e 's/ /,/g'`

/opt/beibei/xlearning/bin/xl-submit \
  --app-type "tensorflow" \
  --input-strategy "Placeholder" \
  --app-name "mainstream_dnn_base_model" \
  --board-logdir hdfs://bigdata/tmp/base-model/${bizdate} \
  --input ${train_data_file}#train_data \
  --input ${test_data_file}#eval_data \
  --files hdfs://bigdata/user/xudong.yang/mainstream/model/base-model-weighted.py \
  --launch-cmd "python base-model-weighted.py --save_checkpoints_steps=10000 --train_steps=200000 --batch_size=256 --train_data train_data --eval_data eval_data --model_dir hdfs://bigdata/tmp/base-model/${bizdate} --output_model hdfs://bigdata/tmp/base-model-output/${bizdate}" \
  --worker-memory 10G \
  --worker-num 6 \
  --worker-cores 8 \
  --ps-memory 3G \
  --ps-num 1 \
  --ps-cores 5 \
  --queue default

# Publish the exported model to the serving directory.
#hadoop fs -mkdir /user/xudong.yang/mainstream/model_output_v2/
model_fold=`hadoop fs -ls hdfs://bigdata/tmp/base-model-output/${bizdate} | awk -F" " '{print $8}' | xargs | sed -e 's/ /,/g'`
hadoop fs -rm -r -f /user/xudong.yang/mainstream/model_output_v2/*
hadoop fs -cp ${model_fold}/* /user/xudong.yang/mainstream/model_output_v2

deepFM/xlearning.sh

Lines changed: 34 additions & 0 deletions
#!/bin/sh
set -e -x
export HADOOP_HOME=/data2/hadoop-2.6.0-cdh5.4.0

# Clear the checkpoint and export directories for this business date.
hadoop fs -rm -r -f hdfs://bigdata/tmp/deepfm/${bizdate}
hadoop fs -rm -r -f hdfs://bigdata/tmp/deepfm-output/${bizdate}

# Build comma-separated lists of the 7 most recent sample/eval partitions.
train_data_file=`hadoop fs -ls /user/xudong.yang/mainstream/samples | tail -n 7 | awk -F" " '{print $8}' | xargs | sed -e 's/ /,/g'`
test_data_file=`hadoop fs -ls /user/xudong.yang/mainstream/eval | tail -n 7 | awk -F" " '{print $8}' | xargs | sed -e 's/ /,/g'`

/opt/beibei/xlearning/bin/xl-submit \
  --app-type "tensorflow" \
  --input-strategy "Placeholder" \
  --app-name "mainstream_deepfm_model" \
  --board-logdir hdfs://bigdata/tmp/deepfm/${bizdate} \
  --input ${train_data_file}#train_data \
  --input ${test_data_file}#eval_data \
  --files hdfs://bigdata/user/xudong.yang/deepfm/train_deepfm.py,hdfs://bigdata/user/xudong.yang/deepfm/deepfm_input_fn.py,hdfs://bigdata/user/xudong.yang/deepfm/deepfm.py \
  --launch-cmd "python train_deepfm.py --learning_rate=0.01 --shuffle_buffer_size=30000 --save_checkpoints_steps=10000 --train_steps=200000 --batch_size=256 \
    --dropout_rate=0.5 --train_data train_data --eval_data eval_data --model_dir hdfs://bigdata/tmp/deepfm/${bizdate} --output_model hdfs://bigdata/tmp/deepfm-output/${bizdate}" \
  --worker-memory 12G \
  --worker-num 6 \
  --worker-cores 8 \
  --ps-memory 4G \
  --ps-num 1 \
  --ps-cores 5 \
  --queue default

# Create the serving directory on first run, then publish the new model.
set +e
hadoop fs -test -e /user/xudong.yang/mainstream/deepfm_output/
[ $? -ne 0 ] && hadoop fs -mkdir /user/xudong.yang/mainstream/deepfm_output/
model_fold=`hadoop fs -ls hdfs://bigdata/tmp/deepfm-output/${bizdate} | tail -n 1 | awk -F" " '{print $8}' | xargs | sed -e 's/ /,/g'`
hadoop fs -rm -r -f /user/xudong.yang/mainstream/deepfm_output/*
hadoop fs -cp ${model_fold}/* /user/xudong.yang/mainstream/deepfm_output

esmm/xlearning.sh

Lines changed: 33 additions & 0 deletions
#!/bin/sh
set -e -x
export HADOOP_HOME=/data2/hadoop-2.6.0-cdh5.4.0

# Clear the checkpoint and export directories for this business date,
# and drop the _SUCCESS marker from the latest sample partition.
hadoop fs -rm -r -f hdfs://bigdata/tmp/esmm-model/${bizdate}
hadoop fs -rm -r -f hdfs://bigdata/tmp/esmm-model-output/${bizdate}
hadoop fs -rm -f hdfs://bigdata/user/xudong.yang/bd_mainstream/samples/pt=${bizdate}/_SUCCESS

# Build comma-separated lists of the 5 most recent sample/eval partitions.
train_data_file=`hadoop fs -ls /user/xudong.yang/bd_mainstream/samples | tail -n 5 | awk -F" " '{print $8}' | xargs | sed -e 's/ /,/g'`
test_data_file=`hadoop fs -ls /user/xudong.yang/bd_mainstream/eval | tail -n 5 | awk -F" " '{print $8}' | xargs | sed -e 's/ /,/g'`

/opt/beibei/xlearning/bin/xl-submit \
  --app-type "tensorflow" \
  --input-strategy "Placeholder" \
  --app-name "bd_mainstream_esmm_model" \
  --board-logdir hdfs://bigdata/tmp/esmm-model/${bizdate} \
  --input ${train_data_file}#train_data \
  --input ${test_data_file}#eval_data \
  --files hdfs://bigdata/user/xudong.yang/mainstream/model/esmm.py \
  --launch-cmd "python esmm.py --hidden_units=512,256 --learning_rate=0.005 --shuffle_buffer_size=300000 --save_checkpoints_steps=10000 --train_steps=100000 --batch_size=512 \
    --train_data train_data --eval_data eval_data --model_dir hdfs://bigdata/tmp/esmm-model/${bizdate} --output_model hdfs://bigdata/tmp/esmm-model-output/${bizdate}" \
  --worker-memory 10G \
  --worker-num 6 \
  --worker-cores 8 \
  --ps-memory 3G \
  --ps-num 1 \
  --ps-cores 5 \
  --queue default

# Publish the exported model to the serving directory.
#hadoop fs -mkdir /user/xudong.yang/mainstream/esmm_model_output/
model_fold=`hadoop fs -ls hdfs://bigdata/tmp/esmm-model-output/${bizdate} | awk -F" " '{print $8}' | xargs | sed -e 's/ /,/g'`
hadoop fs -rm -r -f /user/xudong.yang/mainstream/esmm_model_output/*
hadoop fs -cp ${model_fold}/* /user/xudong.yang/mainstream/esmm_model_output

esmm_ext/xlearning.sh

Lines changed: 34 additions & 0 deletions
#!/bin/sh
set -e -x
export HADOOP_HOME=/data2/hadoop-2.6.0-cdh5.4.0

# Clear the checkpoint and export directories for this business date.
hadoop fs -rm -r -f hdfs://bigdata/tmp/esmm-dcn/${bizdate}
hadoop fs -rm -r -f hdfs://bigdata/tmp/esmm-dcn-output/${bizdate}

# Build comma-separated lists of the 5 most recent sample/eval partitions.
train_data_file=`hadoop fs -ls /user/xudong.yang/bd_mainstream/samples | tail -n 5 | awk -F" " '{print $8}' | xargs | sed -e 's/ /,/g'`
test_data_file=`hadoop fs -ls /user/xudong.yang/bd_mainstream/eval | tail -n 5 | awk -F" " '{print $8}' | xargs | sed -e 's/ /,/g'`

/opt/beibei/xlearning/bin/xl-submit \
  --app-type "tensorflow" \
  --input-strategy "Placeholder" \
  --app-name "bd_mainstream_esmm_dcn" \
  --board-logdir hdfs://bigdata/tmp/esmm-dcn/${bizdate} \
  --input ${train_data_file}#train_data \
  --input ${test_data_file}#eval_data \
  --files hdfs://bigdata/user/xudong.yang/esmm/esmm.py,hdfs://bigdata/user/xudong.yang/esmm/train.py,hdfs://bigdata/user/xudong.yang/esmm/dcn_input_fn.py,hdfs://bigdata/user/xudong.yang/esmm/dcn_logit_fn.py,hdfs://bigdata/user/xudong.yang/esmm/din_logit_fn.py \
  --launch-cmd "python train.py --hidden_units=256,256 --learning_rate=0.0005 --shuffle_buffer_size=12800 --save_checkpoints_steps=10000 --train_steps=10000 --batch_size=128 \
    --use_batch_norm=false --train_data train_data --eval_data eval_data --model_dir hdfs://bigdata/tmp/esmm-dcn/${bizdate} --output_model hdfs://bigdata/tmp/esmm-dcn-output/${bizdate}" \
  --worker-memory 15G \
  --worker-num 8 \
  --worker-cores 8 \
  --ps-memory 5G \
  --ps-num 1 \
  --ps-cores 5 \
  --queue default

# Create the serving directory on first run, then publish the new model.
set +e
hadoop fs -test -e /user/xudong.yang/mainstream/esmm_dcn_output/
[ $? -ne 0 ] && hadoop fs -mkdir /user/xudong.yang/mainstream/esmm_dcn_output/
model_fold=`hadoop fs -ls hdfs://bigdata/tmp/esmm-dcn-output/${bizdate} | awk -F" " '{print $8}' | xargs | sed -e 's/ /,/g'`
hadoop fs -rm -r -f /user/xudong.yang/mainstream/esmm_dcn_output/*
hadoop fs -cp ${model_fold}/* /user/xudong.yang/mainstream/esmm_dcn_output
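
All six scripts follow the same clean, train, publish pattern and read ${bizdate} from the environment. A hypothetical driver, sketched here for illustration only (the date and the loop are not part of this commit):

#!/bin/sh
set -e
export bizdate=20190101   # example value; normally injected by the scheduler
for model in DIN DUPN base-model deepFM esmm esmm_ext; do
    sh "${model}/xlearning.sh"
done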
