# Display name of this workflow.
name: Distribute CI (V100)

# Triggers:
#   - pull requests targeting `develop` (opened / new commits / reopened),
#   - a daily schedule,
#   - invocation from another workflow via `workflow_call`.
on:
  pull_request:
    types: [opened, synchronize, reopened]
    branches: [develop]
  schedule:
    # Quoted so the leading digit and `*` are never re-typed by YAML; no
    # leading whitespace inside the quotes.
    - cron: "2 0 * * *"
  workflow_call:
    inputs:
      # Compared against the literal string "false" by every job step;
      # also exported to the steps as RUN_DOWNSTREAM.
      run_downstream:
        required: true
        type: string
      # Container image to run the tests in. The job falls back to a
      # default image when this value is empty.
      image_name:
        required: true
        type: string

# One active run per workflow + pull request; a newer run cancels the one
# still in progress for the same group.
# NOTE(review): the PR number is presumably empty for non-PR events, so all
# such runs would share a single group -- confirm this is intended.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
  cancel-in-progress: true

# Workflow-level environment shared by every step. Most of these are
# forwarded into the CI container with `docker run -e` in the job below.
env:
  # Pull-request identity (number and head commit).
  PR_ID: ${{ github.event.pull_request.number }}
  COMMIT_ID: ${{ github.event.pull_request.head.sha }}
  # Prefix of the container name; a timestamp is appended at run time.
  TASK: paddlenlp-CI-${{ github.event.pull_request.number }}-Distribut-V100
  ci_scripts: /workspace/PaddleNLP/scripts/distribute
  # Base branch of the pull request.
  BRANCH: ${{ github.event.pull_request.base.ref }}
  AGILE_COMPILE_BRANCH: ${{ github.event.pull_request.base.ref }}
  CI_name: distribute-ci
  # Comma-separated host list; must not start with whitespace, otherwise the
  # first entry would not match `localhost`.
  no_proxy: "localhost,bj.bcebos.com,su.bcebos.com,bcebos.com,apiin.im.baidu.com,gitee.com,aliyun.com,.baidu.com,.tuna.tsinghua.edu.cn"
  GITHUB_EVENT_NAME: ${{ github.event_name }}
  # Steps skip their work when this equals the string "false".
  RUN_DOWNSTREAM: ${{ inputs.run_downstream }}

# Every `run:` step in this workflow is executed with bash; the steps rely
# on bash-only syntax such as `[[ ... ]]`.
defaults:
  run:
    shell: bash

jobs:
  # Single job: start a GPU container, fetch the code, run the distributed
  # test script, upload the logs, and always remove the container.
  distribute-v100-ci:
    name: distribute-v100-ci
    runs-on:
      group: Auto-Parallel
    steps:
      # Export IMAGE_NAME for the following steps: the workflow_call input
      # when it is non-empty, otherwise the default Paddle dev image.
      - name: Determine Image Name
        env:
          IMAGE_NAME: ${{ inputs.image_name }}
        run: |
          if [[ -n "${IMAGE_NAME}" ]]; then
            echo "IMAGE_NAME=${IMAGE_NAME}" >> "$GITHUB_ENV"
          else
            echo "IMAGE_NAME=registry.baidubce.com/paddlepaddle/paddle:latest-dev-cuda11.8-cudnn8.6-trt8.5-gcc82" >> "$GITHUB_ENV"
          fi

      # Start the detached container. Its name is exported through
      # GITHUB_ENV so later steps (including cleanup) can address it.
      - name: Run Container
        env:
          work_dir: ${{ github.workspace }}
          # NOTE(review): CACHE_DIR is not forwarded to the container by the
          # `-e` list below -- confirm whether it is still needed.
          CACHE_DIR: /home/data/cfs/.cache
          FLAGS_dynamic_static_unified_comm: "True"
          # Quoted so YAML keeps "3.10" a string (unquoted it is the float
          # 3.1); no leading space, because the Test step expands it into
          # `python${python_version}`.
          python_version: "3.10"
          paddle_whl: https://paddle-qa.bj.bcebos.com/paddle-pipeline/Develop-GpuSome-LinuxCentos-Gcc82-Cuda118-Cudnn86-Trt85-Py310-CINN-Compile/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
        run: |
          container_name=${TASK}-$(date +%Y%m%d-%H%M%S)
          echo "container_name=${container_name}" >> "$GITHUB_ENV"
          if [[ "$RUN_DOWNSTREAM" == "false" ]]; then
            echo "Not in a pull_request or test_build event. Skipping..."
          else
            # The image is read from the IMAGE_NAME environment variable
            # (set by the previous step) instead of being templated into
            # the script, so its value is never parsed as shell code.
            nvidia-docker run -d -t --name "${container_name}" --net=host -v /dev/shm:/dev/shm --shm-size=32G \
              -v "$work_dir/../../..:$work_dir/../../.." \
              -v "$work_dir:/workspace" \
              -v /home/.cache/pip:/home/.cache/pip \
              -v /home/FleetX_CI:/fleetx_data \
              -v /home/Llm_gpt_CI:/llm_gpt_data \
              -v /home/Llama_CI:/llama_data \
              -e BRANCH \
              -e AGILE_COMPILE_BRANCH \
              -e PR_ID \
              -e COMMIT_ID \
              -e work_dir \
              -e ci_scripts \
              -e no_proxy \
              -e CI_name \
              -e paddle_whl \
              -e FLAGS_dynamic_static_unified_comm \
              -e python_version \
              -w /workspace --runtime=nvidia "${IMAGE_NAME}"
          fi

      # Wipe /workspace inside the container, unpack the PaddleNLP snapshot
      # and, for pull requests, check out the PR head and merge the base
      # branch into it.
      # NOTE(review): the `upstream` remote points at PaddleFormers.git while
      # the checkout is PaddleNLP, and the merge uses the local ${BRANCH}
      # rather than upstream/${BRANCH} -- confirm both are intended.
      - name: Download Code
        run: |
          if [[ "$RUN_DOWNSTREAM" == "false" ]]; then
            echo "Not in a pull_request or test_build event. Skipping.."
          else
            docker exec -t "$container_name" /bin/bash -c '
            rm -rf * .[^.]*
            echo "Downloading PaddleNLP.tar.gz"
            wget -q --no-proxy https://paddle-qa.bj.bcebos.com/CodeSync/develop/PaddleNLP.tar --no-check-certificate
            echo "Extracting PaddleNLP.tar.gz"
            tar xf PaddleNLP.tar && rm -rf PaddleNLP.tar
            source $work_dir/../../../proxy
            cd PaddleNLP
            git config --global user.name "PaddleCI"
            git config --global user.email "paddle_ci@example.com"
            git pull
            git submodule update --init --recursive --force
            if [ -n "${PR_ID}" ]; then
              git fetch origin pull/${PR_ID}/head
              git checkout -b PR_${PR_ID} FETCH_HEAD
              git remote add upstream https://github.com/PaddlePaddle/PaddleFormers.git
              git fetch upstream ${BRANCH}
              git merge ${BRANCH} --no-edit
              git diff --numstat ${BRANCH} -- | awk "{print \$NF}"
            else
              echo "Not in a pull_request event. Skipping PR-specific operations."
            fi
            git log --pretty=oneline -10
            '
          fi

      # Run the distributed CI script inside the container with an
      # 80 minute limit; `set -e` makes any later failure fail the step.
      - name: Test
        run: |
          if [[ "$RUN_DOWNSTREAM" == "false" ]]; then
            echo "Not in a pull_request or test_build event. Skipping..."
          else
            docker exec -t "$container_name" /bin/bash -c '
            ldconfig
            ln -sf $(which python${python_version}) /usr/bin/python
            pip config set global.cache-dir "/home/.cache/pip"
            source $work_dir/../../../proxy
            set -e
            cd /workspace/PaddleNLP && git config --global --add safe.directory $PWD
            timeout 80m bash scripts/distribute/run_ci.sh ${paddle_whl}
            '
          fi

      # Upload every file in /workspace/case_logs, plus a tarball of the
      # directory, to BOS. Runs even when an earlier step failed.
      # home_path, bos_file and RUN_DOWNSTREAM are templated into the script
      # because the container was not started with `-e` for them.
      - name: Upload Logs
        if: always()
        env:
          home_path: ${{ github.workspace }}/../../..
          bos_file: ${{ github.workspace }}/../../../bos/BosClient.py
        run: |
          if [[ "$RUN_DOWNSTREAM" == "false" ]]; then
            echo "Not in a pull_request or test_build event. Skipping..."
          else
            docker exec -t "$container_name" /bin/bash -c '
            if [ ! -f "${{ env.bos_file }}" ]; then
              wget -q --no-proxy -O ${{ env.home_path }}/bos_new.tar.gz https://xly-devops.bj.bcebos.com/home/bos_new.tar.gz --no-check-certificate
              # -p: the directory may already exist without BosClient.py.
              mkdir -p ${{ env.home_path }}/bos
              tar xf ${{ env.home_path }}/bos_new.tar.gz -C ${{ env.home_path }}/bos
            fi

            if [[ "${{ env.RUN_DOWNSTREAM }}" == "" && -n "${PR_ID}" ]]; then
              bos_prefix="${PR_ID}/${COMMIT_ID}"
            elif [[ "${{ env.RUN_DOWNSTREAM }}" == "true" && -n "${PR_ID}" ]]; then
              bos_prefix="${PR_ID}/${COMMIT_ID}/test_build"
            else
              bos_prefix="schedule/$(date +%Y%m%d)"
            fi

            # Without this guard a missing log directory would leave the
            # shell in its current directory, and the tar below would
            # archive and upload that directory instead.
            cd /workspace/case_logs || { echo "No /workspace/case_logs directory. Nothing to upload."; exit 0; }
            for FILE in /workspace/case_logs/*; do
              file=$(basename "$FILE")
              python ${{ env.bos_file }} "$file" paddle-github-action/PR/PaddleNLP/distribute/${bos_prefix}/logs
              echo "$file: https://paddle-github-action.bj.bcebos.com/PR/PaddleNLP/distribute/${bos_prefix}/logs/$file"
            done
            tar -czf products.tar.gz ./
            python ${{ env.bos_file }} products.tar.gz paddle-github-action/PR/PaddleNLP/distribute/${bos_prefix}/logs
            echo "products: https://paddle-github-action.bj.bcebos.com/PR/PaddleNLP/distribute/${bos_prefix}/logs/products.tar.gz"
            '
          fi

      # Best-effort cleanup: never fails the job, even if the container was
      # never created.
      - name: Terminate And Delete the Container
        if: always()
        run: |
          docker rm -f "$container_name" 2>/dev/null || true