Skip to content

Commit 544b7db

Browse files
authored
open gpt dp CI test (#10740)
* open gpt dp CI test
* update a100 loss base
* update base
* update loss base
* run gpt base
* update base
* restore
1 parent f2477c0 commit 544b7db

File tree

1 file changed

+15
-15
lines changed

1 file changed

+15
-15
lines changed

scripts/distribute/ci_case_auto.sh

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -160,11 +160,11 @@ function llm_gpt_case_list_auto() {
160160
fun_list=(
161161
# The test name must have "llm_gpt_dygraph_auto_" as a prefix,
162162
# which will be used for tracking the execution status of the case.
163-
# llm_gpt_dygraph_auto_bs8_fp32_DP2
164-
# llm_gpt_dygraph_auto_bs8_fp32_DP2-MP2
165-
# llm_gpt_dygraph_auto_bs8_fp32_DP2-MP2-PP2
166-
# llm_gpt_dygraph_auto_bs8_fp16_DP2-MP2-PP2
167-
# llm_gpt_dygraph_auto_bs8_fp16_DP2-MP2-PP2_intermediate
163+
llm_gpt_dygraph_auto_bs8_fp32_DP2
164+
llm_gpt_dygraph_auto_bs8_fp32_DP2-MP2
165+
llm_gpt_dygraph_auto_bs8_fp32_DP2-MP2-PP2
166+
llm_gpt_dygraph_auto_bs8_fp16_DP2-MP2-PP2
167+
llm_gpt_dygraph_auto_bs8_fp16_DP2-MP2-PP2_intermediate
168168
llm_gpt_pir_auto_bs4_TP2
169169
llm_gpt_pir_auto_bs4_TP2_PP2
170170
llm_gpt_pir_auto_bs8_DP2_TP2_PP2
@@ -2293,11 +2293,11 @@ function llm_gpt_dygraph_auto_bs8_fp32_DP2() {
22932293
ips=-1
22942294
mem=-1
22952295
echo "result: loss=$loss ips=$ips mem=$mem loss_md5=$loss_md5"
2296-
loss_base=10.55848312 # output of dropout is different after supporting spmd
2296+
loss_base=10.55853653 # output of dropout is different after supporting spmd
22972297
ips_base=-1
22982298
mem_base=-1
22992299
if [ $IS_A100 -ne 0 ];then
2300-
loss_base=10.55920792 # after add dropout spmd
2300+
loss_base=10.56019211 # after add dropout spmd
23012301
fi
23022302
check_result $FUNCNAME ${loss_base} ${loss} ${ips_base} ${ips} ${mem_base} ${mem}
23032303
echo "=========== $FUNCNAME run end ==========="
@@ -2365,11 +2365,11 @@ function llm_gpt_dygraph_auto_bs8_fp32_DP2-MP2() {
23652365
ips=-1
23662366
mem=-1
23672367
echo "result: loss=$loss ips=$ips mem=$mem loss_md5=$loss_md5"
2368-
loss_base=10.56786537 # output of dropout is different after supporting spmd
2368+
loss_base=10.5657959 # output of dropout is different after supporting spmd
23692369
ips_base=-1
23702370
mem_base=-1
23712371
if [ $IS_A100 -ne 0 ];then
2372-
loss_base=10.57873726 # after add dropout spmd
2372+
loss_base=10.5760107 # after add dropout spmd
23732373
fi
23742374
check_result $FUNCNAME ${loss_base} ${loss} ${ips_base} ${ips} ${mem_base} ${mem}
23752375
echo "=========== $FUNCNAME run end ==========="
@@ -2438,11 +2438,11 @@ function llm_gpt_dygraph_auto_bs8_fp32_DP2-MP2-PP2() {
24382438
mem=-1
24392439
echo "result: loss=$loss ips=$ips mem=$mem loss_md5=$loss_md5"
24402440
# loss_base=10.59993172 # note: need to debug
2441-
loss_base=10.57312012 # output of dropout is different after supporting spmd
2441+
loss_base=10.57174778 # output of dropout is different after supporting spmd
24422442
ips_base=-1
24432443
mem_base=-1
24442444
if [ $IS_A100 -ne 0 ];then
2445-
loss_base=10.5769043 # after add dropout spmd
2445+
loss_base=10.57701015 # after add dropout spmd
24462446
fi
24472447
check_result $FUNCNAME ${loss_base} ${loss} ${ips_base} ${ips} ${mem_base} ${mem}
24482448
echo "=========== $FUNCNAME run end ==========="
@@ -2511,11 +2511,11 @@ function llm_gpt_dygraph_auto_bs8_fp16_DP2-MP2-PP2() {
25112511
mem=-1
25122512
echo "result: loss=$loss ips=$ips mem=$mem loss_md5=$loss_md5"
25132513
# loss_base=10.58456802 # note: need to debug
2514-
loss_base=10.57452488
2514+
loss_base=10.57304478
25152515
ips_base=-1
25162516
mem_base=-1
25172517
if [ $IS_A100 -ne 0 ];then
2518-
loss_base=10.57843781 # after add dropout spmd
2518+
loss_base=10.57861042 # after add dropout spmd
25192519
fi
25202520
check_result $FUNCNAME ${loss_base} ${loss} ${ips_base} ${ips} ${mem_base} ${mem}
25212521
echo "=========== $FUNCNAME run end ==========="
@@ -2585,11 +2585,11 @@ function llm_gpt_dygraph_auto_bs8_fp16_DP2-MP2-PP2_intermediate() {
25852585
mem=-1
25862586
echo "result: loss=$loss ips=$ips mem=$mem loss_md5=$loss_md5"
25872587
# loss_base=10.58456802 # note: need to debug
2588-
loss_base=10.566679
2588+
loss_base=10.56716251
25892589
ips_base=-1
25902590
mem_base=-1
25912591
if [ $IS_A100 -ne 0 ];then
2592-
loss_base=10.56109619 # after add dropout spmd
2592+
loss_base=10.56166935 # after add dropout spmd
25932593
fi
25942594
check_result $FUNCNAME ${loss_base} ${loss} ${ips_base} ${ips} ${mem_base} ${mem}
25952595
echo "=========== $FUNCNAME run end ==========="

0 commit comments

Comments
 (0)