
Commit 53545aa

[Auto-Parallel] optimize the performance of GPT-3 (#10780)
1 parent: 24a8887

2 files changed: +10 lines, −1 line


llm/auto_parallel/gpt-3/run_pretrain_auto.py

Lines changed: 9 additions & 0 deletions
```diff
@@ -545,6 +545,7 @@ def main():
     config.use_recompute = training_args.recompute
     config.tensor_parallel_degree = training_args.tensor_parallel_degree
     config.tensor_parallel_rank = training_args.tensor_parallel_rank
+    config.to_static = training_args.to_static

     if training_args.strategy.pipeline.enable and config.virtual_pp_degree > 1:
         pipeline = training_args.strategy.pipeline
@@ -554,6 +555,14 @@ def main():
     config.hidden_dropout_prob = model_args.hidden_dropout_prob
     config.attention_probs_dropout_prob = model_args.attention_probs_dropout_prob
     print("Final pre-training config:", config)
+    if (
+        "replace_with_parallel_cross_entropy" in training_args.tensor_parallel_config
+        and config.tensor_parallel_degree > 1
+        and config.to_static is False
+    ):
+        from llm.utils.replace_ops import replace_cross_entropy
+
+        replace_cross_entropy()

     # Set the dtype for loading model
     dtype = "float32"
```

llm/utils/replace_ops.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -106,7 +106,7 @@ def parallel_cross_entropy(
         "1. soft_label=False is set for parallel computation (current value: {}) \n"
         "2. Input tensor is properly sharded (current sharding status: {}) \n".format(
             soft_label,
-            input_placement,
+            input.placements,
         )
     )
```
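The one-line fix here repairs the diagnostic itself: `input_placement` was never defined in this scope, so whenever the guard fired, the user saw a `NameError` instead of the intended message; `input.placements` reads the sharding status directly off the distributed tensor. A hedged sketch of how such a guard behaves (the surrounding structure is assumed from the diff context):

```python
# Structure assumed from the diff context; `input.placements` exists on
# paddle auto-parallel dist tensors (e.g. from paddle.distributed.shard_tensor).
def _check_parallel_ce_inputs(input, soft_label):
    placements = getattr(input, "placements", None)
    if soft_label or not placements:
        raise AssertionError(
            "parallel_cross_entropy requires:\n"
            "1. soft_label=False is set for parallel computation (current value: {}) \n"
            "2. Input tensor is properly sharded (current sharding status: {}) \n".format(
                soft_label,
                placements,  # was `input_placement` (undefined) before the fix
            )
        )
```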
