Stop parsing command line arguments every time common_utils is imported. #156703

Open · wants to merge 25 commits into base: main

Changes from all commits (25 commits):

e5b1254 Stop parsing command line arguments every time common_utils is imported. (AnthonyBarbier, Jun 24, 2025)
adc6560 Use Optional instead of | (AnthonyBarbier, Jun 24, 2025)
edf4187 Fix test_jit_legacy test (AnthonyBarbier, Jun 25, 2025)
2a3b177 Move assert to when the variable is actually read (AnthonyBarbier, Jun 25, 2025)
36da9a9 Fix jit tests (AnthonyBarbier, Jun 26, 2025)
65210f6 Set seed for distributed_test.py (AnthonyBarbier, Jun 26, 2025)
36cbf8f Merge remote-tracking branch 'upstream/main' into argparse (AnthonyBarbier, Jul 10, 2025)
a4c5ee3 Fix seed setting (AnthonyBarbier, Jul 10, 2025)
0ece614 Fixing jit and distributed tests (AnthonyBarbier, Jul 10, 2025)
a9c8899 Clean up (AnthonyBarbier, Jul 10, 2025)
e6dc730 Fix test_jit_fuser (AnthonyBarbier, Jul 10, 2025)
a4b833a Relax checks in set_rng_seed() (AnthonyBarbier, Jul 10, 2025)
79faf6c Fix more tests (AnthonyBarbier, Jul 16, 2025)
6275b8d Merge remote-tracking branch 'upstream/main' into argparse (AnthonyBarbier, Jul 16, 2025)
fc93a2a Fix distributed model initialisation (AnthonyBarbier, Jul 16, 2025)
b87a0cd Make seed an optional argument (AnthonyBarbier, Jul 16, 2025)
786db21 Pass the seed for subclasses too (AnthonyBarbier, Jul 17, 2025)
6c9730c Handle seed in NCCLTraceTestBase (AnthonyBarbier, Jul 18, 2025)
28eba9a Merge remote-tracking branch 'upstream/main' into argparse (AnthonyBarbier, Jul 18, 2025)
2412e13 Fix seed in FSDP tests (AnthonyBarbier, Jul 21, 2025)
5fe1c4b Fix test_nn.py (AnthonyBarbier, Aug 1, 2025)
650a70f Merge remote-tracking branch 'upstream/main' into argparse (AnthonyBarbier, Aug 1, 2025)
073ee74 Merge remote-tracking branch 'upstream/main' into argparse (AnthonyBarbier, Aug 11, 2025)
59bf860 Trying to hardcode the seed for distributed tests in _start_processes… (AnthonyBarbier, Aug 11, 2025)
0663f08 Use logger.warning (AnthonyBarbier, Aug 11, 2025)
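
The diff to common_utils.py itself is not part of the excerpt below, but the pattern the PR title describes is to move the argparse work out of module import and behind an explicit, idempotent parse_cmd_line_args() call, leaving globals such as SEED and GRAPH_EXECUTOR unset until then. A minimal sketch of that idea, assuming hypothetical flag names, defaults, and a `_parsed` guard for illustration (only parse_cmd_line_args, SEED, and GRAPH_EXECUTOR appear in the diffs below):

```python
# Hypothetical sketch of the lazy-parsing pattern in common_utils; not the PR's code.
import argparse
import sys
from typing import Optional

SEED: Optional[int] = None  # previously populated at import time
GRAPH_EXECUTOR = None       # resolved from --jit-executor once arguments are parsed

_parsed = False             # assumed guard keeping the call idempotent


def parse_cmd_line_args() -> None:
    """Parse test-runner flags once, on demand, instead of at import."""
    global SEED, GRAPH_EXECUTOR, _parsed
    if _parsed:
        return
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument("--seed", type=int, default=1234)          # assumed default
    parser.add_argument("--jit-executor", type=str, default=None)
    args, _ = parser.parse_known_args(sys.argv[1:])
    SEED = args.seed
    GRAPH_EXECUTOR = args.jit_executor  # the real code maps this onto a ProfilingMode
    _parsed = True
```
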
3 changes: 2 additions & 1 deletion test/distributed/test_c10d_nccl.py

@@ -4280,10 +4280,11 @@ def _run(
         test_name: str,
         file_name: str,
         parent_pipe,
+        seed: int,
         **kwargs,
     ) -> None:
         cls.parent = parent_conn
-        super()._run(rank, test_name, file_name, parent_pipe)
+        super()._run(rank, test_name, file_name, parent_pipe, seed)

     @property
     def local_device(self):

3 changes: 3 additions & 0 deletions test/jit/test_autodiff_subgraph_slicing.py

@@ -27,6 +27,9 @@
 )


+assert GRAPH_EXECUTOR is not None
+
+
 @unittest.skipIf(
     GRAPH_EXECUTOR == ProfilingMode.SIMPLE, "Simple Executor doesn't support gradients"
 )

5 changes: 5 additions & 0 deletions test/test_cpp_api_parity.py

@@ -35,6 +35,11 @@ class TestCppApiParity(common.TestCase):
     functional_test_params_map = {}


+if __name__ == "__main__":
+    # The value of the SEED depends on command line arguments so make sure they're parsed
+    # before instantiating tests because some modules as part of get_new_module_tests() will call torch.randn
+    common.parse_cmd_line_args()
+
 expected_test_params_dicts = []

 for test_params_dicts, test_instance_class in [

7 changes: 7 additions & 0 deletions test/test_expanded_weights.py

@@ -1008,6 +1008,13 @@ def filter_supported_tests(t):
     return True


+if __name__ == "__main__":
+    from torch.testing._internal.common_utils import parse_cmd_line_args
+
+    # The value of the SEED depends on command line arguments so make sure they're parsed
+    # before instantiating tests because some modules as part of get_new_module_tests() will call torch.randn
+    parse_cmd_line_args()
+
 # TODO: Once all of these use ModuleInfo, replace with ModuleInfo tests
 # These currently use the legacy nn tests
 supported_tests = [

11 changes: 10 additions & 1 deletion test/test_jit.py

@@ -3,6 +3,13 @@

 import torch

+if __name__ == '__main__':
+    from torch.testing._internal.common_utils import parse_cmd_line_args
+
+    # The value of GRAPH_EXECUTOR and SEED depend on command line arguments so make sure they're parsed
+    # before instantiating tests.
+    parse_cmd_line_args()
+
 # This is how we include tests located in test/jit/...
 # They are included here so that they are invoked when you call `test_jit.py`,
 # do not run these test files directly.
@@ -97,7 +104,7 @@
 from torch.testing._internal import jit_utils
 from torch.testing._internal.common_jit import check_against_reference
 from torch.testing._internal.common_utils import run_tests, IS_WINDOWS, \
-    suppress_warnings, IS_SANDCASTLE, GRAPH_EXECUTOR, ProfilingMode, \
+    GRAPH_EXECUTOR, suppress_warnings, IS_SANDCASTLE, ProfilingMode, \
     TestCase, freeze_rng_state, slowTest, TemporaryFileName, \
     enable_profiling_mode_for_profiling_tests, TEST_MKL, set_default_dtype, num_profiled_runs, \
     skipIfCrossRef, skipIfTorchDynamo
@@ -158,6 +165,7 @@ def doAutodiffCheck(testname):
     if "test_t_" in testname or testname == "test_t":
         return False

+    assert GRAPH_EXECUTOR
     if GRAPH_EXECUTOR == ProfilingMode.SIMPLE:
         return False

@@ -201,6 +209,7 @@ def doAutodiffCheck(testname):
     return testname not in test_exceptions


+assert GRAPH_EXECUTOR
 # TODO: enable TE in PE when all tests are fixed
 torch._C._jit_set_texpr_fuser_enabled(GRAPH_EXECUTOR == ProfilingMode.PROFILING)
 torch._C._jit_set_profiling_executor(GRAPH_EXECUTOR != ProfilingMode.LEGACY)

9 changes: 7 additions & 2 deletions test/test_jit_autocast.py

@@ -5,12 +5,17 @@
 from typing import Optional

 import unittest
-from test_jit import JitTestCase
 from torch.testing._internal.common_cuda import TEST_CUDA
-from torch.testing._internal.common_utils import run_tests, skipIfTorchDynamo
+from torch.testing._internal.common_utils import parse_cmd_line_args, run_tests, skipIfTorchDynamo
 from torch.testing import FileCheck
 from jit.test_models import MnistNet

+if __name__ == '__main__':
+    # The value of GRAPH_EXECUTOR depends on command line arguments so make sure they're parsed
+    # before instantiating tests.
+    parse_cmd_line_args()
+
+from test_jit import JitTestCase
 TEST_BFLOAT16 = TEST_CUDA and torch.cuda.is_bf16_supported()

7 changes: 7 additions & 0 deletions test/test_jit_fuser.py

@@ -9,6 +9,13 @@
 from torch.testing import FileCheck
 from unittest import skipIf

+if __name__ == "__main__":
+    from torch.testing._internal.common_utils import parse_cmd_line_args
+
+    # The value of GRAPH_EXECUTOR depends on command line arguments so make sure they're parsed
+    # before instantiating tests.
+    parse_cmd_line_args()
+
 from torch.testing._internal.common_utils import run_tests, IS_SANDCASTLE, ProfilingMode, GRAPH_EXECUTOR, \
     enable_profiling_mode_for_profiling_tests, IS_WINDOWS, TemporaryDirectoryName, shell
 from torch.testing._internal.jit_utils import JitTestCase, enable_cpu_fuser, _inline_everything, \

8 changes: 8 additions & 0 deletions test/test_jit_fuser_legacy.py

@@ -2,6 +2,14 @@

 import sys
 sys.argv.append("--jit-executor=legacy")
+
+if __name__ == "__main__":
+    from torch.testing._internal.common_utils import parse_cmd_line_args
+
+    # The value of GRAPH_EXECUTOR depends on command line arguments so make sure they're parsed
+    # before instantiating tests.
+    parse_cmd_line_args()
+
 from test_jit_fuser import *  # noqa: F403

 if __name__ == '__main__':

7 changes: 7 additions & 0 deletions test/test_jit_fuser_te.py

@@ -22,6 +22,13 @@
 torch._C._jit_set_profiling_executor(True)
 torch._C._get_graph_executor_optimize(True)

+if __name__ == "__main__":
+    from torch.testing._internal.common_utils import parse_cmd_line_args
+
+    # The value of GRAPH_EXECUTOR depends on command line arguments so make sure they're parsed
+    # before instantiating tests.
+    parse_cmd_line_args()
+
 from itertools import combinations, permutations, product
 from textwrap import dedent

9 changes: 8 additions & 1 deletion test/test_jit_legacy.py

@@ -2,7 +2,14 @@

 import sys
 sys.argv.append("--jit-executor=legacy")
-from test_jit import *  # noqa: F403
+from torch.testing._internal.common_utils import parse_cmd_line_args, run_tests
+
+if __name__ == '__main__':
+    # The value of GRAPH_EXECUTOR depends on command line arguments so make sure they're parsed
+    # before instantiating tests.
+    parse_cmd_line_args()
+
+from test_jit import *  # noqa: F403, F401

 if __name__ == '__main__':
     run_tests()

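The two legacy wrappers above rely on flag injection: because parsing is now deferred, appending to sys.argv before the first parse_cmd_line_args() call is enough for GRAPH_EXECUTOR to resolve to the legacy executor. A condensed sketch of the ordering that matters, mirroring the diff above with the steps annotated:

```python
import sys

# 1. Inject the flag before anything parses the command line.
sys.argv.append("--jit-executor=legacy")

from torch.testing._internal.common_utils import parse_cmd_line_args, run_tests

if __name__ == '__main__':
    # 2. Parse explicitly, so GRAPH_EXECUTOR reflects the injected flag...
    parse_cmd_line_args()

# 3. ...before the star-import instantiates the test classes that read it.
from test_jit import *  # noqa: F403, F401

if __name__ == '__main__':
    run_tests()
```
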
7 changes: 7 additions & 0 deletions test/test_nn.py

@@ -7643,6 +7643,13 @@ def with_tf32_on(self, test=test, kwargs=kwargs):
     else:
         add(cuda_test_name, with_tf32_off)

+if __name__ == '__main__':
+    from torch.testing._internal.common_utils import parse_cmd_line_args
+
+    # The value of the SEED depends on command line arguments so make sure they're parsed
+    # before instantiating tests because some modules as part of get_new_module_tests() will call torch.randn
+    parse_cmd_line_args()
+
 for test_params in module_tests + get_new_module_tests():
     # TODO: CUDA is not implemented yet
     if 'constructor' not in test_params:

26 changes: 23 additions & 3 deletions torch/testing/_internal/common_distributed.py

@@ -32,6 +32,7 @@
 from torch._C._autograd import DeviceType
 from torch._C._distributed_c10d import _SymmetricMemory
 from torch._logging._internal import trace_log
+from torch.testing._internal import common_utils
 from torch.testing._internal.common_utils import (
     FILE_SCHEMA,
     find_free_port,
@@ -671,6 +672,7 @@ def __init__(
         if methodName != "runTest":
             method_name = methodName
         super().__init__(method_name)
+        self.seed = None
         try:
             fn = getattr(self, method_name)
             setattr(self, method_name, self.join_or_run(fn))
@@ -715,13 +717,26 @@ def _current_test_name(self) -> str:

     def _start_processes(self, proc) -> None:
         self.processes = []
+        # distributed tests don't support setting the seed via the command line so hardcode it here.
+        hardcoded_seed = 1234
+        if common_utils.SEED and common_utils.SEED != hardcoded_seed:
+            msg = ("Distributed tests do not support setting the seed via the command line. "
+                   f"The seed will be reset to its default value ({hardcoded_seed}) now.")
+            logger.warning(msg)
+        common_utils.SEED = hardcoded_seed
         for rank in range(int(self.world_size)):
             parent_conn, child_conn = torch.multiprocessing.Pipe()
             process = proc(
                 target=self.__class__._run,
                 name="process " + str(rank),
-                args=(rank, self._current_test_name(), self.file_name, child_conn),
+                args=(
+                    rank,
+                    self._current_test_name(),
+                    self.file_name,
+                    child_conn,
+                ),
                 kwargs={
+                    "seed": common_utils.SEED,
                     "fake_pg": getattr(self, "fake_pg", False),
                 },
             )
@@ -775,11 +790,12 @@ def _event_listener(parent_pipe, signal_pipe, rank: int):

     @classmethod
     def _run(
-        cls, rank: int, test_name: str, file_name: str, parent_pipe, **kwargs
+        cls, rank: int, test_name: str, file_name: str, parent_pipe, seed: int, **kwargs
     ) -> None:
         self = cls(test_name)
         self.rank = rank
         self.file_name = file_name
+        self.seed = seed
         self.run_test(test_name, parent_pipe)

     def run_test(self, test_name: str, parent_pipe) -> None:
@@ -798,6 +814,9 @@ def run_test(self, test_name: str, parent_pipe) -> None:
         # Show full C++ stacktraces when a Python error originating from C++ is raised.
         os.environ["TORCH_SHOW_CPP_STACKTRACES"] = "1"

+        if self.seed is not None:
+            common_utils.set_rng_seed(self.seed)
+
         # self.id() == e.g. '__main__.TestDistributed.test_get_rank'
         # We're retrieving a corresponding test and executing it.
         try:
@@ -1535,14 +1554,15 @@ def world_size(self) -> int:

     @classmethod
     def _run(
-        cls, rank: int, test_name: str, file_name: str, parent_pipe, **kwargs
+        cls, rank: int, test_name: str, file_name: str, parent_pipe, seed: int, **kwargs
     ) -> None:
         trace_log.addHandler(logging.NullHandler())

         # The rest is copypasta from MultiProcessTestCase._run
         self = cls(test_name)
         self.rank = rank
         self.file_name = file_name
+        self.seed = seed
         self.run_test(test_name, parent_pipe)

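With the base class now threading seed through _run, subclasses that override it (NCCLTraceTestBase in the test_c10d_nccl.py diff above is the in-tree example) must accept and forward the extra positional argument. A minimal sketch of the required shape, using a hypothetical subclass:

```python
from torch.testing._internal.common_distributed import MultiProcessTestCase


class MyDistTest(MultiProcessTestCase):  # hypothetical subclass for illustration
    @classmethod
    def _run(cls, rank, test_name, file_name, parent_pipe, seed, **kwargs):
        # Per-class setup would go here; then forward everything, including
        # the new `seed`, so run_test() can call set_rng_seed() in the child.
        super()._run(rank, test_name, file_name, parent_pipe, seed, **kwargs)
```
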
4 changes: 3 additions & 1 deletion torch/testing/_internal/common_fsdp.py

@@ -57,6 +57,7 @@
 from torch.testing._internal.common_utils import (
     FILE_SCHEMA,
     get_cycles_per_ms,
+    set_rng_seed,
     TEST_CUDA,
     TEST_HPU,
     TEST_XPU,
@@ -1180,7 +1181,7 @@ def run_subtests(self, *args, **kwargs):
         return run_subtests(self, *args, **kwargs)

     @classmethod
-    def _run(cls, rank, test_name, file_name, pipe, **kwargs):  # type: ignore[override]
+    def _run(cls, rank, test_name, file_name, pipe, seed, **kwargs):  # type: ignore[override]
         self = cls(test_name)
         self.rank = rank
         self.file_name = file_name
@@ -1226,6 +1227,7 @@ def _run(cls, rank, test_name, file_name, pipe, **kwargs):  # type: ignore[override]
             dist.barrier(device_ids=device_ids)

         torch._dynamo.reset()
+        set_rng_seed(seed)
         self.run_test(test_name, pipe)
         torch._dynamo.reset()

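End to end, the seed now takes one path in all the multiprocess harnesses: the parent pins common_utils.SEED in _start_processes, passes it to each child's _run, and the child applies it via set_rng_seed right before the test body runs, so every rank starts from identical RNG state. A runnable toy simulation of that hand-off (not the real harness; set_rng_seed and _run are stand-ins):

```python
# Toy simulation of the seed hand-off this PR adds; not the real harness.
import torch

SEED = 1234  # stands in for common_utils.SEED pinned in _start_processes


def set_rng_seed(seed: int) -> None:
    # stands in for common_utils.set_rng_seed
    torch.manual_seed(seed)


def _run(rank: int, seed: int) -> torch.Tensor:
    # child-process entry point: apply the seed before the test body runs
    set_rng_seed(seed)
    return torch.randn(3)


# Every "rank" draws identical random numbers because each applies the seed.
assert torch.equal(_run(0, SEED), _run(1, SEED))
```
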
2 changes: 2 additions & 0 deletions torch/testing/_internal/common_nn.py

@@ -15,6 +15,7 @@
 import torch.nn as nn
 import torch.nn.functional as F
 from torch.nn import _reduction as _Reduction
+from torch.testing._internal import common_utils
 from torch.testing._internal.common_utils import TestCase, to_gpu, freeze_rng_state, is_iterable, \
     gradcheck, gradgradcheck, set_default_dtype, skipIfTorchDynamo, TEST_WITH_ROCM
 from torch.testing._internal.common_cuda import TEST_CUDA, SM90OrLater
@@ -1078,6 +1079,7 @@ def unsqueeze_inp(inp):


 def get_new_module_tests():
+    assert common_utils.SEED is not None, "Make sure the seed is set before calling get_new_module_tests()"
     new_module_tests = [
         poissonnllloss_no_reduce_test(),
         bceloss_no_reduce_test(),

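Taken together with the test_nn.py and test_cpp_api_parity.py changes above, the new assert encodes a call-order contract: parse the command line before building the module test list, because get_new_module_tests() draws random tensors via torch.randn while it constructs test cases. A minimal usage sketch of the contract (the surrounding driver script is hypothetical):

```python
# Hypothetical driver script honoring the new ordering contract.
from torch.testing._internal.common_utils import parse_cmd_line_args

if __name__ == "__main__":
    parse_cmd_line_args()  # sets common_utils.SEED (and GRAPH_EXECUTOR)

    # Safe only after parsing: the assert inside get_new_module_tests()
    # checks common_utils.SEED is not None before any torch.randn call.
    from torch.testing._internal.common_nn import get_new_module_tests
    tests = get_new_module_tests()
    print(f"collected {len(tests)} module test specs")
```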