
Commit 5644567

tugsbayasgalan authored and facebook-github-bot committed
Add support for param mutation under inference mode (#159661)
Summary: In the HF model rwkv, we have parameter mutation under inference mode, which should be safe. This PR does multiple things to make that work:
1. Execute global autograd-state mutation while tracing, so that we can actually trace through parameter in-place mutation.
2. Add support for parameter mutation under inference mode in AOTAutograd.
3. Add support for parameter mutation under inference mode in export.

Test Plan: test

Rollback Plan:

Reviewed By: ydwu4

Differential Revision: D79460136
1 parent bfc873d commit 5644567

File tree: 17 files changed, +305 −31 lines
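For orientation, here is a minimal sketch of the pattern this commit enables, adapted from the new tests below (the module `Foo` and its shapes are illustrative, not part of any API):

import torch

class Foo(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.parameter = torch.nn.Parameter(torch.ones(4, 4))

    def forward(self, x):
        # In-place parameter mutation guarded by no_grad: previously this
        # failed in export/AOTAutograd; after this change it traces through.
        with torch.no_grad():
            self.parameter.div_(2)
        return x + self.parameter

ep = torch.export.export(Foo(), (torch.rand(4, 4),)).run_decompositions()
# The graph signature now records the mutation, mapping the graph output
# name to the parameter FQN, e.g. {"div": "parameter"}.
print(ep.graph_signature.parameters_to_mutate)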

test/export/test_export.py

Lines changed: 57 additions & 0 deletions
@@ -326,6 +326,52 @@ def forward(self, *args):
             dynamic_shapes=dynamic_shapes,
         )
 
+    def test_no_grad_param_inplace(self):
+        class Foo(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.parameter = torch.nn.Parameter(torch.ones(4, 4))
+
+            def forward(self, x):
+                with torch.no_grad():
+                    self.parameter.div_(2)
+                return x + self.parameter
+
+        foo_ep = Foo()
+        foo_eager = Foo()
+        ep = export(foo_ep, (torch.rand(4, 4),)).run_decompositions()
+        val = ep.graph_signature.parameters_to_mutate
+        self.assertExpectedInline(
+            str(ep.graph).strip(),
+            """\
+graph():
+    %p_parameter : [num_users=1] = placeholder[target=p_parameter]
+    %x : [num_users=1] = placeholder[target=x]
+    %div : [num_users=2] = call_function[target=torch.ops.aten.div.Tensor](args = (%p_parameter, 2), kwargs = {})
+    %add : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%x, %div), kwargs = {})
+    return (div, add)""",
+        )
+
+        self.assertTrue("div" in val.keys())
+        self.assertTrue("parameter" in val.values())
+
+        test_inp = torch.rand(4, 4)
+
+        res = foo_eager(test_inp)
+
+        # TODO We almost need to make the param mutation happen outside
+        # of the graph. Or wrap the param mutation in a no_grad HOP. Simply
+        # overriding gm.__call__ doesn't seem to work due to:
+        # 1. graph module does something weird to __call__ so it is not easy to override
+        # 2. We inspect module.forward to bind fake args when retracing
+        with self.assertRaisesRegex(RuntimeError, "leaf"):
+            res_export = ep.module()(torch.rand(4, 4))
+
+        with torch.no_grad():
+            res_export = ep.module()(test_inp)
+
+        self.assertTrue(torch.allclose(res, res_export))
+
     def test_export_slice_unbacked_dim1(self):
         class MySlice(torch.nn.Module):
             def forward(self, x, seq_len):
@@ -4000,6 +4046,17 @@ def forward(self, x):
         inp = torch.randn(3, 3)
         self.assertTrue(torch.allclose(ep.module()(inp)[0], inp + 1))
 
+    def test_set_grad_as_side_effect(self):
+        class Foo(torch.nn.Module):
+            def forward(self, x):
+                torch._C._set_grad_enabled(False)
+                return x.sum()
+
+        before = torch.is_grad_enabled()
+        ep = torch.export.export(Foo(), (torch.randn(4, 4),))
+        after = torch.is_grad_enabled()
+        self.assertEqual(before, after)
+
     def test_derived_dim_out_of_order_simplified(self):
         _dimz = torch.export.Dim("_dimz", min=6, max=8)
         dimy = _dimz - 1
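As the TODO in the test above notes, the in-place update is replayed inside the exported graph rather than hoisted out of it, so calling the exported module under a grad-enabled context trips autograd's leaf-variable check (the assertRaisesRegex(RuntimeError, "leaf") in the test). A minimal usage sketch of the workaround the test itself uses, reusing `ep` from the sketch near the top:

# Running the exported module re-executes the parameter mutation; wrap the
# call in no_grad to match the eager module's behavior.
with torch.no_grad():
    out = ep.module()(torch.rand(4, 4))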

test/export/test_serialize.py

Lines changed: 19 additions & 0 deletions
@@ -280,6 +280,25 @@ def forward(self, x):
         actual_out = loaded_ep.module()(*inp)
         self.assertEqual(exp_out, actual_out)
 
+    def test_serialize_param_mutation(self):
+        class Foo(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.parameter = torch.nn.Parameter(torch.ones(4, 4))
+
+            def forward(self, x):
+                with torch.no_grad():
+                    self.parameter.div_(2)
+                return x + self.parameter
+
+        foo = Foo()
+        ep = torch.export.export(foo, (torch.rand(4, 4),)).run_decompositions()
+        buffer = io.BytesIO()
+        save(ep, buffer)
+        loaded_ep = load(buffer)
+        val = loaded_ep.graph_signature.parameters_to_mutate
+        self.assertEqual({"div": "parameter"}, val)
+
     def test_serialize_constant_outputs(self):
         class MyModule(torch.nn.Module):
             def __init__(self) -> None:

test/functorch/test_aotdispatch.py

Lines changed: 26 additions & 7 deletions
@@ -5364,11 +5364,15 @@ def forward(self, x):
 
         mod = M()
         inp = torch.randn(2, requires_grad=True)
-        with self.assertRaisesRegex(
-            RuntimeError,
-            "Found a graph input that requires gradients, and received a mutation",
-        ):
-            aot_export_module(mod, [inp], trace_joint=False)
+        gm, _ = aot_export_module(mod, [inp], trace_joint=False)
+        self.assertExpectedInline(
+            str(gm.graph).strip(),
+            """\
+graph():
+    %arg0_1 : [num_users=1] = placeholder[target=arg0_1]
+    %add : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%arg0_1, 4), kwargs = {})
+    return (add, add)""",
+        )
 
     def test_aot_export_input_mutation_on_parameter_banned(self):
         def fn(p, x):
@@ -5379,11 +5383,26 @@ def fn(p, x):
         inp = torch.randn(2)
         with self.assertRaisesRegex(
             RuntimeError,
-            "Found a graph input that requires gradients, and received a mutation",
+            "aot_export_joint_simple does not support input mutations. ViewAndMutationMeta",
         ):
             aot_export_joint_simple(fn, [mod.p, inp], trace_joint=False)
+        with self.assertRaisesRegex(
+            RuntimeError,
+            "Found a graph input that requires gradients, and received a mutation",
+        ):
             aot_export_joint_simple(fn, [mod.p, inp], trace_joint=True)
-        aot_export_module(mod, [inp], trace_joint=False)
+
+        gm, _ = aot_export_module(mod, [inp], trace_joint=False)
+        self.assertExpectedInline(
+            str(gm.graph).strip(),
+            """\
+graph():
+    %arg0_1 : [num_users=1] = placeholder[target=arg0_1]
+    %arg1_1 : [num_users=1] = placeholder[target=arg1_1]
+    %mul : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%arg0_1, 2), kwargs = {})
+    %add : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul, %arg1_1), kwargs = {})
+    return (mul, add)""",
+        )
 
     def test_aot_export_synthetic_bases_banned(self):
         def fn(p, x, y):
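To make the new AOTAutograd behavior concrete: under trace_joint=False (inference), mutating an input that requires grad no longer raises; the mutated value is instead materialized as an extra graph output, as the expected graphs above show. A hedged sketch, where the module `M` is an assumption shaped to match the first expected graph (the test's actual module body is not shown in this diff):

import torch
from torch._functorch.aot_autograd import aot_export_module

class M(torch.nn.Module):
    def forward(self, x):
        x.add_(4)  # in-place mutation of a grad-requiring input
        return (x,)

inp = torch.randn(2, requires_grad=True)
# Previously raised "Found a graph input that requires gradients, and
# received a mutation"; now the graph returns (add, add), where the first
# output carries the updated input value.
gm, signature = aot_export_module(M(), [inp], trace_joint=False)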

torch/_export/serde/export_schema.thrift

Lines changed: 7 additions & 1 deletion
@@ -1,5 +1,5 @@
 // @generated by update_schema.py
-// checksum<<e7f100132ac684ccc67fce91b241821062f1dfe496fdff4b9929aba4ac938b4f>>
+// checksum<<00d94226d15b290b97bd49f9ff12bbfe04b7252c75d2d1bae66d1756fd9b8517>>
 
 namespace py3 torch._export
 namespace cpp2 torch._export.schema
@@ -254,6 +254,11 @@ struct BufferMutationSpec {
   20: string buffer_name;
 }
 
+struct ParameterMutationSpec {
+  10: TensorArgument arg;
+  20: string parameter_name;
+}
+
 struct GradientToParameterSpec {
   10: TensorArgument arg;
   20: string parameter_name;
@@ -281,6 +286,7 @@ union OutputSpec {
   50: GradientToUserInputSpec gradient_to_user_input;
   60: UserInputMutationSpec user_input_mutation;
   70: OutputTokenSpec token;
+  80: ParameterMutationSpec parameter_mutation;
 }
 
 struct GraphSignature {

torch/_export/serde/schema.py

Lines changed: 7 additions & 0 deletions
@@ -327,6 +327,12 @@ class BufferMutationSpec:
     buffer_name: Annotated[str, 20]
 
 
+@dataclass
+class ParameterMutationSpec:
+    arg: Annotated[TensorArgument, 10]
+    parameter_name: Annotated[str, 20]
+
+
 @dataclass
 class GradientToParameterSpec:
     arg: Annotated[TensorArgument, 10]
@@ -359,6 +365,7 @@ class OutputSpec(_Union):
     gradient_to_user_input: Annotated[GradientToUserInputSpec, 50]
     user_input_mutation: Annotated[UserInputMutationSpec, 60]
     token: Annotated[OutputTokenSpec, 70]
+    parameter_mutation: Annotated[ParameterMutationSpec, 80]
 
 
 @dataclass

torch/_export/serde/schema.yaml

Lines changed: 10 additions & 1 deletion
@@ -1,5 +1,5 @@
 # @generated by update_schema.py
-# checksum<<afe0cc0f99e72d00aa05f1a94da938ecb619aabc5d131d3ade489b57799f1e5a>>
+# checksum<<face83b52f81c45eeaeccc97cee19e146b3f7416ed91e015b4510ada7549a72f>>
 
 AOTInductorModelPickleData:
   kind: struct
   fields:
@@ -383,11 +383,20 @@ OutputSpec:
       type: UserInputMutationSpec
     token:
       type: OutputTokenSpec
+    parameter_mutation:
+      type: ParameterMutationSpec
 OutputTokenSpec:
   kind: struct
   fields:
     arg:
       type: TokenArgument
+ParameterMutationSpec:
+  kind: struct
+  fields:
+    arg:
+      type: TensorArgument
+    parameter_name:
+      type: str
 RangeConstraint:
   kind: struct
   fields:

torch/_export/serde/serialize.py

Lines changed: 27 additions & 6 deletions
@@ -69,6 +69,7 @@
     OptionalTensorArgument,
     OutputSpec,
     OutputTokenSpec,
+    ParameterMutationSpec,
     RangeConstraint,
     ScalarType,
     SCHEMA_VERSION,
@@ -1241,6 +1242,15 @@ def serialize_output_spec(self, spec: ep.OutputSpec) -> OutputSpec:
                     buffer_name=spec.target,
                 )
             )
+        elif spec.kind == ep.OutputKind.PARAMETER_MUTATION:
+            assert spec.target is not None
+            assert isinstance(spec.arg, ep.TensorArgument)
+            return OutputSpec.create(
+                parameter_mutation=ParameterMutationSpec(
+                    arg=TensorArgument(name=spec.arg.name),
+                    parameter_name=spec.target,
+                )
+            )
         elif spec.kind == ep.OutputKind.GRADIENT_TO_PARAMETER:
             assert spec.target is not None
             assert isinstance(spec.arg, ep.TensorArgument)
@@ -2199,6 +2209,12 @@ def deserialize_output_spec(self, o: OutputSpec) -> ep.OutputSpec:
                 arg=ep.TensorArgument(name=o.buffer_mutation.arg.name),
                 target=o.buffer_mutation.buffer_name,
             )
+        elif o.type == "parameter_mutation":
+            return ep.OutputSpec(
+                kind=ep.OutputKind.PARAMETER_MUTATION,
+                arg=ep.TensorArgument(name=o.parameter_mutation.arg.name),
+                target=o.parameter_mutation.parameter_name,
+            )
         elif o.type == "gradient_to_parameter":
             return ep.OutputSpec(
                 kind=ep.OutputKind.GRADIENT_TO_PARAMETER,
@@ -3377,17 +3393,19 @@ def rank_output(out) -> tuple[int, Optional[str], int]:
         idx, (_arg, spec) = out
         assert isinstance(spec, OutputSpec)
         if spec.type == "user_output":
-            return 3, None, idx
+            return 4, None, idx
         elif spec.type == "loss_output":
-            return 3, None, idx
+            return 4, None, idx
+        elif spec.type == "parameter_mutation":
+            return 1, spec.parameter_mutation.parameter_name, idx
         elif spec.type == "buffer_mutation":
-            return 1, spec.buffer_mutation.buffer_name, idx
+            return 2, spec.buffer_mutation.buffer_name, idx
         elif spec.type == "gradient_to_parameter":
-            return 4, spec.gradient_to_parameter.parameter_name, idx
+            return 5, spec.gradient_to_parameter.parameter_name, idx
         elif spec.type == "gradient_to_user_input":
-            return 5, None, idx
+            return 6, None, idx
         elif spec.type == "user_input_mutation":
-            return 2, None, idx
+            return 3, None, idx
         elif spec.type == "token":
             return 0, None, idx
         else:
@@ -3500,6 +3518,9 @@ def replace_output(out):
         elif spec.type == "buffer_mutation":
             t = spec.buffer_mutation.arg
             t.name = replace_table[t.name]
+        elif spec.type == "parameter_mutation":
+            t = spec.parameter_mutation.arg
+            t.name = replace_table[t.name]
         elif spec.type == "gradient_to_parameter":
             t = spec.gradient_to_parameter.arg
             t.name = replace_table[t.name]

torch/_export/verifier.py

Lines changed: 13 additions & 1 deletion
@@ -463,7 +463,12 @@ def _verify_exported_program_signature(exported_program) -> None:
         )
 
     num_tokens = len(gs.output_tokens)
-    end = len(gs.buffers_to_mutate) + len(gs.user_inputs_to_mutate) + num_tokens
+    end = (
+        len(gs.buffers_to_mutate)
+        + len(gs.parameters_to_mutate)
+        + len(gs.user_inputs_to_mutate)
+        + num_tokens
+    )
     mutate_nodes: list[str] = output_nodes[num_tokens:end]
     user_output_nodes = output_nodes[end : end + len(gs.user_outputs)]
 
@@ -475,6 +480,13 @@ def _verify_exported_program_signature(exported_program) -> None:
                     f"Dict of buffers that are mutated, in order: {gs.buffers_to_mutate} \n"
                     f"Buffer nodes available: {gs.buffers} \n"
                 )
+        elif mutation_node in gs.parameters_to_mutate:
+            if gs.parameters_to_mutate[mutation_node] not in gs.parameters:
+                raise SpecViolationError(
+                    f"Parameter output {mutation_node} does not point to a parameter that exists. \n"
+                    f"Dict of parameters that are mutated, in order: {gs.parameters_to_mutate} \n"
+                    f"Parameter nodes available: {gs.parameters} \n"
+                )
         elif mutation_node in gs.user_inputs_to_mutate:
             if gs.user_inputs_to_mutate[mutation_node] not in gs.user_inputs:
                 raise SpecViolationError(

torch/_functorch/_aot_autograd/input_output_analysis.py

Lines changed: 1 addition & 0 deletions
@@ -460,6 +460,7 @@ def create_graph_signature(
         named_buffers=buffer_names,
         num_user_inputs=num_user_args,
         num_user_outputs=num_user_fw_outs,
+        trace_joint=trace_joint,
         loss_index=loss_index,
         backward_signature=backward_signature,
     )

torch/_functorch/_aot_autograd/schemas.py

Lines changed: 15 additions & 4 deletions
@@ -829,6 +829,7 @@ class GraphSignature:
     # "graph outputs that correspond to updated buffers"
     # to the FQN names of those mutated buffers.
     buffers_to_mutate: dict[GraphOutputName, FQN]
+    parameters_to_mutate: dict[GraphOutputName, FQN]
     user_inputs_to_mutate: dict[GraphOutputName, GraphInputName]
 
     in_spec: pytree.TreeSpec
@@ -852,6 +853,7 @@ def from_tracing_metadata(
         named_buffers: list[str],
         num_user_inputs: int,
         num_user_outputs: int,
+        trace_joint: bool,
         loss_index: Optional[int],
         backward_signature: Optional[BackwardSignature],
     ) -> GraphSignature:
@@ -897,8 +899,9 @@ def from_tracing_metadata(
         mutations = []
         for idx, input_info in enumerate(view_mutation_metadata.input_info):
             if input_info.mutates_data:
-                # Only buffers can be mutated, not parameters
-                assert idx >= len(parameters)
+                if trace_joint:
+                    # Only buffers can be mutated, not parameters
+                    assert idx >= len(parameters)
                 mutations.append(names[idx + num_tokens])
 
         assert len(mutations) == view_mutation_metadata.num_mutated_inp_runtime_indices
@@ -911,12 +914,16 @@ def from_tracing_metadata(
 
         user_inputs_to_mutate = {}
         buffers_to_mutate = {}
+        parameters_to_mutate = {}
         for output_name, mutation_name in outputs_to_mutations.items():
             if mutation_name in user_inputs:
                 user_inputs_to_mutate[output_name] = mutation_name
             else:
-                assert mutation_name in buffers
-                buffers_to_mutate[output_name] = mutation_name
+                assert mutation_name in buffers or mutation_name in parameters
+                if mutation_name in buffers:
+                    buffers_to_mutate[output_name] = mutation_name
+                else:
+                    parameters_to_mutate[output_name] = mutation_name
 
         start, stop = stop, stop + num_user_outputs
         user_outputs = graph_outputs[start:stop]
@@ -937,6 +944,7 @@ def from_tracing_metadata(
             inputs_to_parameters=inputs_to_parameters,  # type: ignore[arg-type]
             user_inputs_to_mutate=user_inputs_to_mutate,
             buffers_to_mutate=buffers_to_mutate,  # type: ignore[arg-type]
+            parameters_to_mutate=parameters_to_mutate,  # type: ignore[arg-type]
             in_spec=in_spec,
             out_spec=out_spec,
             backward_signature=backward_signature,
@@ -983,6 +991,9 @@ class AOTConfig:
     ignore_shape_env: bool = False
     precompile_backend_id: Optional[str] = None
    force_non_lazy_backward_lowering: bool = False
+    # This config makes sure to check certain things like
+    # mutating input with req_grad in export joint tracing.
+    export_trace_joint: bool = False
 
     def __post_init__(self):
         if self.pre_dispatch:
