[invoke_subgraph] Force the output stride to be same as eager #152806

Open · wants to merge 6 commits into base: gh/anijain2305/753/base
torch/_inductor/ir.py (15 additions, 0 deletions)

@@ -7431,6 +7431,10 @@ def _has_aliased_buffers(buffers: Sequence[IRNode]) -> bool:

@ir_dataclass(frozen=False)
class InvokeSubgraph(ExternKernel):
    """
    Implementation of InvokeSubgraph HOP
    """

    subgraph: Optional[Subgraph] = None
    operands: Optional[list[TensorBox]] = None
    outputs: Optional[list[MultiOutput]] = None

@@ -7515,6 +7519,17 @@ def create_output(output: IRNode, ind: int):
                skip_size_stride_alignment_checks=True,
            )

        # Force the output strides to be the same as the original strides
Contributor:
This needs a test at the very least. You can add an invoke_subgraph node, then do a graph pass that changes the outputs in the invoke_subgraph subgraph, and then check to make sure the strides are still what you expect.
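
A minimal sketch of such a stride test, assuming `mark_compile_region` from `torch._higher_order_ops.invoke_subgraph` is the decorator that produces an invoke_subgraph node (and leaving out the intermediate graph pass described above):

```python
import torch
from torch._higher_order_ops.invoke_subgraph import mark_compile_region

@mark_compile_region
def gn(x):
    # Transpose so the eager output of the subgraph is non-contiguous.
    return (x * 2).transpose(0, 1)

def fn(x):
    return gn(x)

x = torch.randn(8, 16)
eager_out = fn(x)
compiled_out = torch.compile(fn, backend="inductor", fullgraph=True)(x)

# The behavior this PR targets: the compiled invoke_subgraph output
# should keep the same strides as the eager output.
assert compiled_out.stride() == eager_out.stride()
assert torch.allclose(compiled_out, eager_out)
```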

Contributor Author (@anijain2305, May 6, 2025):

Yes, I was not able to get a test working.

I was looking at a regression when I wrapped the whole model with invoke_subgraph. When I diffed the output code, I saw an extra kernel after the invoke_subgraph call, even though there was no operation outside of it. This PR was my attempt to make the invoke_subgraph output strides match the eager output strides so that the extra kernel is avoided, and it fixed the regression. But after your comment about passes changing the meta vals, I am not sure this is correct (or what the right solution is to avoid the extra kernel).

        new_outputs = []
        fake_outputs = V.graph.current_node.meta["val"]
        for idx, output in enumerate(outputs):
            if isinstance(output, (ShapeAsConstantBuffer, NoneAsConstantBuffer)):
                new_outputs.append(output)
            else:
                example_stride = handle_sym_expr(fake_outputs[idx].stride())
                new_outputs.append(cls.require_exact_strides(output, example_stride))
Comment on lines +7529 to +7530
Contributor:

I'm not sure this is right. Can Inductor passes change the fake_outputs in a way that they differ from eager?

If so, we need to record the meta vals at the time of tracing, before passes run, and then use the metadata on them.

Contributor Author:

I guess this applies to the inputs of invoke_subgraph as well, then. Currently, we rely on the meta vals of the invoke_subgraph inputs, which could differ from eager because of graph passes.

Contributor:

Can you remind me why we want to force the input and output strides to be the same as eager? If we were not doing invoke_subgraph, Inductor would be allowed to change intermediates in the graph to have whatever strides it wants, with some exceptions.

Contributor Author (@anijain2305, May 6, 2025):

This is to reduce compile time. We compile a subgraph once and then reuse the same output code for subsequent calls. Since the input strides can be different for different subgraph calls, we restride the inputs to a fixed layout at the beginning of each subgraph.

This is what allows us to reuse a subgraph's output code, which is very important for compile time; otherwise the major benefits of invoke_subgraph are not realized.

It is possible that the restriding is not to the eager strides but to strides produced after Inductor graph passes run. Nevertheless, it is a fixed and valid set of input strides.
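
As a rough standalone illustration of that restriding idea (the `restride_to` helper below is hypothetical and only shows the effect at the tensor level; the diff above does this at the IR level via `cls.require_exact_strides`):

```python
import torch

def restride_to(t: torch.Tensor, size, stride) -> torch.Tensor:
    """Copy `t` into a tensor with a fixed (size, stride) layout if needed."""
    if tuple(t.size()) == tuple(size) and t.stride() == tuple(stride):
        return t
    out = torch.empty_strided(size, stride, dtype=t.dtype, device=t.device)
    out.copy_(t)
    return out

# Two call sites pass tensors with different strides, but the subgraph body
# always sees the same canonical (here: contiguous) layout, so one compiled
# artifact can serve both calls.
canonical_size, canonical_stride = (4, 8), (8, 1)
a = torch.randn(4, 8)       # contiguous, stride (8, 1)
b = torch.randn(8, 4).t()   # transposed view, stride (1, 4)
for inp in (a, b):
    fixed = restride_to(inp, canonical_size, canonical_stride)
    assert fixed.stride() == canonical_stride
    assert torch.equal(fixed, inp)
```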

Contributor:

We have some infrastructure to do this already (for inputs); check out:

if _should_save_eager_input_vals(target, (args, kwargs)):
    # NOTE "eager_input_vals"
    # We save the original (args, kwargs) FakeTensor values for nodes
    # that have exact stride requirements. This is useful downstream.
    # We use this information inside Inductor to ensure that inputs to
    # stride-sensitive operators have the correct strides.
    arg_inp, kwarg_inp = torch.fx.node.map_aggregate((args, kwargs), map_fn)  # type: ignore[misc, arg-type]
    node.meta["eager_input_vals"] = (arg_inp, kwarg_inp)

Contributor:

Yea - let's use the above mechanism

Contributor Author (@anijain2305, Jun 6, 2025):

I can use this for the inputs. Is there anything for the output strides? The pointer above is only for inputs, but I also want to constrain the outputs.
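
One possible shape for such a mechanism, sketched as a toy example (the `eager_output_strides` key below is hypothetical, not an existing PyTorch API): snapshot stride metadata right after tracing, before graph passes can rewrite `meta["val"]`.

```python
import torch
from torch.fx.experimental.proxy_tensor import make_fx

def f(x):
    return (x + 1).transpose(0, 1)

# Trace with fake tensors; make_fx records a FakeTensor in node.meta["val"].
gm = make_fx(f, tracing_mode="fake")(torch.randn(4, 8))

for node in gm.graph.nodes:
    val = node.meta.get("val")
    if isinstance(val, torch.Tensor):
        # Hypothetical key: stash the trace-time strides under a separate
        # name so later passes that mutate meta["val"] cannot change the
        # layout we would restride the outputs to.
        node.meta["eager_output_strides"] = tuple(val.stride())
```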

        outputs = new_outputs

        outputs = [create_output(output, i) for i, output in enumerate(outputs)]
        invoke_subgraph.outputs = outputs
        return outputs