Update on "[inductor] add lowering for repeat_interleave.Tensor with output size specified (#147160)"

v0i0 · v0i0 · commit 6c3757553b19 · 2025-07-16T16:49:33.000-07:00
[ghstack-poisoned]
diff --git a/test/inductor/test_torchinductor.py b/test/inductor/test_torchinductor.py
@@ -13626,13 +13626,19 @@ def forward(self, x):
                 FileCheck().check("cpp_fused_add_0").run(code)
             self.assertEqual(refe_out, test_out)
 
-    def test_repeat_interleave_pass(self):
+
+    @parametrize("dtype", [torch.int32, torch.int64])
+    def test_repeat_interleave_Tensor_decomp(self, dtype):
+        device = "cpu"
+        if self.device.lower() == "cuda":
+            device = "cuda"
+
         # https://github.com/pytorch/pytorch/issues/147160
         def f(input, repeats):
             return torch.repeat_interleave(input, repeats, dim=0, output_size=3) + 1
 
-        input = torch.tensor([[1, 2], [3, 4]], device="cuda")
-        repeat = torch.tensor([1, 2], device="cuda")
+        input = torch.tensor([[1, 2], [3, 4]], dtype=dtype, device=device)
+        repeat = torch.tensor([1, 2], device=device)
         f_compiled = torch.compile(f)
         test, (code,) = run_and_get_code(f_compiled, input, repeat)
         self.assertEqual(test, f(input, repeat))
diff --git a/torch/_inductor/decomposition.py b/torch/_inductor/decomposition.py
@@ -1159,6 +1159,8 @@ def repeast_interleave_Tensor(
 ) -> torch.Tensor:
     if output_size is None or type(output_size) is not int:
         return NotImplemented
+    if repeat.dtype not in [torch.int32, torch.int64]:
+        return NotImplemented
     cumsum = repeat.cumsum(0)
     pos = torch.arange(output_size, device=repeat.device)
-    return torch.searchsorted(cumsum, pos, right=True)
+    return torch.searchsorted(cumsum, pos, out_int32=(repeat.dtype == torch.int32), right=True)