From 476535ecefcf7f9507e60e9860bebd6365801866 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 3 Sep 2025 23:13:24 +0800 Subject: [PATCH 1/4] JIT: Strip references when roundtripping through str or tuple --- Include/internal/pycore_optimizer.h | 6 ++++++ Python/optimizer_bytecodes.c | 13 ++++++++----- Python/optimizer_cases.c.h | 6 +++--- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 3a5ae0a054ed3c..01c98c36f052a9 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -251,6 +251,12 @@ PyJitRef_Wrap(JitOptSymbol *sym) return (JitOptRef){.bits=(uintptr_t)sym}; } +static inline JitOptRef +PyJitRef_StripReferenceInfo(JitOptRef ref) +{ + return PyJitRef_Wrap(PyJitRef_Unwrap(ref)); +} + static inline JitOptRef PyJitRef_Borrow(JitOptRef ref) { diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 781296dc7f48fc..eccbddf0546ab3 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -762,9 +762,8 @@ dummy_func(void) { } op(_RETURN_VALUE, (retval -- res)) { - // We wrap and unwrap the value to mimic PyStackRef_MakeHeapSafe - // in bytecodes.c - JitOptRef temp = PyJitRef_Wrap(PyJitRef_Unwrap(retval)); + // Mimics PyStackRef_MakeHeapSafe in the interpreter. + JitOptRef temp = PyJitRef_StripReferenceInfo(retval); DEAD(retval); SAVE_STACK(); ctx->frame->stack_pointer = stack_pointer; @@ -925,7 +924,9 @@ dummy_func(void) { op(_CALL_STR_1, (unused, unused, arg -- res)) { if (sym_matches_type(arg, &PyUnicode_Type)) { // e.g. str('foo') or str(foo) where foo is known to be a string - res = arg; + // Note: we must strip the reference information because it goes + // through str() which strips the reference information from it. + res = PyJitRef_StripReferenceInfo(arg); } else { res = sym_new_type(ctx, &PyUnicode_Type); @@ -1065,7 +1066,9 @@ dummy_func(void) { op(_CALL_TUPLE_1, (callable, null, arg -- res)) { if (sym_matches_type(arg, &PyTuple_Type)) { // e.g. tuple((1, 2)) or tuple(foo) where foo is known to be a tuple - res = arg; + // Note: we must strip the reference information because it goes + // through tuple() which strips the reference information from it. + res = PyJitRef_StripReferenceInfo(arg); } else { res = sym_new_type(ctx, &PyTuple_Type); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 14e985b42ea0ce..8617355e25f418 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1060,7 +1060,7 @@ JitOptRef retval; JitOptRef res; retval = stack_pointer[-1]; - JitOptRef temp = PyJitRef_Wrap(PyJitRef_Unwrap(retval)); + JitOptRef temp = PyJitRef_StripReferenceInfo(retval); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); ctx->frame->stack_pointer = stack_pointer; @@ -2496,7 +2496,7 @@ JitOptRef res; arg = stack_pointer[-1]; if (sym_matches_type(arg, &PyUnicode_Type)) { - res = arg; + res = PyJitRef_StripReferenceInfo(arg); } else { res = sym_new_type(ctx, &PyUnicode_Type); @@ -2522,7 +2522,7 @@ JitOptRef res; arg = stack_pointer[-1]; if (sym_matches_type(arg, &PyTuple_Type)) { - res = arg; + res = PyJitRef_StripReferenceInfo(arg); } else { res = sym_new_type(ctx, &PyTuple_Type); From 6751f2ea9fd7adc114002e5eeed402b1d19a6b21 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 3 Sep 2025 23:32:09 +0800 Subject: [PATCH 2/4] Add tests --- Lib/test/test_capi/test_opt.py | 49 ++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index c796b0dd4b5b7e..d2f3e5dc2c6eb5 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2501,6 +2501,55 @@ def testfunc(n): # For now... until we constant propagate it away. self.assertIn("_BINARY_OP", uops) + def test_strip_reference_information_through_str(self): + # This test needs to be in another script due to Python's + # string interning details. + script_helper.assert_python_ok('-c', textwrap.dedent(""" + import _testinternalcapi + import _opcode + + def get_first_executor(func): + code = func.__code__ + co_code = code.co_code + for i in range(0, len(co_code), 2): + try: + return _opcode.get_executor(code, i) + except ValueError: + pass + return None + + def iter_opnames(ex): + for item in ex: + yield item[0] + + def get_opnames(ex): + return list(iter_opnames(ex)) + + def testfunc(n): + for _ in range(n): + str("abcde") + + + testfunc(_testinternalcapi.TIER2_THRESHOLD) + ex = get_first_executor(testfunc) + assert ex is not None + uops = get_opnames(ex) + assert "_POP_TOP_NOP" not in uops + """)) + + def test_strip_reference_information_through_tuple(self): + def testfunc(n): + for _ in range(n): + tuple((1,)) + + testfunc(TIER2_THRESHOLD * 2) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertNotIn("_POP_TOP_NOP", uops) + def global_identity(x): return x From 2a99327b7d077442185f86b1fc3d2877107f713d Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Wed, 3 Sep 2025 15:35:39 +0000 Subject: [PATCH 3/4] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2025-09-03-15-35-34.gh-issue-138431.EUsrtA.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-09-03-15-35-34.gh-issue-138431.EUsrtA.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-09-03-15-35-34.gh-issue-138431.EUsrtA.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-09-03-15-35-34.gh-issue-138431.EUsrtA.rst new file mode 100644 index 00000000000000..11fc3c05d5aa7a --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-09-03-15-35-34.gh-issue-138431.EUsrtA.rst @@ -0,0 +1 @@ +Fix a bug in the JIT optimizer when round-tripping strings and tuples. From bb59d45e4fd64a113552beba67ce7918b14ff51c Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 4 Sep 2025 01:19:58 +0800 Subject: [PATCH 4/4] Address review Co-Authored-By: Mark Shannon <9448417+markshannon@users.noreply.github.com> --- Lib/test/test_capi/test_opt.py | 55 +++++++--------------------------- 1 file changed, 11 insertions(+), 44 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index d2f3e5dc2c6eb5..ffd65dbb1464f8 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2501,54 +2501,21 @@ def testfunc(n): # For now... until we constant propagate it away. self.assertIn("_BINARY_OP", uops) - def test_strip_reference_information_through_str(self): - # This test needs to be in another script due to Python's - # string interning details. - script_helper.assert_python_ok('-c', textwrap.dedent(""" - import _testinternalcapi - import _opcode - - def get_first_executor(func): - code = func.__code__ - co_code = code.co_code - for i in range(0, len(co_code), 2): - try: - return _opcode.get_executor(code, i) - except ValueError: - pass - return None + def test_reference_tracking_across_call_doesnt_crash(self): - def iter_opnames(ex): - for item in ex: - yield item[0] + def f1(): + for _ in range(TIER2_THRESHOLD + 1): + # Choose a value that won't occur elsewhere to avoid sharing + str("value that won't occur elsewhere to avoid sharing") - def get_opnames(ex): - return list(iter_opnames(ex)) + f1() - def testfunc(n): - for _ in range(n): - str("abcde") - - - testfunc(_testinternalcapi.TIER2_THRESHOLD) - ex = get_first_executor(testfunc) - assert ex is not None - uops = get_opnames(ex) - assert "_POP_TOP_NOP" not in uops - """)) - - def test_strip_reference_information_through_tuple(self): - def testfunc(n): - for _ in range(n): - tuple((1,)) - - testfunc(TIER2_THRESHOLD * 2) - - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - uops = get_opnames(ex) + def f2(): + for _ in range(TIER2_THRESHOLD + 1): + # Choose a value that won't occur elsewhere to avoid sharing + tuple((31, -17, 25, "won't occur elsewhere")) - self.assertNotIn("_POP_TOP_NOP", uops) + f2() def global_identity(x):