From e95ddd6b82e0dec77ed4595fe8de90cb1242a35d Mon Sep 17 00:00:00 2001
From: Thomas Wouters
Date: Wed, 11 Sep 2019 07:29:59 -0700
Subject: [PATCH 1/6] Fix bpo-38115, a bug in the peephole optimizer that
 caused lnotab to have invalid bytecode offsets when the optimizer got rid
 of the last line(s) of a function. Also fix a bug where the optimizer
 bails out of optimization but leaves the lnotab modified.

---
 Lib/test/test_peepholer.py | 69 +++++++++++++++++++++++++++++++++++++-
 Python/peephole.c          | 39 ++++++++++++++-------
 2 files changed, 94 insertions(+), 14 deletions(-)

diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py
index c90a53210a9316..a05a8aaa01d824 100644
--- a/Lib/test/test_peepholer.py
+++ b/Lib/test/test_peepholer.py
@@ -40,6 +40,20 @@ def check_jump_targets(self, code):
                 self.fail(f'{instr.opname} at {instr.offset} '
                           f'jumps to {tgt.opname} at {tgt.offset}')
 
+    def check_lnotab(self, code):
+        "Check that the lnotab byte offsets are sensible."
+        code = dis._get_code_object(code)
+        lnotab = list(dis.findlinestarts(code))
+        # Don't bother checking if the line info is sensible, because
+        # most of the line info we can get at comes from lnotab.
+        min_bytecode = min(t[0] for t in lnotab)
+        max_bytecode = max(t[0] for t in lnotab)
+        self.assertGreaterEqual(min_bytecode, 0)
+        self.assertLess(max_bytecode, len(code.co_code))
+        # This could conceivably test more (and probably should, as there
+        # aren't very many tests of lnotab), if peepholer wasn't scheduled
+        # to be replaced anyway.
+
     def test_unot(self):
         # UNARY_NOT POP_JUMP_IF_FALSE --> POP_JUMP_IF_TRUE'
         def unot(x):
@@ -48,6 +62,7 @@ def unot(x):
         self.assertNotInBytecode(unot, 'UNARY_NOT')
         self.assertNotInBytecode(unot, 'POP_JUMP_IF_FALSE')
         self.assertInBytecode(unot, 'POP_JUMP_IF_TRUE')
+        self.check_lnotab(unot)
 
     def test_elim_inversion_of_is_or_in(self):
         for line, cmp_op in (
@@ -58,6 +73,7 @@ def test_elim_inversion_of_is_or_in(self):
         ):
             code = compile(line, '', 'single')
             self.assertInBytecode(code, 'COMPARE_OP', cmp_op)
+            self.check_lnotab(code)
 
     def test_global_as_constant(self):
         # LOAD_GLOBAL None/True/False --> LOAD_CONST None/True/False
@@ -75,6 +91,7 @@ def h():
         for func, elem in ((f, None), (g, True), (h, False)):
            self.assertNotInBytecode(func, 'LOAD_GLOBAL')
            self.assertInBytecode(func, 'LOAD_CONST', elem)
+           self.check_lnotab(func)
 
         def f():
             'Adding a docstring made this test fail in Py2.5.0'
@@ -82,6 +99,7 @@ def f():
 
         self.assertNotInBytecode(f, 'LOAD_GLOBAL')
         self.assertInBytecode(f, 'LOAD_CONST', None)
+        self.check_lnotab(f)
 
     def test_while_one(self):
         # Skip over: LOAD_CONST trueconst POP_JUMP_IF_FALSE xx
@@ -93,6 +111,7 @@ def f():
             self.assertNotInBytecode(f, elem)
         for elem in ('JUMP_ABSOLUTE',):
             self.assertInBytecode(f, elem)
+        self.check_lnotab(f)
 
     def test_pack_unpack(self):
         for line, elem in (
@@ -104,6 +123,7 @@ def test_pack_unpack(self):
             self.assertInBytecode(code, elem)
             self.assertNotInBytecode(code, 'BUILD_TUPLE')
             self.assertNotInBytecode(code, 'UNPACK_TUPLE')
+            self.check_lnotab(code)
 
     def test_folding_of_tuples_of_constants(self):
         for line, elem in (
@@ -116,6 +136,7 @@ def test_folding_of_tuples_of_constants(self):
             code = compile(line,'','single')
             self.assertInBytecode(code, 'LOAD_CONST', elem)
             self.assertNotInBytecode(code, 'BUILD_TUPLE')
+            self.check_lnotab(code)
 
         # Long tuples should be folded too.
         code = compile(repr(tuple(range(10000))),'','single')
@@ -124,6 +145,7 @@ def test_folding_of_tuples_of_constants(self):
         load_consts = [instr for instr in dis.get_instructions(code)
                        if instr.opname == 'LOAD_CONST']
         self.assertEqual(len(load_consts), 2)
+        self.check_lnotab(code)
 
         # Bug 1053819: Tuple of constants misidentified when presented with:
         # . . . opcode_with_arg 100   unary_opcode   BUILD_TUPLE 1 . . .
@@ -141,6 +163,7 @@ def crater():
                     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
                     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
                    ],)
+        self.check_lnotab(crater)
 
     def test_folding_of_lists_of_constants(self):
         for line, elem in (
@@ -153,6 +176,7 @@ def test_folding_of_lists_of_constants(self):
             code = compile(line, '', 'single')
             self.assertInBytecode(code, 'LOAD_CONST', elem)
             self.assertNotInBytecode(code, 'BUILD_LIST')
+            self.check_lnotab(code)
 
     def test_folding_of_sets_of_constants(self):
         for line, elem in (
@@ -166,6 +190,7 @@ def test_folding_of_sets_of_constants(self):
             code = compile(line, '', 'single')
             self.assertNotInBytecode(code, 'BUILD_SET')
             self.assertInBytecode(code, 'LOAD_CONST', elem)
+            self.check_lnotab(code)
 
         # Ensure that the resulting code actually works:
         def f(a):
@@ -176,9 +201,11 @@ def g(a):
 
         self.assertTrue(f(3))
         self.assertTrue(not f(4))
+        self.check_lnotab(f)
 
         self.assertTrue(not g(3))
         self.assertTrue(g(4))
+        self.check_lnotab(g)
 
 
     def test_folding_of_binops_on_constants(self):
@@ -203,41 +230,50 @@ def test_folding_of_binops_on_constants(self):
             self.assertInBytecode(code, 'LOAD_CONST', elem)
             for instr in dis.get_instructions(code):
                 self.assertFalse(instr.opname.startswith('BINARY_'))
+            self.check_lnotab(code)
 
         # Verify that unfoldables are skipped
         code = compile('a=2+"b"', '', 'single')
         self.assertInBytecode(code, 'LOAD_CONST', 2)
         self.assertInBytecode(code, 'LOAD_CONST', 'b')
+        self.check_lnotab(code)
 
         # Verify that large sequences do not result from folding
         code = compile('a="x"*10000', '', 'single')
         self.assertInBytecode(code, 'LOAD_CONST', 10000)
         self.assertNotIn("x"*10000, code.co_consts)
+        self.check_lnotab(code)
         code = compile('a=1<<1000', '', 'single')
         self.assertInBytecode(code, 'LOAD_CONST', 1000)
         self.assertNotIn(1<<1000, code.co_consts)
+        self.check_lnotab(code)
         code = compile('a=2**1000', '', 'single')
         self.assertInBytecode(code, 'LOAD_CONST', 1000)
         self.assertNotIn(2**1000, code.co_consts)
+        self.check_lnotab(code)
 
     def test_binary_subscr_on_unicode(self):
         # valid code get optimized
         code = compile('"foo"[0]', '', 'single')
         self.assertInBytecode(code, 'LOAD_CONST', 'f')
         self.assertNotInBytecode(code, 'BINARY_SUBSCR')
+        self.check_lnotab(code)
         code = compile('"\u0061\uffff"[1]', '', 'single')
         self.assertInBytecode(code, 'LOAD_CONST', '\uffff')
         self.assertNotInBytecode(code,'BINARY_SUBSCR')
+        self.check_lnotab(code)
 
         # With PEP 393, non-BMP char get optimized
         code = compile('"\U00012345"[0]', '', 'single')
         self.assertInBytecode(code, 'LOAD_CONST', '\U00012345')
         self.assertNotInBytecode(code, 'BINARY_SUBSCR')
+        self.check_lnotab(code)
 
         # invalid code doesn't get optimized
         # out of range
         code = compile('"fuu"[10]', '', 'single')
         self.assertInBytecode(code, 'BINARY_SUBSCR')
+        self.check_lnotab(code)
 
     def test_folding_of_unaryops_on_constants(self):
         for line, elem in (
@@ -252,13 +288,15 @@ def test_folding_of_unaryops_on_constants(self):
             self.assertInBytecode(code, 'LOAD_CONST', elem)
             for instr in dis.get_instructions(code):
                 self.assertFalse(instr.opname.startswith('UNARY_'))
+            self.check_lnotab(code)
 
         # Check that -0.0 works after marshaling
         def negzero():
             return -(1.0-1.0)
 
-        for instr in dis.get_instructions(code):
+        for instr in dis.get_instructions(negzero):
             self.assertFalse(instr.opname.startswith('UNARY_'))
+        self.check_lnotab(negzero)
 
         # Verify that unfoldables are skipped
         for line, elem, opname in (
@@ -268,6 +306,7 @@ def negzero():
             code = compile(line, '', 'single')
             self.assertInBytecode(code, 'LOAD_CONST', elem)
             self.assertInBytecode(code, opname)
+            self.check_lnotab(code)
 
     def test_elim_extra_return(self):
         # RETURN LOAD_CONST None RETURN --> RETURN
@@ -277,6 +316,7 @@ def f(x):
         returns = [instr for instr in dis.get_instructions(f)
                    if instr.opname == 'RETURN_VALUE']
         self.assertEqual(len(returns), 1)
+        self.check_lnotab(f)
 
     def test_elim_jump_to_return(self):
         # JUMP_FORWARD to RETURN --> RETURN
@@ -290,6 +330,7 @@ def f(cond, true_value, false_value):
         returns = [instr for instr in dis.get_instructions(f)
                    if instr.opname == 'RETURN_VALUE']
         self.assertEqual(len(returns), 2)
+        self.check_lnotab(f)
 
     def test_elim_jump_to_uncond_jump(self):
         # POP_JUMP_IF_FALSE to JUMP_FORWARD --> POP_JUMP_IF_FALSE to non-jump
@@ -302,6 +343,7 @@ def f():
             else:
                 baz()
         self.check_jump_targets(f)
+        self.check_lnotab(f)
 
     def test_elim_jump_to_uncond_jump2(self):
         # POP_JUMP_IF_FALSE to JUMP_ABSOLUTE --> POP_JUMP_IF_FALSE to non-jump
@@ -312,6 +354,7 @@ def f():
                    or d):
                 a = foo()
         self.check_jump_targets(f)
+        self.check_lnotab(f)
 
     def test_elim_jump_to_uncond_jump3(self):
         # Intentionally use two-line expressions to test issue37213.
@@ -320,18 +363,21 @@ def f(a, b, c):
             return ((a and b)
                     and c)
         self.check_jump_targets(f)
+        self.check_lnotab(f)
         self.assertEqual(count_instr_recursively(f, 'JUMP_IF_FALSE_OR_POP'), 2)
         # JUMP_IF_TRUE_OR_POP to JUMP_IF_TRUE_OR_POP --> JUMP_IF_TRUE_OR_POP to non-jump
         def f(a, b, c):
             return ((a or b)
                     or c)
         self.check_jump_targets(f)
+        self.check_lnotab(f)
         self.assertEqual(count_instr_recursively(f, 'JUMP_IF_TRUE_OR_POP'), 2)
         # JUMP_IF_FALSE_OR_POP to JUMP_IF_TRUE_OR_POP --> POP_JUMP_IF_FALSE to non-jump
         def f(a, b, c):
             return ((a and b)
                     or c)
         self.check_jump_targets(f)
+        self.check_lnotab(f)
         self.assertNotInBytecode(f, 'JUMP_IF_FALSE_OR_POP')
         self.assertInBytecode(f, 'JUMP_IF_TRUE_OR_POP')
         self.assertInBytecode(f, 'POP_JUMP_IF_FALSE')
@@ -340,6 +386,7 @@ def f(a, b, c):
             return ((a or b)
                     and c)
         self.check_jump_targets(f)
+        self.check_lnotab(f)
         self.assertNotInBytecode(f, 'JUMP_IF_TRUE_OR_POP')
         self.assertInBytecode(f, 'JUMP_IF_FALSE_OR_POP')
         self.assertInBytecode(f, 'POP_JUMP_IF_TRUE')
@@ -360,6 +407,7 @@ def f(cond1, cond2):
         returns = [instr for instr in dis.get_instructions(f)
                    if instr.opname == 'RETURN_VALUE']
         self.assertLessEqual(len(returns), 6)
+        self.check_lnotab(f)
 
     def test_elim_jump_after_return2(self):
         # Eliminate dead code: jumps immediately after returns can't be reached
@@ -374,6 +422,7 @@ def f(cond1, cond2):
         returns = [instr for instr in dis.get_instructions(f)
                    if instr.opname == 'RETURN_VALUE']
         self.assertLessEqual(len(returns), 2)
+        self.check_lnotab(f)
 
     def test_make_function_doesnt_bail(self):
         def f():
@@ -381,6 +430,7 @@ def g()->1+1:
                 pass
             return g
         self.assertNotInBytecode(f, 'BINARY_ADD')
+        self.check_lnotab(f)
 
     def test_constant_folding(self):
         # Issue #11244: aggressive constant folding.
@@ -401,17 +451,20 @@ def test_constant_folding(self):
             self.assertFalse(instr.opname.startswith('UNARY_'))
             self.assertFalse(instr.opname.startswith('BINARY_'))
             self.assertFalse(instr.opname.startswith('BUILD_'))
+            self.check_lnotab(code)
 
     def test_in_literal_list(self):
         def containtest():
             return x in [a, b]
         self.assertEqual(count_instr_recursively(containtest, 'BUILD_LIST'), 0)
+        self.check_lnotab(containtest)
 
     def test_iterate_literal_list(self):
         def forloop():
             for x in [a, b]:
                 pass
         self.assertEqual(count_instr_recursively(forloop, 'BUILD_LIST'), 0)
+        self.check_lnotab(forloop)
 
     def test_condition_with_binop_with_bools(self):
         def f():
@@ -419,6 +472,7 @@ def f():
                 return 1
             return 0
         self.assertEqual(f(), 1)
+        self.check_lnotab(f)
 
     def test_if_with_if_expression(self):
         # Check bpo-37289
@@ -427,6 +481,19 @@ def f(x):
                 return True
             return False
         self.assertTrue(f(True))
+        self.check_lnotab(f)
+
+    def test_trailing_nops(self):
+        # Check the lnotab of a function that even after trivial
+        # optimization has trailing nops, which the lnotab adjustment has to
+        # handle properly (bpo-38115).
+        def f(x):
+            while 1:
+                return 3
+            while 1:
+                return 5
+            return 6
+        self.check_lnotab(f)
 
 
 class TestBuglets(unittest.TestCase):
diff --git a/Python/peephole.c b/Python/peephole.c
index d859648411fa3f..3a275bfbea762c 100644
--- a/Python/peephole.c
+++ b/Python/peephole.c
@@ -457,7 +457,7 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
         }
     }
 
-    /* Fixup lnotab */
+    /* Map old to new bytecode positions */
     for (i = 0, nops = 0; i < codelen; i++) {
         size_t block = (size_t)i - nops;
         /* cannot overflow: codelen <= INT_MAX */
@@ -468,18 +468,14 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
             nops++;
         }
     }
-    cum_orig_offset = 0;
-    last_offset = 0;
-    for (i=0 ; i < tabsiz ; i+=2) {
-        unsigned int offset_delta, new_offset;
-        cum_orig_offset += lnotab[i];
-        assert(cum_orig_offset % sizeof(_Py_CODEUNIT) == 0);
-        new_offset = blocks[cum_orig_offset / sizeof(_Py_CODEUNIT)] *
-                sizeof(_Py_CODEUNIT);
-        offset_delta = new_offset - last_offset;
-        assert(offset_delta <= 255);
-        lnotab[i] = (unsigned char)offset_delta;
-        last_offset = new_offset;
+    /* The 'blocks' old-to-new opcode position map can leave the new opcode
+     * position invalid in the new codestring, if the old codestring ended
+     * with NOPs. Normally a NOP should map to the next bytecode after the
+     * NOP (in case the NOP is a jump target), but positions past the end
+     * should point at the end, instead. This (currently) doesn't matter for
+     * jump targets, but it matters for lnotab. */
+    while (blocks[--i] > (codelen - nops - 1)) {
+        blocks[i] = (codelen - nops - 1);
     }
 
     /* Remove NOPs and fixup jump targets */
@@ -521,6 +517,23 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
         h += ilen;
     }
     assert(h + (Py_ssize_t)nops == codelen);
+    assert(blocks[codelen - 1] < h);
+
+    /* Fixup lnotab. This modifies the lnotab bytes object in-place, so it
+     * has to happen last. */
+    cum_orig_offset = 0;
+    last_offset = 0;
+    for (i=0 ; i < tabsiz ; i+=2) {
+        unsigned int offset_delta, new_offset;
+        cum_orig_offset += lnotab[i];
+        assert(cum_orig_offset % sizeof(_Py_CODEUNIT) == 0);
+        new_offset = blocks[cum_orig_offset / sizeof(_Py_CODEUNIT)] *
+                sizeof(_Py_CODEUNIT);
+        offset_delta = new_offset - last_offset;
+        assert(offset_delta <= 255);
+        lnotab[i] = (unsigned char)offset_delta;
+        last_offset = new_offset;
+    }
 
     PyMem_Free(blocks);
     code = PyBytes_FromStringAndSize((char *)codestr, h * sizeof(_Py_CODEUNIT));

From a64fc3ca90d3dd45455f5df5908857bcd5682ad4 Mon Sep 17 00:00:00 2001
From: Thomas Wouters
Date: Wed, 11 Sep 2019 07:43:11 -0700
Subject: [PATCH 2/6] Add blurb.

---
 .../Core and Builtins/2019-09-11-15-41-11.bpo-38115-_Yhg1s.rst | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 Misc/NEWS.d/next/Core and Builtins/2019-09-11-15-41-11.bpo-38115-_Yhg1s.rst

diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-09-11-15-41-11.bpo-38115-_Yhg1s.rst b/Misc/NEWS.d/next/Core and Builtins/2019-09-11-15-41-11.bpo-38115-_Yhg1s.rst
new file mode 100644
index 00000000000000..a91a5e0eb49e02
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2019-09-11-15-41-11.bpo-38115-_Yhg1s.rst
@@ -0,0 +1,2 @@
+Fix a bug in the bytecode optimizer that could (rarely) generate invalid
+bytecode offsets in the code object's line number information.

From d870d185f3d7e5228434d5f11679ca794fbcc9be Mon Sep 17 00:00:00 2001
From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com>
Date: Wed, 11 Sep 2019 14:50:52 +0000
Subject: [PATCH 3/6] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?=
 =?UTF-8?q?rb=5Fit.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../Core and Builtins/2019-09-11-14-50-48.bpo-38115.cVUqj0.rst | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 Misc/NEWS.d/next/Core and Builtins/2019-09-11-14-50-48.bpo-38115.cVUqj0.rst

diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-09-11-14-50-48.bpo-38115.cVUqj0.rst b/Misc/NEWS.d/next/Core and Builtins/2019-09-11-14-50-48.bpo-38115.cVUqj0.rst
new file mode 100644
index 00000000000000..066f491952338c
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2019-09-11-14-50-48.bpo-38115.cVUqj0.rst
@@ -0,0 +1 @@
+Fix a bug in the bytecode optimizer that could (rarely) generate invalid bytecode offsets in the code object's line number information.
\ No newline at end of file

From 8b74c424ba4b2fabc938a20f2df5ad3d3455ff68 Mon Sep 17 00:00:00 2001
From: "T. Wouters"
Date: Wed, 11 Sep 2019 07:51:27 -0700
Subject: [PATCH 4/6] Delete 2019-09-11-15-41-11.bpo-38115-_Yhg1s.rst

---
 .../Core and Builtins/2019-09-11-15-41-11.bpo-38115-_Yhg1s.rst | 2 --
 1 file changed, 2 deletions(-)
 delete mode 100644 Misc/NEWS.d/next/Core and Builtins/2019-09-11-15-41-11.bpo-38115-_Yhg1s.rst

diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-09-11-15-41-11.bpo-38115-_Yhg1s.rst b/Misc/NEWS.d/next/Core and Builtins/2019-09-11-15-41-11.bpo-38115-_Yhg1s.rst
deleted file mode 100644
index a91a5e0eb49e02..00000000000000
--- a/Misc/NEWS.d/next/Core and Builtins/2019-09-11-15-41-11.bpo-38115-_Yhg1s.rst
+++ /dev/null
@@ -1,2 +0,0 @@
-Fix a bug in the bytecode optimizer that could (rarely) generate invalid
-bytecode offsets in the code object's line number information.

From 11433f2558adaa4e66c9dabdf5bcac3471720ff7 Mon Sep 17 00:00:00 2001
From: Thomas Wouters
Date: Wed, 11 Sep 2019 08:21:13 -0700
Subject: [PATCH 5/6] Add lower bound check when walking backwards over the
 jump table.
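
To make the failure mode concrete: below is a rough Python model of the
clamping loop (a sketch only, not the actual C code; the names mirror the
variables in peephole.c).

    # Python sketch of the tail-end fixup of 'blocks', the old-to-new
    # instruction position map built by the peephole optimizer.
    def clamp_blocks(blocks, codelen, nops):
        last_valid = codelen - nops - 1  # last index in the new codestring
        i = codelen - 1
        # Without the explicit lower bound on i, code whose trailing
        # positions all map past the end (and, with the previous
        # 'while (blocks[--i] > ...)' form, empty bytecode) would read
        # before the start of the array.
        while i >= 0 and blocks[i] > last_valid:
            blocks[i] = last_valid
            i -= 1
        return blocks

For example, three real instructions followed by two trailing NOPs give
blocks == [0, 1, 2, 3, 3] with codelen == 5 and nops == 2; the clamp turns
that into [0, 1, 2, 2, 2], so every mapped position stays inside the new
code.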
---
 Python/peephole.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Python/peephole.c b/Python/peephole.c
index 3a275bfbea762c..b892e32b8f18cf 100644
--- a/Python/peephole.c
+++ b/Python/peephole.c
@@ -474,7 +474,7 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
      * NOP (in case the NOP is a jump target), but positions past the end
      * should point at the end, instead. This (currently) doesn't matter for
      * jump targets, but it matters for lnotab. */
-    while (blocks[--i] > (codelen - nops - 1)) {
+    while (i-- > 0 && blocks[i] > (codelen - nops - 1)) {
         blocks[i] = (codelen - nops - 1);
     }
 

From 069b41806e95b12f902105517a0d9e25280f1604 Mon Sep 17 00:00:00 2001
From: Thomas Wouters
Date: Wed, 11 Sep 2019 08:35:17 -0700
Subject: [PATCH 6/6] Make the loop fixing up the tail end of 'blocks' more
 explicit, and fix the assert in the face of empty bytecode.

---
 Python/peephole.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Python/peephole.c b/Python/peephole.c
index b892e32b8f18cf..001775ef43583d 100644
--- a/Python/peephole.c
+++ b/Python/peephole.c
@@ -474,7 +474,7 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
      * NOP (in case the NOP is a jump target), but positions past the end
      * should point at the end, instead. This (currently) doesn't matter for
      * jump targets, but it matters for lnotab. */
-    while (i-- > 0 && blocks[i] > (codelen - nops - 1)) {
+    for (i = codelen-1; i >= 0 && blocks[i] > (codelen - nops - 1); i--) {
         blocks[i] = (codelen - nops - 1);
     }
 
@@ -517,7 +517,7 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
         h += ilen;
     }
     assert(h + (Py_ssize_t)nops == codelen);
-    assert(blocks[codelen - 1] < h);
+    assert(codelen == 0 || blocks[codelen - 1] < h);
 
     /* Fixup lnotab. This modifies the lnotab bytes object in-place, so it
      * has to happen last. */
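
Reviewer note: a standalone way to poke at the failure mode outside the
test suite. The sketch below mirrors what the new check_lnotab() helper
asserts, using only public dis APIs; lnotab_offsets_ok is a made-up name
for illustration.

    import dis

    def f(x):
        # After optimization the second 'while' block and 'return 6' are
        # dead code; before this fix, removing those trailing instructions
        # could leave lnotab entries pointing past the end of co_code.
        while 1:
            return 3
        while 1:
            return 5
        return 6

    def lnotab_offsets_ok(func):
        code = func.__code__
        offsets = [offset for offset, lineno in dis.findlinestarts(code)]
        # Every recorded line-start offset must fall inside the bytecode.
        return all(0 <= offset < len(code.co_code) for offset in offsets)

    print(lnotab_offsets_ok(f))  # expected: True on a fixed interpreter

On an unfixed interpreter, the dead trailing lines of f() could leave a
line-start offset at or past len(f.__code__.co_code), making the final
all() come out False.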
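For tracing the arithmetic of the relocated lnotab fixup loop, here is a
Python paraphrase (a sketch only; 'codeunit' stands in for
sizeof(_Py_CODEUNIT), which is 2 bytes).

    def fixup_lnotab(lnotab, blocks, codeunit=2):
        # lnotab is pairs of (bytecode offset delta, line number delta);
        # rewrite each offset delta to match the post-optimization offsets.
        lnotab = bytearray(lnotab)
        cum_orig_offset = 0
        last_offset = 0
        for i in range(0, len(lnotab), 2):
            cum_orig_offset += lnotab[i]
            new_offset = blocks[cum_orig_offset // codeunit] * codeunit
            # The delta must fit in one byte; the C code asserts this.
            lnotab[i] = new_offset - last_offset
            last_offset = new_offset
        return bytes(lnotab)

This is why the clamping has to run first: cum_orig_offset can land on a
trailing NOP whose blocks[] entry pointed past the new code, and the clamp
keeps new_offset inside the rewritten bytecode.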