From 6860ed7b5c42ad61426d750d803f306ebeb809e9 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 21 Nov 2022 20:21:11 -0800 Subject: [PATCH 01/26] Make BINARY_OP_INPLACE_ADD_UNICODE a legit super instruction This doesn't really make things much cleaner, but it works. --- Python/bytecodes.c | 22 ++++++----- Python/generated_cases.c.h | 76 +++++++++++++++++++++----------------- 2 files changed, 54 insertions(+), 44 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 41dd1acc937d71..c5939321990016 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -83,6 +83,8 @@ static PyObject *value, *value1, *value2, *left, *right, *res, *sum, *prod, *sub static PyObject *container, *start, *stop, *v, *lhs, *rhs; static PyObject *list, *tuple, *dict; static PyObject *exit_func, *lasti, *val; +#define _BINARY_OP_INPLACE_ADD_UNICODE_PART_1 1 +#define _BINARY_OP_INPLACE_ADD_UNICODE_PART_2 2 static PyObject * dummy_func( @@ -197,7 +199,7 @@ dummy_func( BINARY_OP_ADD_FLOAT, BINARY_OP_ADD_INT, BINARY_OP_ADD_UNICODE, - // BINARY_OP_INPLACE_ADD_UNICODE, // This is an odd duck. + _BINARY_OP_INPLACE_ADD_UNICODE_PART_1, BINARY_OP_MULTIPLY_FLOAT, BINARY_OP_MULTIPLY_INT, BINARY_OP_SUBTRACT_FLOAT, @@ -263,13 +265,10 @@ dummy_func( ERROR_IF(res == NULL, error); } - // This is a subtle one. It's a super-instruction for - // BINARY_OP_ADD_UNICODE followed by STORE_FAST - // where the store goes into the left argument. - // So the inputs are the same as for all BINARY_OP - // specializations, but there is no output. - // At the end we just skip over the STORE_FAST. - inst(BINARY_OP_INPLACE_ADD_UNICODE, (left, right --)) { + // Part 1's output effect is a lie -- it has no result. + // Part 2's input effect is equally a lie, and the two lies + // cancel each other out. 
+ op(_BINARY_OP_INPLACE_ADD_UNICODE_PART_1, (left, right, unused/1 -- unused)) { assert(cframe.use_tracing == 0); DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_OP); DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP); @@ -295,9 +294,12 @@ dummy_func( PyUnicode_Append(target_local, right); _Py_DECREF_SPECIALIZED(right, _PyUnicode_ExactDealloc); ERROR_IF(*target_local == NULL, error); - // The STORE_FAST is already done. - JUMPBY(INLINE_CACHE_ENTRIES_BINARY_OP + 1); } + op(_BINARY_OP_INPLACE_ADD_UNICODE_PART_2, (unused --)) { + // The STORE_FAST is already done; oparg is dead. + } + super(BINARY_OP_INPLACE_ADD_UNICODE) = + _BINARY_OP_INPLACE_ADD_UNICODE_PART_1 + _BINARY_OP_INPLACE_ADD_UNICODE_PART_2; inst(BINARY_OP_ADD_FLOAT, (left, right, unused/1 -- sum)) { assert(cframe.use_tracing == 0); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 3a403824b49958..17210fa2ac1e1d 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -218,40 +218,6 @@ DISPATCH(); } - TARGET(BINARY_OP_INPLACE_ADD_UNICODE) { - PyObject *right = PEEK(1); - PyObject *left = PEEK(2); - assert(cframe.use_tracing == 0); - DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_OP); - DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP); - _Py_CODEUNIT true_next = next_instr[INLINE_CACHE_ENTRIES_BINARY_OP]; - assert(_Py_OPCODE(true_next) == STORE_FAST || - _Py_OPCODE(true_next) == STORE_FAST__LOAD_FAST); - PyObject **target_local = &GETLOCAL(_Py_OPARG(true_next)); - DEOPT_IF(*target_local != left, BINARY_OP); - STAT_INC(BINARY_OP, hit); - /* Handle `left = left + right` or `left += right` for str. - * - * When possible, extend `left` in place rather than - * allocating a new PyUnicodeObject. This attempts to avoid - * quadratic behavior when one neglects to use str.join(). 
- * - * If `left` has only two references remaining (one from - * the stack, one in the locals), DECREFing `left` leaves - * only the locals reference, so PyUnicode_Append knows - * that the string is safe to mutate. - */ - assert(Py_REFCNT(left) >= 2); - _Py_DECREF_NO_DEALLOC(left); - PyUnicode_Append(target_local, right); - _Py_DECREF_SPECIALIZED(right, _PyUnicode_ExactDealloc); - if (*target_local == NULL) goto pop_2_error; - // The STORE_FAST is already done. - JUMPBY(INLINE_CACHE_ENTRIES_BINARY_OP + 1); - STACK_SHRINK(2); - DISPATCH(); - } - TARGET(BINARY_OP_ADD_FLOAT) { PyObject *right = PEEK(1); PyObject *left = PEEK(2); @@ -3812,6 +3778,48 @@ DISPATCH(); } + TARGET(BINARY_OP_INPLACE_ADD_UNICODE) { + PyObject *_tmp_1 = PEEK(2); + PyObject *_tmp_2 = PEEK(1); + { + PyObject *right = _tmp_2; + PyObject *left = _tmp_1; + assert(cframe.use_tracing == 0); + DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_OP); + DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP); + _Py_CODEUNIT true_next = next_instr[INLINE_CACHE_ENTRIES_BINARY_OP]; + assert(_Py_OPCODE(true_next) == STORE_FAST || + _Py_OPCODE(true_next) == STORE_FAST__LOAD_FAST); + PyObject **target_local = &GETLOCAL(_Py_OPARG(true_next)); + DEOPT_IF(*target_local != left, BINARY_OP); + STAT_INC(BINARY_OP, hit); + /* Handle `left = left + right` or `left += right` for str. + * + * When possible, extend `left` in place rather than + * allocating a new PyUnicodeObject. This attempts to avoid + * quadratic behavior when one neglects to use str.join(). + * + * If `left` has only two references remaining (one from + * the stack, one in the locals), DECREFing `left` leaves + * only the locals reference, so PyUnicode_Append knows + * that the string is safe to mutate. 
+ */ + assert(Py_REFCNT(left) >= 2); + _Py_DECREF_NO_DEALLOC(left); + PyUnicode_Append(target_local, right); + _Py_DECREF_SPECIALIZED(right, _PyUnicode_ExactDealloc); + if (*target_local == NULL) goto pop_2_error; + } + next_instr += 1; + NEXTOPARG(); + next_instr++; + { + // The STORE_FAST is already done; oparg is dead. + } + STACK_SHRINK(2); + DISPATCH(); + } + TARGET(END_FOR) { PyObject *_tmp_1 = PEEK(2); PyObject *_tmp_2 = PEEK(1); From dcba32eeccf5a613bc5a2db5595d94591fced441 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 23 Nov 2022 18:23:21 -0800 Subject: [PATCH 02/26] COMPARE_OP --- Python/bytecodes.c | 28 ++++++++++------------------ Python/generated_cases.c.h | 18 ++++++++---------- 2 files changed, 18 insertions(+), 28 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index c5939321990016..54e80d32162915 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2054,13 +2054,17 @@ dummy_func( JUMPBY(INLINE_CACHE_ENTRIES_STORE_ATTR); } - // stack effect: (__0 -- ) - inst(COMPARE_OP) { + // family(compare_op) = { + // COMPARE_OP, + // COMPARE_OP_FLOAT_JUMP, + // COMPARE_OP_INT_JUMP, + // COMPARE_OP_STR_JUMP, + // }; + + inst(COMPARE_OP, (unused/1, left, right, unused/1 -- res)) { _PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { assert(cframe.use_tracing == 0); - PyObject *right = TOP(); - PyObject *left = SECOND(); next_instr--; _Py_Specialize_CompareOp(left, right, next_instr, oparg); DISPATCH_SAME_OPARG(); @@ -2068,16 +2072,10 @@ dummy_func( STAT_INC(COMPARE_OP, deferred); DECREMENT_ADAPTIVE_COUNTER(cache->counter); assert(oparg <= Py_GE); - PyObject *right = POP(); - PyObject *left = TOP(); - PyObject *res = PyObject_RichCompare(left, right, oparg); - SET_TOP(res); + res = PyObject_RichCompare(left, right, oparg); Py_DECREF(left); Py_DECREF(right); - if (res == NULL) { - goto error; - } - JUMPBY(INLINE_CACHE_ENTRIES_COMPARE_OP); + ERROR_IF(res == NULL, error); } 
// stack effect: (__0 -- ) @@ -3690,9 +3688,6 @@ dummy_func( // Future families go below this point // -family(binary_subscr) = { - BINARY_SUBSCR, BINARY_SUBSCR_DICT, - BINARY_SUBSCR_GETITEM, BINARY_SUBSCR_LIST_INT, BINARY_SUBSCR_TUPLE_INT }; family(call) = { CALL, CALL_PY_EXACT_ARGS, CALL_PY_WITH_DEFAULTS, CALL_BOUND_METHOD_EXACT_ARGS, CALL_BUILTIN_CLASS, @@ -3701,9 +3696,6 @@ family(call) = { CALL_NO_KW_LIST_APPEND, CALL_NO_KW_METHOD_DESCRIPTOR_FAST, CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS, CALL_NO_KW_METHOD_DESCRIPTOR_O, CALL_NO_KW_STR_1, CALL_NO_KW_TUPLE_1, CALL_NO_KW_TYPE_1 }; -family(compare_op) = { - COMPARE_OP, COMPARE_OP_FLOAT_JUMP, - COMPARE_OP_INT_JUMP, COMPARE_OP_STR_JUMP }; family(for_iter) = { FOR_ITER, FOR_ITER_LIST, FOR_ITER_RANGE }; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 17210fa2ac1e1d..8329a1a986376b 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2035,11 +2035,12 @@ TARGET(COMPARE_OP) { PREDICTED(COMPARE_OP); + PyObject *right = PEEK(1); + PyObject *left = PEEK(2); + PyObject *res; _PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { assert(cframe.use_tracing == 0); - PyObject *right = TOP(); - PyObject *left = SECOND(); next_instr--; _Py_Specialize_CompareOp(left, right, next_instr, oparg); DISPATCH_SAME_OPARG(); @@ -2047,16 +2048,13 @@ STAT_INC(COMPARE_OP, deferred); DECREMENT_ADAPTIVE_COUNTER(cache->counter); assert(oparg <= Py_GE); - PyObject *right = POP(); - PyObject *left = TOP(); - PyObject *res = PyObject_RichCompare(left, right, oparg); - SET_TOP(res); + res = PyObject_RichCompare(left, right, oparg); Py_DECREF(left); Py_DECREF(right); - if (res == NULL) { - goto error; - } - JUMPBY(INLINE_CACHE_ENTRIES_COMPARE_OP); + if (res == NULL) goto pop_2_error; + STACK_SHRINK(1); + POKE(1, res); + next_instr += 2; DISPATCH(); } From 30cb8cd5bd0c455ded907ea19c2aff60622a8fee Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 
23 Nov 2022 18:59:29 -0800 Subject: [PATCH 03/26] COMPARE_OP_FLOAT_JUMP --- Python/bytecodes.c | 46 +++++++++++++------------ Python/generated_cases.c.h | 69 +++++++++++++++++++++----------------- 2 files changed, 62 insertions(+), 53 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 54e80d32162915..67ade1a6cfad3b 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -83,8 +83,14 @@ static PyObject *value, *value1, *value2, *left, *right, *res, *sum, *prod, *sub static PyObject *container, *start, *stop, *v, *lhs, *rhs; static PyObject *list, *tuple, *dict; static PyObject *exit_func, *lasti, *val; +static PyObject *jump; +// Dummy variables for stack effects +static int when_to_jump_mask; +// Dummy opcode names for 'op' opcodes #define _BINARY_OP_INPLACE_ADD_UNICODE_PART_1 1 #define _BINARY_OP_INPLACE_ADD_UNICODE_PART_2 2 +#define _COMPARE_OP_FLOAT 3 +#define _JUMP_ON_SIGN 4 static PyObject * dummy_func( @@ -2054,12 +2060,12 @@ dummy_func( JUMPBY(INLINE_CACHE_ENTRIES_STORE_ATTR); } - // family(compare_op) = { - // COMPARE_OP, - // COMPARE_OP_FLOAT_JUMP, - // COMPARE_OP_INT_JUMP, - // COMPARE_OP_STR_JUMP, - // }; + family(compare_op) = { + COMPARE_OP, + _COMPARE_OP_FLOAT, + // COMPARE_OP_INT_JUMP, + // COMPARE_OP_STR_JUMP, + }; inst(COMPARE_OP, (unused/1, left, right, unused/1 -- res)) { _PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr; @@ -2078,36 +2084,32 @@ dummy_func( ERROR_IF(res == NULL, error); } - // stack effect: (__0 -- ) - inst(COMPARE_OP_FLOAT_JUMP) { + // The result is an int disguised as an object pointer. + op(_COMPARE_OP_FLOAT, (unused/1, left, right, when_to_jump_mask/1 -- jump)) { assert(cframe.use_tracing == 0); // Combined: COMPARE_OP (float ? 
float) + POP_JUMP_IF_(true/false) - _PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr; - int when_to_jump_mask = cache->mask; - PyObject *right = TOP(); - PyObject *left = SECOND(); DEOPT_IF(!PyFloat_CheckExact(left), COMPARE_OP); DEOPT_IF(!PyFloat_CheckExact(right), COMPARE_OP); double dleft = PyFloat_AS_DOUBLE(left); double dright = PyFloat_AS_DOUBLE(right); - int sign = (dleft > dright) - (dleft < dright); + // 1 if <, 2 if ==, 4 if >; this matches when _to_jump_mask + int sign_ish = 2*(dleft > dright) + 2 - (dleft < dright); DEOPT_IF(isnan(dleft), COMPARE_OP); DEOPT_IF(isnan(dright), COMPARE_OP); STAT_INC(COMPARE_OP, hit); - JUMPBY(INLINE_CACHE_ENTRIES_COMPARE_OP); - NEXTOPARG(); - STACK_SHRINK(2); _Py_DECREF_SPECIALIZED(left, _PyFloat_ExactDealloc); _Py_DECREF_SPECIALIZED(right, _PyFloat_ExactDealloc); + jump = (PyObject *)(size_t)(sign_ish & when_to_jump_mask); + } + // The input is an int disguised as an object pointer! + op(_JUMP_ON_SIGN, (jump --)) { assert(opcode == POP_JUMP_IF_FALSE || opcode == POP_JUMP_IF_TRUE); - int jump = (1 << (sign + 1)) & when_to_jump_mask; - if (!jump) { - next_instr++; - } - else { - JUMPBY(1 + oparg); + if (jump) { + JUMPBY(oparg); } } + // We're praying that the compiler optimizes the flags manipuations. + super(COMPARE_OP_FLOAT_JUMP) = _COMPARE_OP_FLOAT + _JUMP_ON_SIGN; // stack effect: (__0 -- ) inst(COMPARE_OP_INT_JUMP) { diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 8329a1a986376b..6fbf905fa721e6 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2058,37 +2058,6 @@ DISPATCH(); } - TARGET(COMPARE_OP_FLOAT_JUMP) { - assert(cframe.use_tracing == 0); - // Combined: COMPARE_OP (float ? 
float) + POP_JUMP_IF_(true/false) - _PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr; - int when_to_jump_mask = cache->mask; - PyObject *right = TOP(); - PyObject *left = SECOND(); - DEOPT_IF(!PyFloat_CheckExact(left), COMPARE_OP); - DEOPT_IF(!PyFloat_CheckExact(right), COMPARE_OP); - double dleft = PyFloat_AS_DOUBLE(left); - double dright = PyFloat_AS_DOUBLE(right); - int sign = (dleft > dright) - (dleft < dright); - DEOPT_IF(isnan(dleft), COMPARE_OP); - DEOPT_IF(isnan(dright), COMPARE_OP); - STAT_INC(COMPARE_OP, hit); - JUMPBY(INLINE_CACHE_ENTRIES_COMPARE_OP); - NEXTOPARG(); - STACK_SHRINK(2); - _Py_DECREF_SPECIALIZED(left, _PyFloat_ExactDealloc); - _Py_DECREF_SPECIALIZED(right, _PyFloat_ExactDealloc); - assert(opcode == POP_JUMP_IF_FALSE || opcode == POP_JUMP_IF_TRUE); - int jump = (1 << (sign + 1)) & when_to_jump_mask; - if (!jump) { - next_instr++; - } - else { - JUMPBY(1 + oparg); - } - DISPATCH(); - } - TARGET(COMPARE_OP_INT_JUMP) { assert(cframe.use_tracing == 0); // Combined: COMPARE_OP (int ? int) + POP_JUMP_IF_(true/false) @@ -3818,6 +3787,44 @@ DISPATCH(); } + TARGET(COMPARE_OP_FLOAT_JUMP) { + PyObject *_tmp_1 = PEEK(2); + PyObject *_tmp_2 = PEEK(1); + { + PyObject *right = _tmp_2; + PyObject *left = _tmp_1; + PyObject *jump; + uint16_t when_to_jump_mask = read_u16(next_instr + 1); + assert(cframe.use_tracing == 0); + // Combined: COMPARE_OP (float ? 
float) + POP_JUMP_IF_(true/false) + DEOPT_IF(!PyFloat_CheckExact(left), COMPARE_OP); + DEOPT_IF(!PyFloat_CheckExact(right), COMPARE_OP); + double dleft = PyFloat_AS_DOUBLE(left); + double dright = PyFloat_AS_DOUBLE(right); + // 1 if <, 2 if ==, 4 if >; this matches when _to_jump_mask + int sign_ish = 2*(dleft > dright) + 2 - (dleft < dright); + DEOPT_IF(isnan(dleft), COMPARE_OP); + DEOPT_IF(isnan(dright), COMPARE_OP); + STAT_INC(COMPARE_OP, hit); + _Py_DECREF_SPECIALIZED(left, _PyFloat_ExactDealloc); + _Py_DECREF_SPECIALIZED(right, _PyFloat_ExactDealloc); + jump = (PyObject *)(size_t)(sign_ish & when_to_jump_mask); + _tmp_1 = jump; + } + next_instr += 2; + NEXTOPARG(); + next_instr++; + { + PyObject *jump = _tmp_1; + assert(opcode == POP_JUMP_IF_FALSE || opcode == POP_JUMP_IF_TRUE); + if (jump) { + JUMPBY(oparg); + } + } + STACK_SHRINK(2); + DISPATCH(); + } + TARGET(END_FOR) { PyObject *_tmp_1 = PEEK(2); PyObject *_tmp_2 = PEEK(1); From 6af0a5df1a2c7c0d88ef8ae7a616110a9864b140 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 24 Nov 2022 11:12:43 -0700 Subject: [PATCH 04/26] COMPARE_OP_INT_JUMP --- Python/bytecodes.c | 35 +++++++------------ Python/generated_cases.c.h | 71 +++++++++++++++++++++----------------- 2 files changed, 51 insertions(+), 55 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 67ade1a6cfad3b..6d3507854af634 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -87,10 +87,11 @@ static PyObject *jump; // Dummy variables for stack effects static int when_to_jump_mask; // Dummy opcode names for 'op' opcodes -#define _BINARY_OP_INPLACE_ADD_UNICODE_PART_1 1 -#define _BINARY_OP_INPLACE_ADD_UNICODE_PART_2 2 -#define _COMPARE_OP_FLOAT 3 -#define _JUMP_ON_SIGN 4 +#define _BINARY_OP_INPLACE_ADD_UNICODE_PART_1 1001 +#define _BINARY_OP_INPLACE_ADD_UNICODE_PART_2 1002 +#define _COMPARE_OP_FLOAT 1003 +#define _COMPARE_OP_INT 1004 +#define _JUMP_ON_SIGN 1005 static PyObject * dummy_func( @@ -2063,7 +2064,7 @@ dummy_func( 
family(compare_op) = { COMPARE_OP, _COMPARE_OP_FLOAT, - // COMPARE_OP_INT_JUMP, + _COMPARE_OP_INT, // COMPARE_OP_STR_JUMP, }; @@ -2111,14 +2112,10 @@ dummy_func( // We're praying that the compiler optimizes the flags manipuations. super(COMPARE_OP_FLOAT_JUMP) = _COMPARE_OP_FLOAT + _JUMP_ON_SIGN; - // stack effect: (__0 -- ) - inst(COMPARE_OP_INT_JUMP) { + // Similar to COMPARE_OP_FLOAT + op(_COMPARE_OP_INT, (unused/1, left, right, when_to_jump_mask/1 -- jump)) { assert(cframe.use_tracing == 0); // Combined: COMPARE_OP (int ? int) + POP_JUMP_IF_(true/false) - _PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr; - int when_to_jump_mask = cache->mask; - PyObject *right = TOP(); - PyObject *left = SECOND(); DEOPT_IF(!PyLong_CheckExact(left), COMPARE_OP); DEOPT_IF(!PyLong_CheckExact(right), COMPARE_OP); DEOPT_IF((size_t)(Py_SIZE(left) + 1) > 2, COMPARE_OP); @@ -2127,21 +2124,13 @@ dummy_func( assert(Py_ABS(Py_SIZE(left)) <= 1 && Py_ABS(Py_SIZE(right)) <= 1); Py_ssize_t ileft = Py_SIZE(left) * ((PyLongObject *)left)->ob_digit[0]; Py_ssize_t iright = Py_SIZE(right) * ((PyLongObject *)right)->ob_digit[0]; - int sign = (ileft > iright) - (ileft < iright); - JUMPBY(INLINE_CACHE_ENTRIES_COMPARE_OP); - NEXTOPARG(); - STACK_SHRINK(2); + // 1 if <, 2 if ==, 4 if >; this matches when _to_jump_mask + int sign_ish = 2*(ileft > iright) + 2 - (ileft < iright); _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); - assert(opcode == POP_JUMP_IF_FALSE || opcode == POP_JUMP_IF_TRUE); - int jump = (1 << (sign + 1)) & when_to_jump_mask; - if (!jump) { - next_instr++; - } - else { - JUMPBY(1 + oparg); - } + jump = (PyObject *)(size_t)(sign_ish & when_to_jump_mask); } + super(COMPARE_OP_INT_JUMP) = _COMPARE_OP_INT + _JUMP_ON_SIGN; // stack effect: (__0 -- ) inst(COMPARE_OP_STR_JUMP) { diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 6fbf905fa721e6..42469abcfe103e 100644 --- 
a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2058,38 +2058,6 @@ DISPATCH(); } - TARGET(COMPARE_OP_INT_JUMP) { - assert(cframe.use_tracing == 0); - // Combined: COMPARE_OP (int ? int) + POP_JUMP_IF_(true/false) - _PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr; - int when_to_jump_mask = cache->mask; - PyObject *right = TOP(); - PyObject *left = SECOND(); - DEOPT_IF(!PyLong_CheckExact(left), COMPARE_OP); - DEOPT_IF(!PyLong_CheckExact(right), COMPARE_OP); - DEOPT_IF((size_t)(Py_SIZE(left) + 1) > 2, COMPARE_OP); - DEOPT_IF((size_t)(Py_SIZE(right) + 1) > 2, COMPARE_OP); - STAT_INC(COMPARE_OP, hit); - assert(Py_ABS(Py_SIZE(left)) <= 1 && Py_ABS(Py_SIZE(right)) <= 1); - Py_ssize_t ileft = Py_SIZE(left) * ((PyLongObject *)left)->ob_digit[0]; - Py_ssize_t iright = Py_SIZE(right) * ((PyLongObject *)right)->ob_digit[0]; - int sign = (ileft > iright) - (ileft < iright); - JUMPBY(INLINE_CACHE_ENTRIES_COMPARE_OP); - NEXTOPARG(); - STACK_SHRINK(2); - _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); - assert(opcode == POP_JUMP_IF_FALSE || opcode == POP_JUMP_IF_TRUE); - int jump = (1 << (sign + 1)) & when_to_jump_mask; - if (!jump) { - next_instr++; - } - else { - JUMPBY(1 + oparg); - } - DISPATCH(); - } - TARGET(COMPARE_OP_STR_JUMP) { assert(cframe.use_tracing == 0); // Combined: COMPARE_OP (str == str or str != str) + POP_JUMP_IF_(true/false) @@ -3825,6 +3793,45 @@ DISPATCH(); } + TARGET(COMPARE_OP_INT_JUMP) { + PyObject *_tmp_1 = PEEK(2); + PyObject *_tmp_2 = PEEK(1); + { + PyObject *right = _tmp_2; + PyObject *left = _tmp_1; + PyObject *jump; + uint16_t when_to_jump_mask = read_u16(next_instr + 1); + assert(cframe.use_tracing == 0); + // Combined: COMPARE_OP (int ? 
int) + POP_JUMP_IF_(true/false) + DEOPT_IF(!PyLong_CheckExact(left), COMPARE_OP); + DEOPT_IF(!PyLong_CheckExact(right), COMPARE_OP); + DEOPT_IF((size_t)(Py_SIZE(left) + 1) > 2, COMPARE_OP); + DEOPT_IF((size_t)(Py_SIZE(right) + 1) > 2, COMPARE_OP); + STAT_INC(COMPARE_OP, hit); + assert(Py_ABS(Py_SIZE(left)) <= 1 && Py_ABS(Py_SIZE(right)) <= 1); + Py_ssize_t ileft = Py_SIZE(left) * ((PyLongObject *)left)->ob_digit[0]; + Py_ssize_t iright = Py_SIZE(right) * ((PyLongObject *)right)->ob_digit[0]; + // 1 if <, 2 if ==, 4 if >; this matches when _to_jump_mask + int sign_ish = 2*(ileft > iright) + 2 - (ileft < iright); + _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); + jump = (PyObject *)(size_t)(sign_ish & when_to_jump_mask); + _tmp_1 = jump; + } + next_instr += 2; + NEXTOPARG(); + next_instr++; + { + PyObject *jump = _tmp_1; + assert(opcode == POP_JUMP_IF_FALSE || opcode == POP_JUMP_IF_TRUE); + if (jump) { + JUMPBY(oparg); + } + } + STACK_SHRINK(2); + DISPATCH(); + } + TARGET(END_FOR) { PyObject *_tmp_1 = PEEK(2); PyObject *_tmp_2 = PEEK(1); From 039efbf8406f3259b7af7f9d3dc6f18803bb899b Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 24 Nov 2022 11:32:04 -0700 Subject: [PATCH 05/26] COMPARE_OP_STR_JUMP --- Python/bytecodes.c | 28 +++++----------- Python/generated_cases.c.h | 66 +++++++++++++++++++++----------------- 2 files changed, 44 insertions(+), 50 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6d3507854af634..a87d4a563a6676 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -85,13 +85,14 @@ static PyObject *list, *tuple, *dict; static PyObject *exit_func, *lasti, *val; static PyObject *jump; // Dummy variables for stack effects -static int when_to_jump_mask; +static _Py_CODEUNIT when_to_jump_mask, invert; // Dummy opcode names for 'op' opcodes #define _BINARY_OP_INPLACE_ADD_UNICODE_PART_1 1001 #define _BINARY_OP_INPLACE_ADD_UNICODE_PART_2 1002 #define 
_COMPARE_OP_FLOAT 1003 #define _COMPARE_OP_INT 1004 -#define _JUMP_ON_SIGN 1005 +#define _COMPARE_OP_STR 1005 +#define _JUMP_ON_SIGN 1006 static PyObject * dummy_func( @@ -2065,7 +2066,7 @@ dummy_func( COMPARE_OP, _COMPARE_OP_FLOAT, _COMPARE_OP_INT, - // COMPARE_OP_STR_JUMP, + _COMPARE_OP_STR, }; inst(COMPARE_OP, (unused/1, left, right, unused/1 -- res)) { @@ -2132,35 +2133,22 @@ dummy_func( } super(COMPARE_OP_INT_JUMP) = _COMPARE_OP_INT + _JUMP_ON_SIGN; - // stack effect: (__0 -- ) - inst(COMPARE_OP_STR_JUMP) { + // Similar to COMPARE_OP_FLOAT, but for ==, != only + op(_COMPARE_OP_STR, (unused/1, left, right, invert/1 -- jump)) { assert(cframe.use_tracing == 0); // Combined: COMPARE_OP (str == str or str != str) + POP_JUMP_IF_(true/false) - _PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr; - int invert = cache->mask; - PyObject *right = TOP(); - PyObject *left = SECOND(); DEOPT_IF(!PyUnicode_CheckExact(left), COMPARE_OP); DEOPT_IF(!PyUnicode_CheckExact(right), COMPARE_OP); STAT_INC(COMPARE_OP, hit); int res = _PyUnicode_Equal(left, right); assert(oparg == Py_EQ || oparg == Py_NE); - JUMPBY(INLINE_CACHE_ENTRIES_COMPARE_OP); - NEXTOPARG(); - assert(opcode == POP_JUMP_IF_FALSE || opcode == POP_JUMP_IF_TRUE); - STACK_SHRINK(2); _Py_DECREF_SPECIALIZED(left, _PyUnicode_ExactDealloc); _Py_DECREF_SPECIALIZED(right, _PyUnicode_ExactDealloc); assert(res == 0 || res == 1); assert(invert == 0 || invert == 1); - int jump = res ^ invert; - if (!jump) { - next_instr++; - } - else { - JUMPBY(1 + oparg); - } + jump = (PyObject *)(size_t)(res ^ invert); } + super(COMPARE_OP_STR_JUMP) = _COMPARE_OP_STR + _JUMP_ON_SIGN; // stack effect: (__0 -- ) inst(IS_OP) { diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 42469abcfe103e..9d2b8592d509c3 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2058,36 +2058,6 @@ DISPATCH(); } - TARGET(COMPARE_OP_STR_JUMP) { - assert(cframe.use_tracing == 0); - // Combined: COMPARE_OP (str == 
str or str != str) + POP_JUMP_IF_(true/false) - _PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr; - int invert = cache->mask; - PyObject *right = TOP(); - PyObject *left = SECOND(); - DEOPT_IF(!PyUnicode_CheckExact(left), COMPARE_OP); - DEOPT_IF(!PyUnicode_CheckExact(right), COMPARE_OP); - STAT_INC(COMPARE_OP, hit); - int res = _PyUnicode_Equal(left, right); - assert(oparg == Py_EQ || oparg == Py_NE); - JUMPBY(INLINE_CACHE_ENTRIES_COMPARE_OP); - NEXTOPARG(); - assert(opcode == POP_JUMP_IF_FALSE || opcode == POP_JUMP_IF_TRUE); - STACK_SHRINK(2); - _Py_DECREF_SPECIALIZED(left, _PyUnicode_ExactDealloc); - _Py_DECREF_SPECIALIZED(right, _PyUnicode_ExactDealloc); - assert(res == 0 || res == 1); - assert(invert == 0 || invert == 1); - int jump = res ^ invert; - if (!jump) { - next_instr++; - } - else { - JUMPBY(1 + oparg); - } - DISPATCH(); - } - TARGET(IS_OP) { PyObject *right = POP(); PyObject *left = TOP(); @@ -3832,6 +3802,42 @@ DISPATCH(); } + TARGET(COMPARE_OP_STR_JUMP) { + PyObject *_tmp_1 = PEEK(2); + PyObject *_tmp_2 = PEEK(1); + { + PyObject *right = _tmp_2; + PyObject *left = _tmp_1; + PyObject *jump; + uint16_t invert = read_u16(next_instr + 1); + assert(cframe.use_tracing == 0); + // Combined: COMPARE_OP (str == str or str != str) + POP_JUMP_IF_(true/false) + DEOPT_IF(!PyUnicode_CheckExact(left), COMPARE_OP); + DEOPT_IF(!PyUnicode_CheckExact(right), COMPARE_OP); + STAT_INC(COMPARE_OP, hit); + int res = _PyUnicode_Equal(left, right); + assert(oparg == Py_EQ || oparg == Py_NE); + _Py_DECREF_SPECIALIZED(left, _PyUnicode_ExactDealloc); + _Py_DECREF_SPECIALIZED(right, _PyUnicode_ExactDealloc); + assert(res == 0 || res == 1); + assert(invert == 0 || invert == 1); + jump = (PyObject *)(size_t)(res ^ invert); + _tmp_1 = jump; + } + next_instr += 2; + NEXTOPARG(); + next_instr++; + { + PyObject *jump = _tmp_1; + assert(opcode == POP_JUMP_IF_FALSE || opcode == POP_JUMP_IF_TRUE); + if (jump) { + JUMPBY(oparg); + } + } + STACK_SHRINK(2); + DISPATCH(); + } + 
TARGET(END_FOR) { PyObject *_tmp_1 = PEEK(2); PyObject *_tmp_2 = PEEK(1); From 2fd8822cb0d14a3461d3176c200d3f9f28323a1a Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 24 Nov 2022 21:50:11 -0700 Subject: [PATCH 06/26] Support typed stack effects This was more convoluted than I expected. --- Python/bytecodes.c | 18 ++-- Python/generated_cases.c.h | 24 ++--- Tools/cases_generator/generate_cases.py | 124 ++++++++++++++---------- Tools/cases_generator/parser.py | 47 +++++---- 4 files changed, 122 insertions(+), 91 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index a87d4a563a6676..17dccd422b7231 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -83,8 +83,8 @@ static PyObject *value, *value1, *value2, *left, *right, *res, *sum, *prod, *sub static PyObject *container, *start, *stop, *v, *lhs, *rhs; static PyObject *list, *tuple, *dict; static PyObject *exit_func, *lasti, *val; -static PyObject *jump; -// Dummy variables for stack effects +static size_t jump; +// Dummy variables for cache effects static _Py_CODEUNIT when_to_jump_mask, invert; // Dummy opcode names for 'op' opcodes #define _BINARY_OP_INPLACE_ADD_UNICODE_PART_1 1001 @@ -2087,7 +2087,7 @@ dummy_func( } // The result is an int disguised as an object pointer. - op(_COMPARE_OP_FLOAT, (unused/1, left, right, when_to_jump_mask/1 -- jump)) { + op(_COMPARE_OP_FLOAT, (unused/1, left, right, when_to_jump_mask/1 -- jump: size_t)) { assert(cframe.use_tracing == 0); // Combined: COMPARE_OP (float ? float) + POP_JUMP_IF_(true/false) DEOPT_IF(!PyFloat_CheckExact(left), COMPARE_OP); @@ -2101,10 +2101,10 @@ dummy_func( STAT_INC(COMPARE_OP, hit); _Py_DECREF_SPECIALIZED(left, _PyFloat_ExactDealloc); _Py_DECREF_SPECIALIZED(right, _PyFloat_ExactDealloc); - jump = (PyObject *)(size_t)(sign_ish & when_to_jump_mask); + jump = sign_ish & when_to_jump_mask; } // The input is an int disguised as an object pointer! 
- op(_JUMP_ON_SIGN, (jump --)) { + op(_JUMP_ON_SIGN, (jump: size_t --)) { assert(opcode == POP_JUMP_IF_FALSE || opcode == POP_JUMP_IF_TRUE); if (jump) { JUMPBY(oparg); @@ -2114,7 +2114,7 @@ dummy_func( super(COMPARE_OP_FLOAT_JUMP) = _COMPARE_OP_FLOAT + _JUMP_ON_SIGN; // Similar to COMPARE_OP_FLOAT - op(_COMPARE_OP_INT, (unused/1, left, right, when_to_jump_mask/1 -- jump)) { + op(_COMPARE_OP_INT, (unused/1, left, right, when_to_jump_mask/1 -- jump: size_t)) { assert(cframe.use_tracing == 0); // Combined: COMPARE_OP (int ? int) + POP_JUMP_IF_(true/false) DEOPT_IF(!PyLong_CheckExact(left), COMPARE_OP); @@ -2129,12 +2129,12 @@ dummy_func( int sign_ish = 2*(ileft > iright) + 2 - (ileft < iright); _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); - jump = (PyObject *)(size_t)(sign_ish & when_to_jump_mask); + jump = sign_ish & when_to_jump_mask; } super(COMPARE_OP_INT_JUMP) = _COMPARE_OP_INT + _JUMP_ON_SIGN; // Similar to COMPARE_OP_FLOAT, but for ==, != only - op(_COMPARE_OP_STR, (unused/1, left, right, invert/1 -- jump)) { + op(_COMPARE_OP_STR, (unused/1, left, right, invert/1 -- jump: size_t)) { assert(cframe.use_tracing == 0); // Combined: COMPARE_OP (str == str or str != str) + POP_JUMP_IF_(true/false) DEOPT_IF(!PyUnicode_CheckExact(left), COMPARE_OP); @@ -2146,7 +2146,7 @@ dummy_func( _Py_DECREF_SPECIALIZED(right, _PyUnicode_ExactDealloc); assert(res == 0 || res == 1); assert(invert == 0 || invert == 1); - jump = (PyObject *)(size_t)(res ^ invert); + jump = res ^ invert; } super(COMPARE_OP_STR_JUMP) = _COMPARE_OP_STR + _JUMP_ON_SIGN; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 9d2b8592d509c3..1a91e3f656326d 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3731,7 +3731,7 @@ { PyObject *right = _tmp_2; PyObject *left = _tmp_1; - PyObject *jump; + size_t jump; uint16_t when_to_jump_mask = read_u16(next_instr + 1); assert(cframe.use_tracing == 0); 
// Combined: COMPARE_OP (float ? float) + POP_JUMP_IF_(true/false) @@ -3746,14 +3746,14 @@ STAT_INC(COMPARE_OP, hit); _Py_DECREF_SPECIALIZED(left, _PyFloat_ExactDealloc); _Py_DECREF_SPECIALIZED(right, _PyFloat_ExactDealloc); - jump = (PyObject *)(size_t)(sign_ish & when_to_jump_mask); - _tmp_1 = jump; + jump = sign_ish & when_to_jump_mask; + _tmp_1 = (PyObject *)jump; } next_instr += 2; NEXTOPARG(); next_instr++; { - PyObject *jump = _tmp_1; + size_t jump = (size_t)_tmp_1; assert(opcode == POP_JUMP_IF_FALSE || opcode == POP_JUMP_IF_TRUE); if (jump) { JUMPBY(oparg); @@ -3769,7 +3769,7 @@ { PyObject *right = _tmp_2; PyObject *left = _tmp_1; - PyObject *jump; + size_t jump; uint16_t when_to_jump_mask = read_u16(next_instr + 1); assert(cframe.use_tracing == 0); // Combined: COMPARE_OP (int ? int) + POP_JUMP_IF_(true/false) @@ -3785,14 +3785,14 @@ int sign_ish = 2*(ileft > iright) + 2 - (ileft < iright); _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); - jump = (PyObject *)(size_t)(sign_ish & when_to_jump_mask); - _tmp_1 = jump; + jump = sign_ish & when_to_jump_mask; + _tmp_1 = (PyObject *)jump; } next_instr += 2; NEXTOPARG(); next_instr++; { - PyObject *jump = _tmp_1; + size_t jump = (size_t)_tmp_1; assert(opcode == POP_JUMP_IF_FALSE || opcode == POP_JUMP_IF_TRUE); if (jump) { JUMPBY(oparg); @@ -3808,7 +3808,7 @@ { PyObject *right = _tmp_2; PyObject *left = _tmp_1; - PyObject *jump; + size_t jump; uint16_t invert = read_u16(next_instr + 1); assert(cframe.use_tracing == 0); // Combined: COMPARE_OP (str == str or str != str) + POP_JUMP_IF_(true/false) @@ -3821,14 +3821,14 @@ _Py_DECREF_SPECIALIZED(right, _PyUnicode_ExactDealloc); assert(res == 0 || res == 1); assert(invert == 0 || invert == 1); - jump = (PyObject *)(size_t)(res ^ invert); - _tmp_1 = jump; + jump = res ^ invert; + _tmp_1 = (PyObject *)jump; } next_instr += 2; NEXTOPARG(); next_instr++; { - PyObject *jump = _tmp_1; + size_t jump = 
(size_t)_tmp_1; assert(opcode == POP_JUMP_IF_FALSE || opcode == POP_JUMP_IF_TRUE); if (jump) { JUMPBY(oparg); diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 2952634a3cda68..72903b1c710806 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -13,6 +13,7 @@ import typing import parser +from parser import StackEffect DEFAULT_INPUT = os.path.relpath( os.path.join(os.path.dirname(__file__), "../../Python/bytecodes.c") @@ -73,6 +74,34 @@ def block(self, head: str): yield self.emit("}") + def stack_adjust(self, diff: int): + if diff > 0: + self.emit(f"STACK_GROW({diff});") + elif diff < 0: + self.emit(f"STACK_SHRINK({-diff});") + + def declare(self, dst: StackEffect, src: StackEffect | None): + if dst.name == UNUSED: + return + type = f"{dst.type} " if dst.type else "PyObject *" + init = "" + if src: + cast = self.cast(dst, src) + init = f" = {cast}{src.name}" + self.emit(f"{type}{dst.name}{init};") + + def assign(self, dst: StackEffect, src: StackEffect): + if src.name == UNUSED: + return + cast = self.cast(dst, src) + if m := re.match(r"^PEEK\((\d+)\)$", dst.name): + self.emit(f"POKE({m.group(1)}, {cast}{src.name});") + else: + self.emit(f"{dst.name} = {cast}{src.name};") + + def cast(self, dst: StackEffect, src: StackEffect) -> str: + return f"({dst.type or 'PyObject *'})" if src.type != dst.type else "" + @dataclasses.dataclass class Instruction: @@ -88,8 +117,8 @@ class Instruction: always_exits: bool cache_offset: int cache_effects: list[parser.CacheEffect] - input_effects: list[parser.StackEffect] - output_effects: list[parser.StackEffect] + input_effects: list[StackEffect] + output_effects: list[StackEffect] # Set later family: parser.Family | None = None @@ -106,7 +135,7 @@ def __init__(self, inst: parser.InstDef): ] self.cache_offset = sum(c.size for c in self.cache_effects) self.input_effects = [ - effect for effect in inst.inputs if isinstance(effect, 
parser.StackEffect) + effect for effect in inst.inputs if isinstance(effect, StackEffect) ] self.output_effects = inst.outputs # For consistency/completeness @@ -122,16 +151,15 @@ def write(self, out: Formatter) -> None: ) # Write input stack effect variable declarations and initializations - for i, seffect in enumerate(reversed(self.input_effects), 1): - if seffect.name != UNUSED: - out.emit(f"PyObject *{seffect.name} = PEEK({i});") + for i, ieffect in enumerate(reversed(self.input_effects), 1): + src = StackEffect(f"PEEK({i})", "") + out.declare(ieffect, src) # Write output stack effect variable declarations - input_names = {seffect.name for seffect in self.input_effects} - input_names.add(UNUSED) - for seffect in self.output_effects: - if seffect.name not in input_names: - out.emit(f"PyObject *{seffect.name};") + input_names = {ieffect.name for ieffect in self.input_effects} + for oeffect in self.output_effects: + if oeffect.name not in input_names: + out.declare(oeffect, None) self.write_body(out, 0) @@ -141,19 +169,17 @@ def write(self, out: Formatter) -> None: # Write net stack growth/shrinkage diff = len(self.output_effects) - len(self.input_effects) - if diff > 0: - out.emit(f"STACK_GROW({diff});") - elif diff < 0: - out.emit(f"STACK_SHRINK({-diff});") + out.stack_adjust(diff) # Write output stack effect assignments - unmoved_names = {UNUSED} + unmoved_names = set() for ieffect, oeffect in zip(self.input_effects, self.output_effects): if ieffect.name == oeffect.name: unmoved_names.add(ieffect.name) - for i, seffect in enumerate(reversed(self.output_effects)): - if seffect.name not in unmoved_names: - out.emit(f"POKE({i+1}, {seffect.name});") + for i, oeffect in enumerate(reversed(self.output_effects), 1): + if oeffect.name not in unmoved_names: + dst = StackEffect(f"PEEK({i})", "") + out.assign(dst, oeffect) # Write cache effect if self.cache_offset: @@ -223,23 +249,26 @@ def write_body(self, out: Formatter, dedent: int, cache_adjust: int = 0) -> None 
InstructionOrCacheEffect = Instruction | parser.CacheEffect +StackEffectMapping = list[tuple[StackEffect, StackEffect]] @dataclasses.dataclass class Component: instr: Instruction - input_mapping: dict[str, parser.StackEffect] - output_mapping: dict[str, parser.StackEffect] + input_mapping: StackEffectMapping + output_mapping: StackEffectMapping def write_body(self, out: Formatter, cache_adjust: int) -> None: with out.block(""): - for var, ieffect in self.input_mapping.items(): - out.emit(f"PyObject *{ieffect.name} = {var};") - for oeffect in self.output_mapping.values(): - out.emit(f"PyObject *{oeffect.name};") + for var, ieffect in self.input_mapping: + out.declare(ieffect, var) + for _, oeffect in self.output_mapping: + out.declare(oeffect, None) + self.instr.write_body(out, dedent=-4, cache_adjust=cache_adjust) - for var, oeffect in self.output_mapping.items(): - out.emit(f"{var} = {oeffect.name};") + + for var, oeffect in self.output_mapping: + out.assign(var, oeffect) # TODO: Use a common base class for {Super,Macro}Instruction @@ -250,7 +279,7 @@ class SuperOrMacroInstruction: """Common fields for super- and macro instructions.""" name: str - stack: list[str] + stack: list[StackEffect] initial_sp: int final_sp: int @@ -445,15 +474,13 @@ def analyze_super(self, super: parser.Super) -> SuperInstruction: case parser.CacheEffect() as ceffect: parts.append(ceffect) case Instruction() as instr: - input_mapping = {} + input_mapping: StackEffectMapping = [] for ieffect in reversed(instr.input_effects): sp -= 1 - if ieffect.name != UNUSED: - input_mapping[stack[sp]] = ieffect - output_mapping = {} + input_mapping.append((stack[sp], ieffect)) + output_mapping: StackEffectMapping = [] for oeffect in instr.output_effects: - if oeffect.name != UNUSED: - output_mapping[stack[sp]] = oeffect + output_mapping.append((stack[sp], oeffect)) sp += 1 parts.append(Component(instr, input_mapping, output_mapping)) case _: @@ -471,15 +498,13 @@ def analyze_macro(self, macro: 
parser.Macro) -> MacroInstruction: case parser.CacheEffect() as ceffect: parts.append(ceffect) case Instruction() as instr: - input_mapping = {} + input_mapping: StackEffectMapping = [] for ieffect in reversed(instr.input_effects): sp -= 1 - if ieffect.name != UNUSED: - input_mapping[stack[sp]] = ieffect - output_mapping = {} + input_mapping.append((stack[sp], ieffect)) + output_mapping: StackEffectMapping = [] for oeffect in instr.output_effects: - if oeffect.name != UNUSED: - output_mapping[stack[sp]] = oeffect + output_mapping.append((stack[sp], oeffect)) sp += 1 parts.append(Component(instr, input_mapping, output_mapping)) case _: @@ -514,7 +539,7 @@ def check_macro_components( def stack_analysis( self, components: typing.Iterable[InstructionOrCacheEffect] - ) -> tuple[list[str], int]: + ) -> tuple[list[StackEffect], int]: """Analyze a super-instruction or macro. Print an error if there's a cache effect (which we don't support yet). @@ -536,7 +561,8 @@ def stack_analysis( # At this point, 'current' is the net stack effect, # and 'lowest' and 'highest' are the extremes. # Note that 'lowest' may be negative. - stack = [f"_tmp_{i+1}" for i in range(highest - lowest)] + # TODO: Reverse the numbering. 
+ stack = [StackEffect(f"_tmp_{i+1}", "") for i in range(highest - lowest)] return stack, -lowest def write_instructions(self) -> None: @@ -616,19 +642,17 @@ def wrap_super_or_macro(self, up: SuperOrMacroInstruction): self.out.emit("") with self.out.block(f"TARGET({up.name})"): for i, var in enumerate(up.stack): + src = None if i < up.initial_sp: - self.out.emit(f"PyObject *{var} = PEEK({up.initial_sp - i});") - else: - self.out.emit(f"PyObject *{var};") + src = StackEffect(f"PEEK({up.initial_sp - i})", "") + self.out.declare(var, src) yield - if up.final_sp > up.initial_sp: - self.out.emit(f"STACK_GROW({up.final_sp - up.initial_sp});") - elif up.final_sp < up.initial_sp: - self.out.emit(f"STACK_SHRINK({up.initial_sp - up.final_sp});") + self.out.stack_adjust(up.final_sp - up.initial_sp) for i, var in enumerate(reversed(up.stack[: up.final_sp]), 1): - self.out.emit(f"POKE({i}, {var});") + dst = StackEffect(f"PEEK({i})", "") + self.out.assign(dst, var) self.out.emit(f"DISPATCH();") diff --git a/Tools/cases_generator/parser.py b/Tools/cases_generator/parser.py index 02a7834d221596..461ea359dadc3b 100644 --- a/Tools/cases_generator/parser.py +++ b/Tools/cases_generator/parser.py @@ -62,6 +62,7 @@ class Block(Node): @dataclass class StackEffect(Node): name: str + type: str = "" # TODO: type, condition @@ -147,7 +148,7 @@ def inst_header(self) -> InstHeader | None: if self.expect(lx.LPAREN) and (tkn := self.expect(lx.IDENTIFIER)): name = tkn.text if self.expect(lx.COMMA): - inp, outp = self.stack_effect() + inp, outp = self.io_effect() if self.expect(lx.RPAREN): if (tkn := self.peek()) and tkn.kind == lx.LBRACE: return InstHeader(kind, name, inp, outp) @@ -156,7 +157,7 @@ def inst_header(self) -> InstHeader | None: return InstHeader(kind, name, [], []) return None - def stack_effect(self) -> tuple[list[InputEffect], list[OutputEffect]]: + def io_effect(self) -> tuple[list[InputEffect], list[OutputEffect]]: # '(' [inputs] '--' [outputs] ')' if self.expect(lx.LPAREN): 
inputs = self.inputs() or [] @@ -181,23 +182,7 @@ def inputs(self) -> list[InputEffect] | None: @contextual def input(self) -> InputEffect | None: - # IDENTIFIER '/' INTEGER (CacheEffect) - # IDENTIFIER (StackEffect) - if tkn := self.expect(lx.IDENTIFIER): - if self.expect(lx.DIVIDE): - if num := self.expect(lx.NUMBER): - try: - size = int(num.text) - except ValueError: - raise self.make_syntax_error( - f"Expected integer, got {num.text!r}" - ) - else: - return CacheEffect(tkn.text, size) - raise self.make_syntax_error("Expected integer") - else: - # TODO: Arrays, conditions - return StackEffect(tkn.text) + return self.cache_effect() or self.stack_effect() def outputs(self) -> list[OutputEffect] | None: # output (, output)* @@ -214,8 +199,30 @@ def outputs(self) -> list[OutputEffect] | None: @contextual def output(self) -> OutputEffect | None: + return self.stack_effect() + + @contextual + def cache_effect(self) -> CacheEffect | None: + # IDENTIFIER '/' NUMBER + if tkn := self.expect(lx.IDENTIFIER): + if self.expect(lx.DIVIDE): + num = self.require(lx.NUMBER).text + try: + size = int(num) + except ValueError: + raise self.make_syntax_error(f"Expected integer, got {num!r}") + else: + return CacheEffect(tkn.text, size) + + @contextual + def stack_effect(self) -> StackEffect | None: + # IDENTIFIER [':' IDENTIFIER] + # TODO: Arrays, conditions if tkn := self.expect(lx.IDENTIFIER): - return StackEffect(tkn.text) + type = "" + if self.expect(lx.COLON): + type = self.require(lx.IDENTIFIER).text + return StackEffect(tkn.text, type) @contextual def super_def(self) -> Super | None: From 14bbd5065c964870e63a5acc845ec7b2733e148d Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 24 Nov 2022 22:13:03 -0700 Subject: [PATCH 07/26] Refactor common code of analyze_{super,macro} Also removed some dead code from the former (somehow the case for CacheEffect had crept back in). 
--- Tools/cases_generator/generate_cases.py | 46 ++++++++++--------------- 1 file changed, 18 insertions(+), 28 deletions(-) diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 72903b1c710806..0382e22174086c 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -271,9 +271,6 @@ def write_body(self, out: Formatter, cache_adjust: int) -> None: out.assign(var, oeffect) -# TODO: Use a common base class for {Super,Macro}Instruction - - @dataclasses.dataclass class SuperOrMacroInstruction: """Common fields for super- and macro instructions.""" @@ -469,22 +466,9 @@ def analyze_super(self, super: parser.Super) -> SuperInstruction: stack, initial_sp = self.stack_analysis(components) sp = initial_sp parts: list[Component] = [] - for component in components: - match component: - case parser.CacheEffect() as ceffect: - parts.append(ceffect) - case Instruction() as instr: - input_mapping: StackEffectMapping = [] - for ieffect in reversed(instr.input_effects): - sp -= 1 - input_mapping.append((stack[sp], ieffect)) - output_mapping: StackEffectMapping = [] - for oeffect in instr.output_effects: - output_mapping.append((stack[sp], oeffect)) - sp += 1 - parts.append(Component(instr, input_mapping, output_mapping)) - case _: - typing.assert_never(component) + for instr in components: + part, sp = self.analyze_instruction(instr, stack, sp) + parts.append(part) final_sp = sp return SuperInstruction(super.name, stack, initial_sp, final_sp, super, parts) @@ -498,20 +482,26 @@ def analyze_macro(self, macro: parser.Macro) -> MacroInstruction: case parser.CacheEffect() as ceffect: parts.append(ceffect) case Instruction() as instr: - input_mapping: StackEffectMapping = [] - for ieffect in reversed(instr.input_effects): - sp -= 1 - input_mapping.append((stack[sp], ieffect)) - output_mapping: StackEffectMapping = [] - for oeffect in instr.output_effects: - output_mapping.append((stack[sp], 
oeffect)) - sp += 1 - parts.append(Component(instr, input_mapping, output_mapping)) + part, sp = self.analyze_instruction(instr, stack, sp) + parts.append(part) case _: typing.assert_never(component) final_sp = sp return MacroInstruction(macro.name, stack, initial_sp, final_sp, macro, parts) + def analyze_instruction( + self, instr: Instruction, stack: list[StackEffect], sp: int + ) -> tuple[Component, int]: + input_mapping: StackEffectMapping = [] + for ieffect in reversed(instr.input_effects): + sp -= 1 + input_mapping.append((stack[sp], ieffect)) + output_mapping: StackEffectMapping = [] + for oeffect in instr.output_effects: + output_mapping.append((stack[sp], oeffect)) + sp += 1 + return Component(instr, input_mapping, output_mapping), sp + def check_super_components(self, super: parser.Super) -> list[Instruction]: components: list[Instruction] = [] for op in super.ops: From 00a2495a2db8edb514c1ef6165a74dc1cc88325e Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 24 Nov 2022 22:27:54 -0700 Subject: [PATCH 08/26] Reverse temporary variable numbering This makes the PEEK/POKE sequences less jarring. 
--- Python/generated_cases.c.h | 90 ++++++++++++------------- Tools/cases_generator/generate_cases.py | 7 +- 2 files changed, 51 insertions(+), 46 deletions(-) diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 1a91e3f656326d..a2b8b0bd83322c 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3575,14 +3575,14 @@ } TARGET(LOAD_FAST__LOAD_FAST) { - PyObject *_tmp_1; PyObject *_tmp_2; + PyObject *_tmp_1; { PyObject *value; value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); - _tmp_1 = value; + _tmp_2 = value; } NEXTOPARG(); next_instr++; @@ -3591,23 +3591,23 @@ value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); - _tmp_2 = value; + _tmp_1 = value; } STACK_GROW(2); - POKE(1, _tmp_2); - POKE(2, _tmp_1); + POKE(1, _tmp_1); + POKE(2, _tmp_2); DISPATCH(); } TARGET(LOAD_FAST__LOAD_CONST) { - PyObject *_tmp_1; PyObject *_tmp_2; + PyObject *_tmp_1; { PyObject *value; value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); - _tmp_1 = value; + _tmp_2 = value; } NEXTOPARG(); next_instr++; @@ -3615,11 +3615,11 @@ PyObject *value; value = GETITEM(consts, oparg); Py_INCREF(value); - _tmp_2 = value; + _tmp_1 = value; } STACK_GROW(2); - POKE(1, _tmp_2); - POKE(2, _tmp_1); + POKE(1, _tmp_1); + POKE(2, _tmp_2); DISPATCH(); } @@ -3643,16 +3643,16 @@ } TARGET(STORE_FAST__STORE_FAST) { - PyObject *_tmp_1 = PEEK(2); - PyObject *_tmp_2 = PEEK(1); + PyObject *_tmp_2 = PEEK(2); + PyObject *_tmp_1 = PEEK(1); { - PyObject *value = _tmp_2; + PyObject *value = _tmp_1; SETLOCAL(oparg, value); } NEXTOPARG(); next_instr++; { - PyObject *value = _tmp_1; + PyObject *value = _tmp_2; SETLOCAL(oparg, value); } STACK_SHRINK(2); @@ -3660,13 +3660,13 @@ } TARGET(LOAD_CONST__LOAD_FAST) { - PyObject *_tmp_1; PyObject *_tmp_2; + PyObject *_tmp_1; { PyObject *value; value = GETITEM(consts, oparg); Py_INCREF(value); - _tmp_1 = value; + _tmp_2 = value; } NEXTOPARG(); next_instr++; @@ -3675,20 +3675,20 @@ value = 
GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); - _tmp_2 = value; + _tmp_1 = value; } STACK_GROW(2); - POKE(1, _tmp_2); - POKE(2, _tmp_1); + POKE(1, _tmp_1); + POKE(2, _tmp_2); DISPATCH(); } TARGET(BINARY_OP_INPLACE_ADD_UNICODE) { - PyObject *_tmp_1 = PEEK(2); - PyObject *_tmp_2 = PEEK(1); + PyObject *_tmp_2 = PEEK(2); + PyObject *_tmp_1 = PEEK(1); { - PyObject *right = _tmp_2; - PyObject *left = _tmp_1; + PyObject *right = _tmp_1; + PyObject *left = _tmp_2; assert(cframe.use_tracing == 0); DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_OP); DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP); @@ -3726,11 +3726,11 @@ } TARGET(COMPARE_OP_FLOAT_JUMP) { - PyObject *_tmp_1 = PEEK(2); - PyObject *_tmp_2 = PEEK(1); + PyObject *_tmp_2 = PEEK(2); + PyObject *_tmp_1 = PEEK(1); { - PyObject *right = _tmp_2; - PyObject *left = _tmp_1; + PyObject *right = _tmp_1; + PyObject *left = _tmp_2; size_t jump; uint16_t when_to_jump_mask = read_u16(next_instr + 1); assert(cframe.use_tracing == 0); @@ -3747,13 +3747,13 @@ _Py_DECREF_SPECIALIZED(left, _PyFloat_ExactDealloc); _Py_DECREF_SPECIALIZED(right, _PyFloat_ExactDealloc); jump = sign_ish & when_to_jump_mask; - _tmp_1 = (PyObject *)jump; + _tmp_2 = (PyObject *)jump; } next_instr += 2; NEXTOPARG(); next_instr++; { - size_t jump = (size_t)_tmp_1; + size_t jump = (size_t)_tmp_2; assert(opcode == POP_JUMP_IF_FALSE || opcode == POP_JUMP_IF_TRUE); if (jump) { JUMPBY(oparg); @@ -3764,11 +3764,11 @@ } TARGET(COMPARE_OP_INT_JUMP) { - PyObject *_tmp_1 = PEEK(2); - PyObject *_tmp_2 = PEEK(1); + PyObject *_tmp_2 = PEEK(2); + PyObject *_tmp_1 = PEEK(1); { - PyObject *right = _tmp_2; - PyObject *left = _tmp_1; + PyObject *right = _tmp_1; + PyObject *left = _tmp_2; size_t jump; uint16_t when_to_jump_mask = read_u16(next_instr + 1); assert(cframe.use_tracing == 0); @@ -3786,13 +3786,13 @@ _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); jump = sign_ish & when_to_jump_mask; - 
_tmp_1 = (PyObject *)jump; + _tmp_2 = (PyObject *)jump; } next_instr += 2; NEXTOPARG(); next_instr++; { - size_t jump = (size_t)_tmp_1; + size_t jump = (size_t)_tmp_2; assert(opcode == POP_JUMP_IF_FALSE || opcode == POP_JUMP_IF_TRUE); if (jump) { JUMPBY(oparg); @@ -3803,11 +3803,11 @@ } TARGET(COMPARE_OP_STR_JUMP) { - PyObject *_tmp_1 = PEEK(2); - PyObject *_tmp_2 = PEEK(1); + PyObject *_tmp_2 = PEEK(2); + PyObject *_tmp_1 = PEEK(1); { - PyObject *right = _tmp_2; - PyObject *left = _tmp_1; + PyObject *right = _tmp_1; + PyObject *left = _tmp_2; size_t jump; uint16_t invert = read_u16(next_instr + 1); assert(cframe.use_tracing == 0); @@ -3822,13 +3822,13 @@ assert(res == 0 || res == 1); assert(invert == 0 || invert == 1); jump = res ^ invert; - _tmp_1 = (PyObject *)jump; + _tmp_2 = (PyObject *)jump; } next_instr += 2; NEXTOPARG(); next_instr++; { - size_t jump = (size_t)_tmp_1; + size_t jump = (size_t)_tmp_2; assert(opcode == POP_JUMP_IF_FALSE || opcode == POP_JUMP_IF_TRUE); if (jump) { JUMPBY(oparg); @@ -3839,14 +3839,14 @@ } TARGET(END_FOR) { - PyObject *_tmp_1 = PEEK(2); - PyObject *_tmp_2 = PEEK(1); + PyObject *_tmp_2 = PEEK(2); + PyObject *_tmp_1 = PEEK(1); { - PyObject *value = _tmp_2; + PyObject *value = _tmp_1; Py_DECREF(value); } { - PyObject *value = _tmp_1; + PyObject *value = _tmp_2; Py_DECREF(value); } STACK_SHRINK(2); diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 0382e22174086c..c571988d849873 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -552,7 +552,7 @@ def stack_analysis( # and 'lowest' and 'highest' are the extremes. # Note that 'lowest' may be negative. # TODO: Reverse the numbering. 
- stack = [StackEffect(f"_tmp_{i+1}", "") for i in range(highest - lowest)] + stack = [StackEffect(f"_tmp_{i+1}", "") for i in reversed(range(highest - lowest))] return stack, -lowest def write_instructions(self) -> None: @@ -629,6 +629,11 @@ def write_macro(self, mac: MacroInstruction) -> None: @contextlib.contextmanager def wrap_super_or_macro(self, up: SuperOrMacroInstruction): """Shared boilerplate for super- and macro instructions.""" + # TODO: Somewhere (where?) make it so that if one instruction + # has an output that is input to another, and the variable names + # and types match and don't conflict with other instructions, + # that variable is declared with the right name and type in the + # outer block, rather than trusting the compiler to optimize it. self.out.emit("") with self.out.block(f"TARGET({up.name})"): for i, var in enumerate(up.stack): From f01dff55af833e668729357ccca58c1632db1e11 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 25 Nov 2022 15:39:30 -0700 Subject: [PATCH 09/26] STORE_ATTR --- Python/bytecodes.c | 23 +++++++---------------- Python/generated_cases.c.h | 21 +++++++++------------ 2 files changed, 16 insertions(+), 28 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 17dccd422b7231..2871b5516f09ff 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -81,11 +81,11 @@ do { \ // Dummy variables for stack effects. 
static PyObject *value, *value1, *value2, *left, *right, *res, *sum, *prod, *sub; static PyObject *container, *start, *stop, *v, *lhs, *rhs; -static PyObject *list, *tuple, *dict; +static PyObject *list, *tuple, *dict, *owner; static PyObject *exit_func, *lasti, *val; static size_t jump; // Dummy variables for cache effects -static _Py_CODEUNIT when_to_jump_mask, invert; +static _Py_CODEUNIT when_to_jump_mask, invert, counter; // Dummy opcode names for 'op' opcodes #define _BINARY_OP_INPLACE_ADD_UNICODE_PART_1 1001 #define _BINARY_OP_INPLACE_ADD_UNICODE_PART_2 1002 @@ -1126,31 +1126,22 @@ dummy_func( Py_DECREF(seq); } - // stack effect: (__0, __1 -- ) - inst(STORE_ATTR) { - _PyAttrCache *cache = (_PyAttrCache *)next_instr; - if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { + inst(STORE_ATTR, (counter/1, v, owner, unused/3 --)) { + if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { assert(cframe.use_tracing == 0); - PyObject *owner = TOP(); PyObject *name = GETITEM(names, oparg); next_instr--; _Py_Specialize_StoreAttr(owner, next_instr, name); DISPATCH_SAME_OPARG(); } STAT_INC(STORE_ATTR, deferred); + _PyAttrCache *cache = (_PyAttrCache *)next_instr; DECREMENT_ADAPTIVE_COUNTER(cache->counter); PyObject *name = GETITEM(names, oparg); - PyObject *owner = TOP(); - PyObject *v = SECOND(); - int err; - STACK_SHRINK(2); - err = PyObject_SetAttr(owner, name, v); + int err = PyObject_SetAttr(owner, name, v); Py_DECREF(v); Py_DECREF(owner); - if (err != 0) { - goto error; - } - JUMPBY(INLINE_CACHE_ENTRIES_STORE_ATTR); + ERROR_IF(err != 0, error); } // stack effect: (__0 -- ) diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index a2b8b0bd83322c..31e95769cf43b9 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1094,29 +1094,26 @@ TARGET(STORE_ATTR) { PREDICTED(STORE_ATTR); - _PyAttrCache *cache = (_PyAttrCache *)next_instr; - if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { + PyObject *owner = PEEK(1); + PyObject *v = PEEK(2); + uint16_t 
counter = read_u16(next_instr + 0); + if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { assert(cframe.use_tracing == 0); - PyObject *owner = TOP(); PyObject *name = GETITEM(names, oparg); next_instr--; _Py_Specialize_StoreAttr(owner, next_instr, name); DISPATCH_SAME_OPARG(); } STAT_INC(STORE_ATTR, deferred); + _PyAttrCache *cache = (_PyAttrCache *)next_instr; DECREMENT_ADAPTIVE_COUNTER(cache->counter); PyObject *name = GETITEM(names, oparg); - PyObject *owner = TOP(); - PyObject *v = SECOND(); - int err; - STACK_SHRINK(2); - err = PyObject_SetAttr(owner, name, v); + int err = PyObject_SetAttr(owner, name, v); Py_DECREF(v); Py_DECREF(owner); - if (err != 0) { - goto error; - } - JUMPBY(INLINE_CACHE_ENTRIES_STORE_ATTR); + if (err != 0) goto pop_2_error; + STACK_SHRINK(2); + next_instr += 4; DISPATCH(); } From 71ee0894f0f666394baeac93472c35a64ba2aadf Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 25 Nov 2022 15:43:51 -0700 Subject: [PATCH 10/26] DELETE_ATTR --- Python/bytecodes.c | 10 +++------- Python/generated_cases.c.h | 9 ++++----- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 2871b5516f09ff..1e6db50abc7228 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1144,15 +1144,11 @@ dummy_func( ERROR_IF(err != 0, error); } - // stack effect: (__0 -- ) - inst(DELETE_ATTR) { + inst(DELETE_ATTR, (owner --)) { PyObject *name = GETITEM(names, oparg); - PyObject *owner = POP(); - int err; - err = PyObject_SetAttr(owner, name, (PyObject *)NULL); + int err = PyObject_SetAttr(owner, name, (PyObject *)NULL); Py_DECREF(owner); - if (err != 0) - goto error; + ERROR_IF(err != 0, error); } // stack effect: (__0 -- ) diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 31e95769cf43b9..2955dc465d06a6 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1118,13 +1118,12 @@ } TARGET(DELETE_ATTR) { + PyObject *owner = PEEK(1); PyObject *name = GETITEM(names, oparg); - 
PyObject *owner = POP(); - int err; - err = PyObject_SetAttr(owner, name, (PyObject *)NULL); + int err = PyObject_SetAttr(owner, name, (PyObject *)NULL); Py_DECREF(owner); - if (err != 0) - goto error; + if (err != 0) goto pop_1_error; + STACK_SHRINK(1); DISPATCH(); } From 966da1a1bec28bdad7fc80290ec5cacb6201bdbc Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 25 Nov 2022 15:49:43 -0700 Subject: [PATCH 11/26] STORE_GLOBAL --- Python/bytecodes.c | 10 +++------- Python/generated_cases.c.h | 9 ++++----- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 1e6db50abc7228..c029d87e8113bc 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1151,15 +1151,11 @@ dummy_func( ERROR_IF(err != 0, error); } - // stack effect: (__0 -- ) - inst(STORE_GLOBAL) { + inst(STORE_GLOBAL, (v --)) { PyObject *name = GETITEM(names, oparg); - PyObject *v = POP(); - int err; - err = PyDict_SetItem(GLOBALS(), name, v); + int err = PyDict_SetItem(GLOBALS(), name, v); Py_DECREF(v); - if (err != 0) - goto error; + ERROR_IF(err != 0, error); } inst(DELETE_GLOBAL, (--)) { diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 2955dc465d06a6..3cd82ea4434b46 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1128,13 +1128,12 @@ } TARGET(STORE_GLOBAL) { + PyObject *v = PEEK(1); PyObject *name = GETITEM(names, oparg); - PyObject *v = POP(); - int err; - err = PyDict_SetItem(GLOBALS(), name, v); + int err = PyDict_SetItem(GLOBALS(), name, v); Py_DECREF(v); - if (err != 0) - goto error; + if (err != 0) goto pop_1_error; + STACK_SHRINK(1); DISPATCH(); } From 7c945919a7438cbd84cd16343061a35eb6a0e7e8 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 25 Nov 2022 15:59:33 -0700 Subject: [PATCH 12/26] STORE_ATTR_INSTANCE_VALUE --- Python/bytecodes.c | 13 +++---------- Python/generated_cases.c.h | 13 ++++++------- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git 
a/Python/bytecodes.c b/Python/bytecodes.c index c029d87e8113bc..bd1e71ebfb7401 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -85,7 +85,8 @@ static PyObject *list, *tuple, *dict, *owner; static PyObject *exit_func, *lasti, *val; static size_t jump; // Dummy variables for cache effects -static _Py_CODEUNIT when_to_jump_mask, invert, counter; +static _Py_CODEUNIT when_to_jump_mask, invert, counter, index; +static uint32_t type_version; // Dummy opcode names for 'op' opcodes #define _BINARY_OP_INPLACE_ADD_UNICODE_PART_1 1001 #define _BINARY_OP_INPLACE_ADD_UNICODE_PART_2 1002 @@ -1944,22 +1945,15 @@ dummy_func( DISPATCH_INLINED(new_frame); } - // stack effect: (__0, __1 -- ) - inst(STORE_ATTR_INSTANCE_VALUE) { + inst(STORE_ATTR_INSTANCE_VALUE, (unused/1, type_version/2, index/1, value, owner --)) { assert(cframe.use_tracing == 0); - PyObject *owner = TOP(); PyTypeObject *tp = Py_TYPE(owner); - _PyAttrCache *cache = (_PyAttrCache *)next_instr; - uint32_t type_version = read_u32(cache->version); assert(type_version != 0); DEOPT_IF(tp->tp_version_tag != type_version, STORE_ATTR); assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); DEOPT_IF(!_PyDictOrValues_IsValues(dorv), STORE_ATTR); STAT_INC(STORE_ATTR, hit); - Py_ssize_t index = cache->index; - STACK_SHRINK(1); - PyObject *value = POP(); PyDictValues *values = _PyDictOrValues_GetValues(dorv); PyObject *old_value = values->values[index]; values->values[index] = value; @@ -1970,7 +1964,6 @@ dummy_func( Py_DECREF(old_value); } Py_DECREF(owner); - JUMPBY(INLINE_CACHE_ENTRIES_STORE_ATTR); } // stack effect: (__0, __1 -- ) diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 3cd82ea4434b46..639df9df849b2e 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1928,20 +1928,18 @@ } TARGET(STORE_ATTR_INSTANCE_VALUE) { + PyObject *owner = PEEK(1); + PyObject *value = PEEK(2); + uint32_t type_version = 
read_u32(next_instr + 1); + uint16_t index = read_u16(next_instr + 3); assert(cframe.use_tracing == 0); - PyObject *owner = TOP(); PyTypeObject *tp = Py_TYPE(owner); - _PyAttrCache *cache = (_PyAttrCache *)next_instr; - uint32_t type_version = read_u32(cache->version); assert(type_version != 0); DEOPT_IF(tp->tp_version_tag != type_version, STORE_ATTR); assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); DEOPT_IF(!_PyDictOrValues_IsValues(dorv), STORE_ATTR); STAT_INC(STORE_ATTR, hit); - Py_ssize_t index = cache->index; - STACK_SHRINK(1); - PyObject *value = POP(); PyDictValues *values = _PyDictOrValues_GetValues(dorv); PyObject *old_value = values->values[index]; values->values[index] = value; @@ -1952,7 +1950,8 @@ Py_DECREF(old_value); } Py_DECREF(owner); - JUMPBY(INLINE_CACHE_ENTRIES_STORE_ATTR); + STACK_SHRINK(2); + next_instr += 4; DISPATCH(); } From 2c7604677fdb015f590d739d9423282cf5675d31 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 25 Nov 2022 16:18:28 -0700 Subject: [PATCH 13/26] STORE_ATTR_WITH_HINT --- Python/bytecodes.c | 16 +++------------- Python/generated_cases.c.h | 17 +++++++---------- 2 files changed, 10 insertions(+), 23 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index bd1e71ebfb7401..9adc0ded8f92cb 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -85,7 +85,7 @@ static PyObject *list, *tuple, *dict, *owner; static PyObject *exit_func, *lasti, *val; static size_t jump; // Dummy variables for cache effects -static _Py_CODEUNIT when_to_jump_mask, invert, counter, index; +static _Py_CODEUNIT when_to_jump_mask, invert, counter, index, hint; static uint32_t type_version; // Dummy opcode names for 'op' opcodes #define _BINARY_OP_INPLACE_ADD_UNICODE_PART_1 1001 @@ -1966,13 +1966,9 @@ dummy_func( Py_DECREF(owner); } - // stack effect: (__0, __1 -- ) - inst(STORE_ATTR_WITH_HINT) { + inst(STORE_ATTR_WITH_HINT, (unused/1, type_version/2, hint/1, value, 
owner --)) { assert(cframe.use_tracing == 0); - PyObject *owner = TOP(); PyTypeObject *tp = Py_TYPE(owner); - _PyAttrCache *cache = (_PyAttrCache *)next_instr; - uint32_t type_version = read_u32(cache->version); assert(type_version != 0); DEOPT_IF(tp->tp_version_tag != type_version, STORE_ATTR); assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); @@ -1982,17 +1978,14 @@ dummy_func( DEOPT_IF(dict == NULL, STORE_ATTR); assert(PyDict_CheckExact((PyObject *)dict)); PyObject *name = GETITEM(names, oparg); - uint16_t hint = cache->index; DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries, STORE_ATTR); - PyObject *value, *old_value; + PyObject *old_value; uint64_t new_version; if (DK_IS_UNICODE(dict->ma_keys)) { PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint; DEOPT_IF(ep->me_key != name, STORE_ATTR); old_value = ep->me_value; DEOPT_IF(old_value == NULL, STORE_ATTR); - STACK_SHRINK(1); - value = POP(); new_version = _PyDict_NotifyEvent(PyDict_EVENT_MODIFIED, dict, name, value); ep->me_value = value; } @@ -2001,8 +1994,6 @@ dummy_func( DEOPT_IF(ep->me_key != name, STORE_ATTR); old_value = ep->me_value; DEOPT_IF(old_value == NULL, STORE_ATTR); - STACK_SHRINK(1); - value = POP(); new_version = _PyDict_NotifyEvent(PyDict_EVENT_MODIFIED, dict, name, value); ep->me_value = value; } @@ -2015,7 +2006,6 @@ dummy_func( /* PEP 509 */ dict->ma_version_tag = new_version; Py_DECREF(owner); - JUMPBY(INLINE_CACHE_ENTRIES_STORE_ATTR); } // stack effect: (__0, __1 -- ) diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 639df9df849b2e..97ffcec1b6f7c7 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1956,11 +1956,12 @@ } TARGET(STORE_ATTR_WITH_HINT) { + PyObject *owner = PEEK(1); + PyObject *value = PEEK(2); + uint32_t type_version = read_u32(next_instr + 1); + uint16_t hint = read_u16(next_instr + 3); assert(cframe.use_tracing == 0); - PyObject *owner = TOP(); PyTypeObject *tp = Py_TYPE(owner); - _PyAttrCache *cache = 
(_PyAttrCache *)next_instr; - uint32_t type_version = read_u32(cache->version); assert(type_version != 0); DEOPT_IF(tp->tp_version_tag != type_version, STORE_ATTR); assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); @@ -1970,17 +1971,14 @@ DEOPT_IF(dict == NULL, STORE_ATTR); assert(PyDict_CheckExact((PyObject *)dict)); PyObject *name = GETITEM(names, oparg); - uint16_t hint = cache->index; DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries, STORE_ATTR); - PyObject *value, *old_value; + PyObject *old_value; uint64_t new_version; if (DK_IS_UNICODE(dict->ma_keys)) { PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint; DEOPT_IF(ep->me_key != name, STORE_ATTR); old_value = ep->me_value; DEOPT_IF(old_value == NULL, STORE_ATTR); - STACK_SHRINK(1); - value = POP(); new_version = _PyDict_NotifyEvent(PyDict_EVENT_MODIFIED, dict, name, value); ep->me_value = value; } @@ -1989,8 +1987,6 @@ DEOPT_IF(ep->me_key != name, STORE_ATTR); old_value = ep->me_value; DEOPT_IF(old_value == NULL, STORE_ATTR); - STACK_SHRINK(1); - value = POP(); new_version = _PyDict_NotifyEvent(PyDict_EVENT_MODIFIED, dict, name, value); ep->me_value = value; } @@ -2003,7 +1999,8 @@ /* PEP 509 */ dict->ma_version_tag = new_version; Py_DECREF(owner); - JUMPBY(INLINE_CACHE_ENTRIES_STORE_ATTR); + STACK_SHRINK(2); + next_instr += 4; DISPATCH(); } From d0f29f85960ff056e9791db9b98aaeca9c891020 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 25 Nov 2022 16:26:19 -0700 Subject: [PATCH 14/26] STORE_ATTR_SLOT, and complete the store_attr family --- Python/bytecodes.c | 21 +++++++++------------ Python/generated_cases.c.h | 14 +++++++------- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 9adc0ded8f92cb..6a7d77b70b599d 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1127,6 +1127,13 @@ dummy_func( Py_DECREF(seq); } + family(store_attr) = { + STORE_ATTR, + STORE_ATTR_INSTANCE_VALUE, + STORE_ATTR_SLOT, + STORE_ATTR_WITH_HINT, 
+ }; + inst(STORE_ATTR, (counter/1, v, owner, unused/3 --)) { if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { assert(cframe.use_tracing == 0); @@ -2008,24 +2015,17 @@ dummy_func( Py_DECREF(owner); } - // stack effect: (__0, __1 -- ) - inst(STORE_ATTR_SLOT) { + inst(STORE_ATTR_SLOT, (unused/1, type_version/2, index/1, value, owner --)) { assert(cframe.use_tracing == 0); - PyObject *owner = TOP(); PyTypeObject *tp = Py_TYPE(owner); - _PyAttrCache *cache = (_PyAttrCache *)next_instr; - uint32_t type_version = read_u32(cache->version); assert(type_version != 0); DEOPT_IF(tp->tp_version_tag != type_version, STORE_ATTR); - char *addr = (char *)owner + cache->index; + char *addr = (char *)owner + index; STAT_INC(STORE_ATTR, hit); - STACK_SHRINK(1); - PyObject *value = POP(); PyObject *old_value = *(PyObject **)addr; *(PyObject **)addr = value; Py_XDECREF(old_value); Py_DECREF(owner); - JUMPBY(INLINE_CACHE_ENTRIES_STORE_ATTR); } family(compare_op) = { @@ -3655,9 +3655,6 @@ family(load_fast) = { LOAD_FAST, LOAD_FAST__LOAD_CONST, LOAD_FAST__LOAD_FAST }; family(load_global) = { LOAD_GLOBAL, LOAD_GLOBAL_BUILTIN, LOAD_GLOBAL_MODULE }; -family(store_attr) = { - STORE_ATTR, STORE_ATTR_INSTANCE_VALUE, - STORE_ATTR_SLOT, STORE_ATTR_WITH_HINT }; family(store_fast) = { STORE_FAST, STORE_FAST__LOAD_FAST, STORE_FAST__STORE_FAST }; family(store_subscr) = { STORE_SUBSCR, STORE_SUBSCR_DICT, diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 97ffcec1b6f7c7..bc8beeee389aaf 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2005,22 +2005,22 @@ } TARGET(STORE_ATTR_SLOT) { + PyObject *owner = PEEK(1); + PyObject *value = PEEK(2); + uint32_t type_version = read_u32(next_instr + 1); + uint16_t index = read_u16(next_instr + 3); assert(cframe.use_tracing == 0); - PyObject *owner = TOP(); PyTypeObject *tp = Py_TYPE(owner); - _PyAttrCache *cache = (_PyAttrCache *)next_instr; - uint32_t type_version = read_u32(cache->version); assert(type_version != 0); 
DEOPT_IF(tp->tp_version_tag != type_version, STORE_ATTR); - char *addr = (char *)owner + cache->index; + char *addr = (char *)owner + index; STAT_INC(STORE_ATTR, hit); - STACK_SHRINK(1); - PyObject *value = POP(); PyObject *old_value = *(PyObject **)addr; *(PyObject **)addr = value; Py_XDECREF(old_value); Py_DECREF(owner); - JUMPBY(INLINE_CACHE_ENTRIES_STORE_ATTR); + STACK_SHRINK(2); + next_instr += 4; DISPATCH(); } From 001c4180d555f01346f03031ab8f312925dc29ad Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 25 Nov 2022 17:19:38 -0700 Subject: [PATCH 15/26] Complete the store_subscr family: STORE_SUBSCR{,DICT,LIST_INT} STORE_SUBSCR was already half converted, but wasn't using cache effects yet. --- Python/bytecodes.c | 36 ++++++++++++------------------------ Python/generated_cases.c.h | 31 +++++++++++++++---------------- 2 files changed, 27 insertions(+), 40 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6a7d77b70b599d..80ea02a0e49ed4 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -489,15 +489,21 @@ dummy_func( PREDICT(JUMP_BACKWARD); } - inst(STORE_SUBSCR, (v, container, sub -- )) { - _PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr; - if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { + family(store_subscr) = { + STORE_SUBSCR, + STORE_SUBSCR_DICT, + STORE_SUBSCR_LIST_INT, + }; + + inst(STORE_SUBSCR, (counter/1, v, container, sub -- )) { + if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { assert(cframe.use_tracing == 0); next_instr--; _Py_Specialize_StoreSubscr(container, sub, next_instr); DISPATCH_SAME_OPARG(); } STAT_INC(STORE_SUBSCR, deferred); + _PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr; DECREMENT_ADAPTIVE_COUNTER(cache->counter); /* container[sub] = v */ int err = PyObject_SetItem(container, sub, v); @@ -505,15 +511,10 @@ dummy_func( Py_DECREF(container); Py_DECREF(sub); ERROR_IF(err != 0, error); - JUMPBY(INLINE_CACHE_ENTRIES_STORE_SUBSCR); } - // stack effect: (__0, __1, __2 -- ) - 
inst(STORE_SUBSCR_LIST_INT) { + inst(STORE_SUBSCR_LIST_INT, (unused/1, value, list, sub -- )) { assert(cframe.use_tracing == 0); - PyObject *sub = TOP(); - PyObject *list = SECOND(); - PyObject *value = THIRD(); DEOPT_IF(!PyLong_CheckExact(sub), STORE_SUBSCR); DEOPT_IF(!PyList_CheckExact(list), STORE_SUBSCR); @@ -526,29 +527,19 @@ dummy_func( PyObject *old_value = PyList_GET_ITEM(list, index); PyList_SET_ITEM(list, index, value); - STACK_SHRINK(3); assert(old_value != NULL); Py_DECREF(old_value); _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); Py_DECREF(list); - JUMPBY(INLINE_CACHE_ENTRIES_STORE_SUBSCR); } - // stack effect: (__0, __1, __2 -- ) - inst(STORE_SUBSCR_DICT) { + inst(STORE_SUBSCR_DICT, (unused/1, value, dict, sub -- )) { assert(cframe.use_tracing == 0); - PyObject *sub = TOP(); - PyObject *dict = SECOND(); - PyObject *value = THIRD(); DEOPT_IF(!PyDict_CheckExact(dict), STORE_SUBSCR); - STACK_SHRINK(3); STAT_INC(STORE_SUBSCR, hit); int err = _PyDict_SetItem_Take2((PyDictObject *)dict, sub, value); Py_DECREF(dict); - if (err != 0) { - goto error; - } - JUMPBY(INLINE_CACHE_ENTRIES_STORE_SUBSCR); + ERROR_IF(err != 0, error); } // stack effect: (__0, __1 -- ) @@ -3656,9 +3647,6 @@ family(load_global) = { LOAD_GLOBAL, LOAD_GLOBAL_BUILTIN, LOAD_GLOBAL_MODULE }; family(store_fast) = { STORE_FAST, STORE_FAST__LOAD_FAST, STORE_FAST__STORE_FAST }; -family(store_subscr) = { - STORE_SUBSCR, STORE_SUBSCR_DICT, - STORE_SUBSCR_LIST_INT }; family(unpack_sequence) = { UNPACK_SEQUENCE, UNPACK_SEQUENCE_LIST, UNPACK_SEQUENCE_TUPLE, UNPACK_SEQUENCE_TWO_TUPLE }; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index bc8beeee389aaf..90c787cff19c84 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -455,14 +455,15 @@ PyObject *sub = PEEK(1); PyObject *container = PEEK(2); PyObject *v = PEEK(3); - _PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr; - if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { + uint16_t 
counter = read_u16(next_instr + 0); + if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { assert(cframe.use_tracing == 0); next_instr--; _Py_Specialize_StoreSubscr(container, sub, next_instr); DISPATCH_SAME_OPARG(); } STAT_INC(STORE_SUBSCR, deferred); + _PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr; DECREMENT_ADAPTIVE_COUNTER(cache->counter); /* container[sub] = v */ int err = PyObject_SetItem(container, sub, v); @@ -470,16 +471,16 @@ Py_DECREF(container); Py_DECREF(sub); if (err != 0) goto pop_3_error; - JUMPBY(INLINE_CACHE_ENTRIES_STORE_SUBSCR); STACK_SHRINK(3); + next_instr += 1; DISPATCH(); } TARGET(STORE_SUBSCR_LIST_INT) { + PyObject *sub = PEEK(1); + PyObject *list = PEEK(2); + PyObject *value = PEEK(3); assert(cframe.use_tracing == 0); - PyObject *sub = TOP(); - PyObject *list = SECOND(); - PyObject *value = THIRD(); DEOPT_IF(!PyLong_CheckExact(sub), STORE_SUBSCR); DEOPT_IF(!PyList_CheckExact(list), STORE_SUBSCR); @@ -492,29 +493,27 @@ PyObject *old_value = PyList_GET_ITEM(list, index); PyList_SET_ITEM(list, index, value); - STACK_SHRINK(3); assert(old_value != NULL); Py_DECREF(old_value); _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); Py_DECREF(list); - JUMPBY(INLINE_CACHE_ENTRIES_STORE_SUBSCR); + STACK_SHRINK(3); + next_instr += 1; DISPATCH(); } TARGET(STORE_SUBSCR_DICT) { + PyObject *sub = PEEK(1); + PyObject *dict = PEEK(2); + PyObject *value = PEEK(3); assert(cframe.use_tracing == 0); - PyObject *sub = TOP(); - PyObject *dict = SECOND(); - PyObject *value = THIRD(); DEOPT_IF(!PyDict_CheckExact(dict), STORE_SUBSCR); - STACK_SHRINK(3); STAT_INC(STORE_SUBSCR, hit); int err = _PyDict_SetItem_Take2((PyDictObject *)dict, sub, value); Py_DECREF(dict); - if (err != 0) { - goto error; - } - JUMPBY(INLINE_CACHE_ENTRIES_STORE_SUBSCR); + if (err != 0) goto pop_3_error; + STACK_SHRINK(3); + next_instr += 1; DISPATCH(); } From 05caa7e07d784bf771f4c81fb188c8864fc681c8 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 25 Nov 2022 21:38:20 -0700 
Subject: [PATCH 16/26] DELETE_SUBSCR --- Python/bytecodes.c | 12 +++--------- Python/generated_cases.c.h | 12 +++++------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 80ea02a0e49ed4..aa8103f1549d7c 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -542,18 +542,12 @@ dummy_func( ERROR_IF(err != 0, error); } - // stack effect: (__0, __1 -- ) - inst(DELETE_SUBSCR) { - PyObject *sub = TOP(); - PyObject *container = SECOND(); - int err; - STACK_SHRINK(2); + inst(DELETE_SUBSCR, (container, sub --)) { /* del container[sub] */ - err = PyObject_DelItem(container, sub); + int err = PyObject_DelItem(container, sub); Py_DECREF(container); Py_DECREF(sub); - if (err != 0) - goto error; + ERROR_IF(err != 0, error); } // stack effect: (__0 -- ) diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 90c787cff19c84..209c3217266de1 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -518,16 +518,14 @@ } TARGET(DELETE_SUBSCR) { - PyObject *sub = TOP(); - PyObject *container = SECOND(); - int err; - STACK_SHRINK(2); + PyObject *sub = PEEK(1); + PyObject *container = PEEK(2); /* del container[sub] */ - err = PyObject_DelItem(container, sub); + int err = PyObject_DelItem(container, sub); Py_DECREF(container); Py_DECREF(sub); - if (err != 0) - goto error; + if (err != 0) goto pop_2_error; + STACK_SHRINK(2); DISPATCH(); } From 8d445ae7a78d5a3411a7411e0c38c06b5d34e5a5 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 25 Nov 2022 21:44:53 -0700 Subject: [PATCH 17/26] PRINT_EXPR --- Python/bytecodes.c | 10 ++++------ Python/generated_cases.c.h | 9 +++++---- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index aa8103f1549d7c..c0850f196e5118 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -550,21 +550,19 @@ dummy_func( ERROR_IF(err != 0, error); } - // stack effect: (__0 -- ) - inst(PRINT_EXPR) { - 
PyObject *value = POP(); + inst(PRINT_EXPR, (value --)) { PyObject *hook = _PySys_GetAttr(tstate, &_Py_ID(displayhook)); PyObject *res; + // Can't use ERROR_IF here. if (hook == NULL) { _PyErr_SetString(tstate, PyExc_RuntimeError, "lost sys.displayhook"); Py_DECREF(value); - goto error; + ERROR_IF(1, error); } res = PyObject_CallOneArg(hook, value); Py_DECREF(value); - if (res == NULL) - goto error; + ERROR_IF(res == NULL, error); Py_DECREF(res); } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 209c3217266de1..2b44377b63380b 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -530,20 +530,21 @@ } TARGET(PRINT_EXPR) { - PyObject *value = POP(); + PyObject *value = PEEK(1); PyObject *hook = _PySys_GetAttr(tstate, &_Py_ID(displayhook)); PyObject *res; + // Can't use ERROR_IF here. if (hook == NULL) { _PyErr_SetString(tstate, PyExc_RuntimeError, "lost sys.displayhook"); Py_DECREF(value); - goto error; + if (1) goto pop_1_error; } res = PyObject_CallOneArg(hook, value); Py_DECREF(value); - if (res == NULL) - goto error; + if (res == NULL) goto pop_1_error; Py_DECREF(res); + STACK_SHRINK(1); DISPATCH(); } From c1f303487c1b11ff875f489bb9e3218e414a4d6c Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 25 Nov 2022 21:53:25 -0700 Subject: [PATCH 18/26] INTERPRETER_EXIT (a bit weird, ends in return) --- Python/bytecodes.c | 7 +++---- Python/generated_cases.c.h | 3 ++- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index c0850f196e5118..f385098f8c32a2 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -82,7 +82,7 @@ do { \ static PyObject *value, *value1, *value2, *left, *right, *res, *sum, *prod, *sub; static PyObject *container, *start, *stop, *v, *lhs, *rhs; static PyObject *list, *tuple, *dict, *owner; -static PyObject *exit_func, *lasti, *val; +static PyObject *exit_func, *lasti, *val, *retval; static size_t jump; // Dummy variables for cache 
effects static _Py_CODEUNIT when_to_jump_mask, invert, counter, index, hint; @@ -589,11 +589,10 @@ dummy_func( goto error; } - // stack effect: (__0 -- ) - inst(INTERPRETER_EXIT) { + inst(INTERPRETER_EXIT, (retval --)) { assert(frame == &entry_frame); assert(_PyFrame_IsIncomplete(frame)); - PyObject *retval = POP(); + STACK_SHRINK(1); // Since we're not going to DISPATCH() assert(EMPTY()); /* Restore previous cframe and return. */ tstate->cframe = cframe.previous; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 2b44377b63380b..e40cfaa5530b36 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -571,9 +571,10 @@ } TARGET(INTERPRETER_EXIT) { + PyObject *retval = PEEK(1); assert(frame == &entry_frame); assert(_PyFrame_IsIncomplete(frame)); - PyObject *retval = POP(); + STACK_SHRINK(1); // Since we're not going to DISPATCH() assert(EMPTY()); /* Restore previous cframe and return. */ tstate->cframe = cframe.previous; From e2f376ba9e346cb44fda277d4fd0bf9a30420610 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 25 Nov 2022 22:05:44 -0700 Subject: [PATCH 19/26] RETURN_VALUE --- Python/bytecodes.c | 5 ++--- Python/generated_cases.c.h | 3 ++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index f385098f8c32a2..499df93e32e925 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -603,9 +603,8 @@ dummy_func( return retval; } - // stack effect: (__0 -- ) - inst(RETURN_VALUE) { - PyObject *retval = POP(); + inst(RETURN_VALUE, (retval --)) { + STACK_SHRINK(1); assert(EMPTY()); _PyFrame_SetStackPointer(frame, stack_pointer); TRACE_FUNCTION_EXIT(); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index e40cfaa5530b36..5d04a50eb16917 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -586,7 +586,8 @@ } TARGET(RETURN_VALUE) { - PyObject *retval = POP(); + PyObject *retval = PEEK(1); + STACK_SHRINK(1); assert(EMPTY()); 
_PyFrame_SetStackPointer(frame, stack_pointer); TRACE_FUNCTION_EXIT(); From 53c204e0d8e222d18a3ff9ff2bc27eefed411f75 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 25 Nov 2022 22:23:29 -0700 Subject: [PATCH 20/26] GET_AITER (had to restructure it some) The original had mysterious `SET_TOP(NULL)` before `goto error`. I assume those just account for `obj` having been decref'ed, so I got rid of them in favor of the cleanup implied by `ERROR_IF()`. --- Python/bytecodes.c | 29 +++++++++-------------------- Python/generated_cases.c.h | 27 ++++++++++----------------- 2 files changed, 19 insertions(+), 37 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 499df93e32e925..ecf4421f8042eb 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -82,7 +82,7 @@ do { \ static PyObject *value, *value1, *value2, *left, *right, *res, *sum, *prod, *sub; static PyObject *container, *start, *stop, *v, *lhs, *rhs; static PyObject *list, *tuple, *dict, *owner; -static PyObject *exit_func, *lasti, *val, *retval; +static PyObject *exit_func, *lasti, *val, *retval, *obj, *iter; static size_t jump; // Dummy variables for cache effects static _Py_CODEUNIT when_to_jump_mask, invert, counter, index, hint; @@ -616,48 +616,37 @@ dummy_func( goto resume_frame; } - // stack effect: ( -- ) - inst(GET_AITER) { + inst(GET_AITER, (obj -- iter)) { unaryfunc getter = NULL; - PyObject *iter = NULL; - PyObject *obj = TOP(); PyTypeObject *type = Py_TYPE(obj); if (type->tp_as_async != NULL) { getter = type->tp_as_async->am_aiter; } - if (getter != NULL) { - iter = (*getter)(obj); - Py_DECREF(obj); - if (iter == NULL) { - SET_TOP(NULL); - goto error; - } - } - else { - SET_TOP(NULL); + if (getter == NULL) { _PyErr_Format(tstate, PyExc_TypeError, "'async for' requires an object with " "__aiter__ method, got %.100s", type->tp_name); Py_DECREF(obj); - goto error; + ERROR_IF(1, error); } + iter = (*getter)(obj); + Py_DECREF(obj); + ERROR_IF(iter == NULL, error); + if 
(Py_TYPE(iter)->tp_as_async == NULL || Py_TYPE(iter)->tp_as_async->am_anext == NULL) { - SET_TOP(NULL); _PyErr_Format(tstate, PyExc_TypeError, "'async for' received an object from __aiter__ " "that does not implement __anext__: %.100s", Py_TYPE(iter)->tp_name); Py_DECREF(iter); - goto error; + ERROR_IF(1, error); } - - SET_TOP(iter); } // stack effect: ( -- __0) diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 5d04a50eb16917..a9ee47ed7de4e5 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -600,46 +600,39 @@ } TARGET(GET_AITER) { + PyObject *obj = PEEK(1); + PyObject *iter; unaryfunc getter = NULL; - PyObject *iter = NULL; - PyObject *obj = TOP(); PyTypeObject *type = Py_TYPE(obj); if (type->tp_as_async != NULL) { getter = type->tp_as_async->am_aiter; } - if (getter != NULL) { - iter = (*getter)(obj); - Py_DECREF(obj); - if (iter == NULL) { - SET_TOP(NULL); - goto error; - } - } - else { - SET_TOP(NULL); + if (getter == NULL) { _PyErr_Format(tstate, PyExc_TypeError, "'async for' requires an object with " "__aiter__ method, got %.100s", type->tp_name); Py_DECREF(obj); - goto error; + if (1) goto pop_1_error; } + iter = (*getter)(obj); + Py_DECREF(obj); + if (iter == NULL) goto pop_1_error; + if (Py_TYPE(iter)->tp_as_async == NULL || Py_TYPE(iter)->tp_as_async->am_anext == NULL) { - SET_TOP(NULL); _PyErr_Format(tstate, PyExc_TypeError, "'async for' received an object from __aiter__ " "that does not implement __anext__: %.100s", Py_TYPE(iter)->tp_name); Py_DECREF(iter); - goto error; + if (1) goto pop_1_error; } - - SET_TOP(iter); + POKE(1, iter); DISPATCH(); } From e0ba8bf29eab0c3027be0bd77bf5572c3be97e7e Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 25 Nov 2022 22:29:55 -0700 Subject: [PATCH 21/26] Typo in TODO comment --- Tools/cases_generator/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/cases_generator/parser.py b/Tools/cases_generator/parser.py index 
461ea359dadc3b..d802c733dfd10c 100644 --- a/Tools/cases_generator/parser.py +++ b/Tools/cases_generator/parser.py @@ -63,7 +63,7 @@ class Block(Node): class StackEffect(Node): name: str type: str = "" - # TODO: type, condition + # TODO: array, condition @dataclass From f7a54d774ac1597dc77b9fa22820ac04b4ff148d Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sun, 27 Nov 2022 22:26:44 -0700 Subject: [PATCH 22/26] Emit PREDICT() macros right before DISPATCH() This should fix the build crash on Windows. --- Tools/cases_generator/generate_cases.py | 96 +++++++++++++++---------- 1 file changed, 57 insertions(+), 39 deletions(-) diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index c571988d849873..3984d7bf39355a 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -23,7 +23,7 @@ ) BEGIN_MARKER = "// BEGIN BYTECODES //" END_MARKER = "// END BYTECODES //" -RE_PREDICTED = r"(?s)(?:PREDICT\(|GO_TO_INSTRUCTION\(|DEOPT_IF\(.*?,\s*)(\w+)\);" +RE_PREDICTED = r"^\s*(?:PREDICT\(|GO_TO_INSTRUCTION\(|DEOPT_IF\(.*?,\s*)(\w+)\);\s*$" UNUSED = "unused" BITS_PER_CODE_UNIT = 16 @@ -112,6 +112,8 @@ class Instruction: kind: typing.Literal["inst", "op"] name: str block: parser.Block + block_text: list[str] # Block.text, less curlies, less PREDICT() calls + predictions: list[str] # Prediction targets (instruction names) # Computed by constructor always_exits: bool @@ -129,7 +131,8 @@ def __init__(self, inst: parser.InstDef): self.kind = inst.kind self.name = inst.name self.block = inst.block - self.always_exits = always_exits(self.block) + self.block_text, self.predictions = extract_block_text(self.block) + self.always_exits = always_exits(self.block_text) self.cache_effects = [ effect for effect in inst.inputs if isinstance(effect, parser.CacheEffect) ] @@ -164,7 +167,7 @@ def write(self, out: Formatter) -> None: self.write_body(out, 0) # Skip the rest if the block always exits - if 
always_exits(self.block): + if self.always_exits: return # Write net stack growth/shrinkage @@ -172,7 +175,7 @@ def write(self, out: Formatter) -> None: out.stack_adjust(diff) # Write output stack effect assignments - unmoved_names = set() + unmoved_names: set[str] = set() for ieffect, oeffect in zip(self.input_effects, self.output_effects): if ieffect.name == oeffect.name: unmoved_names.add(ieffect.name) @@ -206,27 +209,10 @@ def write_body(self, out: Formatter, dedent: int, cache_adjust: int = 0) -> None cache_offset += ceffect.size assert cache_offset == self.cache_offset + cache_adjust - # Get lines of text with proper dedent - blocklines = self.block.to_text(dedent=dedent).splitlines(True) - - # Remove blank lines from both ends - while blocklines and not blocklines[0].strip(): - blocklines.pop(0) - while blocklines and not blocklines[-1].strip(): - blocklines.pop() - - # Remove leading and trailing braces - assert blocklines and blocklines[0].strip() == "{" - assert blocklines and blocklines[-1].strip() == "}" - blocklines.pop() - blocklines.pop(0) - - # Remove trailing blank lines - while blocklines and not blocklines[-1].strip(): - blocklines.pop() - # Write the body, substituting a goto for ERROR_IF() - for line in blocklines: + assert dedent <= 0 + extra = " " * -dedent + for line in self.block_text: if m := re.match(r"(\s*)ERROR_IF\((.+), (\w+)\);\s*$", line): space, cond, label = m.groups() # ERROR_IF() must pop the inputs from the stack. 
@@ -241,11 +227,13 @@ def write_body(self, out: Formatter, dedent: int, cache_adjust: int = 0) -> None else: break if ninputs: - out.write_raw(f"{space}if ({cond}) goto pop_{ninputs}_{label};\n") + out.write_raw( + f"{extra}{space}if ({cond}) goto pop_{ninputs}_{label};\n" + ) else: - out.write_raw(f"{space}if ({cond}) goto {label};\n") + out.write_raw(f"{extra}{space}if ({cond}) goto {label};\n") else: - out.write_raw(line) + out.write_raw(extra + line) InstructionOrCacheEffect = Instruction | parser.CacheEffect @@ -395,7 +383,11 @@ def analyze(self) -> None: def find_predictions(self) -> None: """Find the instructions that need PREDICTED() labels.""" for instr in self.instrs.values(): - for target in re.findall(RE_PREDICTED, instr.block.text): + targets = set(instr.predictions) + for line in instr.block_text: + if m := re.match(RE_PREDICTED, line): + targets.add(m.group(1)) + for target in targets: if target_instr := self.instrs.get(target): target_instr.predicted = True else: @@ -552,7 +544,9 @@ def stack_analysis( # and 'lowest' and 'highest' are the extremes. # Note that 'lowest' may be negative. # TODO: Reverse the numbering. 
- stack = [StackEffect(f"_tmp_{i+1}", "") for i in reversed(range(highest - lowest))] + stack = [ + StackEffect(f"_tmp_{i+1}", "") for i in reversed(range(highest - lowest)) + ] return stack, -lowest def write_instructions(self) -> None: @@ -577,7 +571,9 @@ def write_instructions(self) -> None: if instr.predicted: self.out.emit(f"PREDICTED({name});") instr.write(self.out) - if not always_exits(instr.block): + if not instr.always_exits: + for prediction in instr.predictions: + self.out.emit(f"PREDICT({prediction});") self.out.emit(f"DISPATCH();") # Write and count super-instructions @@ -652,18 +648,40 @@ def wrap_super_or_macro(self, up: SuperOrMacroInstruction): self.out.emit(f"DISPATCH();") -def always_exits(block: parser.Block) -> bool: +def extract_block_text(block: parser.Block) -> tuple[list[str], list[str]]: + # Get lines of text with proper dedent + blocklines = block.text.splitlines(True) + + # Remove blank lines from both ends + while blocklines and not blocklines[0].strip(): + blocklines.pop(0) + while blocklines and not blocklines[-1].strip(): + blocklines.pop() + + # Remove leading and trailing braces + assert blocklines and blocklines[0].strip() == "{" + assert blocklines and blocklines[-1].strip() == "}" + blocklines.pop() + blocklines.pop(0) + + # Remove trailing blank lines + while blocklines and not blocklines[-1].strip(): + blocklines.pop() + + # Separate PREDICT(...) 
macros from end + predictions: list[str] = [] + while blocklines and (m := re.match(r"^\s*PREDICT\((\w+)\);\s*$", blocklines[-1])): + predictions.insert(0, m.group(1)) + blocklines.pop() + + return blocklines, predictions + + +def always_exits(lines: list[str]) -> bool: """Determine whether a block always ends in a return/goto/etc.""" - text = block.text - lines = text.splitlines() - while lines and not lines[-1].strip(): - lines.pop() - if not lines or lines[-1].strip() != "}": - return False - lines.pop() if not lines: return False - line = lines.pop().rstrip() + line = lines[-1].rstrip() # Indent must match exactly (TODO: Do something better) if line[:12] != " " * 12: return False From 6ae9e6eb1d3d9901ae2f36532692a1d85df42c13 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 25 Nov 2022 21:20:45 -0700 Subject: [PATCH 23/26] LIST_APPEND (a bit unhappy with it) --- Python/bytecodes.c | 10 ++++------ Python/generated_cases.c.h | 8 ++++---- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index ecf4421f8042eb..88a9c86ff6ea59 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -468,12 +468,10 @@ dummy_func( DISPATCH_INLINED(new_frame); } - // stack effect: (__0 -- ) - inst(LIST_APPEND) { - PyObject *v = POP(); - PyObject *list = PEEK(oparg); - if (_PyList_AppendTakeRef((PyListObject *)list, v) < 0) - goto error; + // Alternative: (list, unused[oparg], v -- list, unused[oparg]) + inst(LIST_APPEND, (v --)) { + PyObject *list = PEEK(oparg + 1); // +1 to account for v staying on stack + ERROR_IF(_PyList_AppendTakeRef((PyListObject *)list, v) < 0, error); PREDICT(JUMP_BACKWARD); } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index a9ee47ed7de4e5..e0c4fe6f6e0f0c 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -430,10 +430,10 @@ } TARGET(LIST_APPEND) { - PyObject *v = POP(); - PyObject *list = PEEK(oparg); - if (_PyList_AppendTakeRef((PyListObject 
*)list, v) < 0) - goto error; + PyObject *v = PEEK(1); + PyObject *list = PEEK(oparg + 1); // +1 to account for v staying on stack + if (_PyList_AppendTakeRef((PyListObject *)list, v) < 0) goto pop_1_error; + STACK_SHRINK(1); PREDICT(JUMP_BACKWARD); DISPATCH(); } From 34aa393a645286d0cc44cd36eac3e9614bcbe2ed Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 25 Nov 2022 21:32:16 -0700 Subject: [PATCH 24/26] SET_ADD (also a bit unhappy with it) --- Python/bytecodes.c | 13 +++++-------- Python/generated_cases.c.h | 11 +++++------ 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 88a9c86ff6ea59..567d618be5cf48 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -475,15 +475,12 @@ dummy_func( PREDICT(JUMP_BACKWARD); } - // stack effect: (__0 -- ) - inst(SET_ADD) { - PyObject *v = POP(); - PyObject *set = PEEK(oparg); - int err; - err = PySet_Add(set, v); + // Alternative: (set, unused[oparg], v -- set, unused[oparg]) + inst(SET_ADD, (v --)) { + PyObject *set = PEEK(oparg + 1); // +1 to account for v staying on stack + int err = PySet_Add(set, v); Py_DECREF(v); - if (err != 0) - goto error; + ERROR_IF(err != 0, error); PREDICT(JUMP_BACKWARD); } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index e0c4fe6f6e0f0c..a37dc6e226ca23 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -439,13 +439,12 @@ } TARGET(SET_ADD) { - PyObject *v = POP(); - PyObject *set = PEEK(oparg); - int err; - err = PySet_Add(set, v); + PyObject *v = PEEK(1); + PyObject *set = PEEK(oparg + 1); // +1 to account for v staying on stack + int err = PySet_Add(set, v); Py_DECREF(v); - if (err != 0) - goto error; + if (err != 0) goto pop_1_error; + STACK_SHRINK(1); PREDICT(JUMP_BACKWARD); DISPATCH(); } From 28e45f990b76cbeae14a3893769859e640ebf87e Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sun, 4 Dec 2022 21:29:04 -0800 Subject: [PATCH 25/26] Super can be reformulated as 
macro with a special op --- Python/bytecodes.c | 31 ++++++---- Python/generated_cases.c.h | 124 ++++++++++++++++++++++++++----------- 2 files changed, 107 insertions(+), 48 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 567d618be5cf48..9a8f9cdea3f89f 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -73,7 +73,7 @@ do { \ #define op(name, ...) /* NAME is ignored */ #define macro(name) static int MACRO_##name #define super(name) static int SUPER_##name -#define family(name, ...) static int family_##name +#define family(name, ...) static int family_##name[] #define NAME_ERROR_MSG \ "name '%.200s' is not defined" @@ -86,6 +86,7 @@ static PyObject *exit_func, *lasti, *val, *retval, *obj, *iter; static size_t jump; // Dummy variables for cache effects static _Py_CODEUNIT when_to_jump_mask, invert, counter, index, hint; +static _Py_CODEUNIT word; static uint32_t type_version; // Dummy opcode names for 'op' opcodes #define _BINARY_OP_INPLACE_ADD_UNICODE_PART_1 1001 @@ -94,6 +95,7 @@ static uint32_t type_version; #define _COMPARE_OP_INT 1004 #define _COMPARE_OP_STR 1005 #define _JUMP_ON_SIGN 1006 +#define JOIN 0 static PyObject * dummy_func( @@ -156,11 +158,18 @@ dummy_func( SETLOCAL(oparg, value); } - super(LOAD_FAST__LOAD_FAST) = LOAD_FAST + LOAD_FAST; - super(LOAD_FAST__LOAD_CONST) = LOAD_FAST + LOAD_CONST; - super(STORE_FAST__LOAD_FAST) = STORE_FAST + LOAD_FAST; - super(STORE_FAST__STORE_FAST) = STORE_FAST + STORE_FAST; - super(LOAD_CONST__LOAD_FAST) = LOAD_CONST + LOAD_FAST; + op(JOIN, (word/1 --)) { + #ifndef NDEBUG + opcode = _Py_OPCODE(word); + #endif + oparg = _Py_OPARG(word); + } + + macro(LOAD_FAST__LOAD_FAST) = LOAD_FAST + JOIN + LOAD_FAST; + macro(LOAD_FAST__LOAD_CONST) = LOAD_FAST + JOIN + LOAD_CONST; + macro(STORE_FAST__LOAD_FAST) = STORE_FAST + JOIN + LOAD_FAST; + macro(STORE_FAST__STORE_FAST) = STORE_FAST + JOIN + STORE_FAST; + macro(LOAD_CONST__LOAD_FAST) = LOAD_CONST + JOIN + LOAD_FAST; inst(POP_TOP, (value --)) { 
Py_DECREF(value); @@ -307,8 +316,8 @@ dummy_func( op(_BINARY_OP_INPLACE_ADD_UNICODE_PART_2, (unused --)) { // The STORE_FAST is already done; oparg is dead. } - super(BINARY_OP_INPLACE_ADD_UNICODE) = - _BINARY_OP_INPLACE_ADD_UNICODE_PART_1 + _BINARY_OP_INPLACE_ADD_UNICODE_PART_2; + macro(BINARY_OP_INPLACE_ADD_UNICODE) = + _BINARY_OP_INPLACE_ADD_UNICODE_PART_1 + JOIN + _BINARY_OP_INPLACE_ADD_UNICODE_PART_2; inst(BINARY_OP_ADD_FLOAT, (left, right, unused/1 -- sum)) { assert(cframe.use_tracing == 0); @@ -2042,7 +2051,7 @@ dummy_func( } } // We're praying that the compiler optimizes the flags manipuations. - super(COMPARE_OP_FLOAT_JUMP) = _COMPARE_OP_FLOAT + _JUMP_ON_SIGN; + macro(COMPARE_OP_FLOAT_JUMP) = _COMPARE_OP_FLOAT + JOIN + _JUMP_ON_SIGN; // Similar to COMPARE_OP_FLOAT op(_COMPARE_OP_INT, (unused/1, left, right, when_to_jump_mask/1 -- jump: size_t)) { @@ -2062,7 +2071,7 @@ dummy_func( _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); jump = sign_ish & when_to_jump_mask; } - super(COMPARE_OP_INT_JUMP) = _COMPARE_OP_INT + _JUMP_ON_SIGN; + macro(COMPARE_OP_INT_JUMP) = _COMPARE_OP_INT + JOIN + _JUMP_ON_SIGN; // Similar to COMPARE_OP_FLOAT, but for ==, != only op(_COMPARE_OP_STR, (unused/1, left, right, invert/1 -- jump: size_t)) { @@ -2079,7 +2088,7 @@ dummy_func( assert(invert == 0 || invert == 1); jump = res ^ invert; } - super(COMPARE_OP_STR_JUMP) = _COMPARE_OP_STR + _JUMP_ON_SIGN; + macro(COMPARE_OP_STR_JUMP) = _COMPARE_OP_STR + JOIN + _JUMP_ON_SIGN; // stack effect: (__0 -- ) inst(IS_OP) { diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index a37dc6e226ca23..a3dfc0d0399c8c 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3567,8 +3567,13 @@ Py_INCREF(value); _tmp_2 = value; } - NEXTOPARG(); - next_instr++; + { + uint16_t word = read_u16(next_instr + 0); + #ifndef NDEBUG + opcode = _Py_OPCODE(word); + #endif + oparg = _Py_OPARG(word); + } { PyObject *value; value = GETLOCAL(oparg); @@ -3576,6 +3581,7 @@ 
Py_INCREF(value); _tmp_1 = value; } + next_instr += 1; STACK_GROW(2); POKE(1, _tmp_1); POKE(2, _tmp_2); @@ -3592,14 +3598,20 @@ Py_INCREF(value); _tmp_2 = value; } - NEXTOPARG(); - next_instr++; + { + uint16_t word = read_u16(next_instr + 0); + #ifndef NDEBUG + opcode = _Py_OPCODE(word); + #endif + oparg = _Py_OPARG(word); + } { PyObject *value; value = GETITEM(consts, oparg); Py_INCREF(value); _tmp_1 = value; } + next_instr += 1; STACK_GROW(2); POKE(1, _tmp_1); POKE(2, _tmp_2); @@ -3612,8 +3624,13 @@ PyObject *value = _tmp_1; SETLOCAL(oparg, value); } - NEXTOPARG(); - next_instr++; + { + uint16_t word = read_u16(next_instr + 0); + #ifndef NDEBUG + opcode = _Py_OPCODE(word); + #endif + oparg = _Py_OPARG(word); + } { PyObject *value; value = GETLOCAL(oparg); @@ -3621,6 +3638,7 @@ Py_INCREF(value); _tmp_1 = value; } + next_instr += 1; POKE(1, _tmp_1); DISPATCH(); } @@ -3632,12 +3650,18 @@ PyObject *value = _tmp_1; SETLOCAL(oparg, value); } - NEXTOPARG(); - next_instr++; + { + uint16_t word = read_u16(next_instr + 0); + #ifndef NDEBUG + opcode = _Py_OPCODE(word); + #endif + oparg = _Py_OPARG(word); + } { PyObject *value = _tmp_2; SETLOCAL(oparg, value); } + next_instr += 1; STACK_SHRINK(2); DISPATCH(); } @@ -3651,8 +3675,13 @@ Py_INCREF(value); _tmp_2 = value; } - NEXTOPARG(); - next_instr++; + { + uint16_t word = read_u16(next_instr + 0); + #ifndef NDEBUG + opcode = _Py_OPCODE(word); + #endif + oparg = _Py_OPARG(word); + } { PyObject *value; value = GETLOCAL(oparg); @@ -3660,12 +3689,28 @@ Py_INCREF(value); _tmp_1 = value; } + next_instr += 1; STACK_GROW(2); POKE(1, _tmp_1); POKE(2, _tmp_2); DISPATCH(); } + TARGET(END_FOR) { + PyObject *_tmp_2 = PEEK(2); + PyObject *_tmp_1 = PEEK(1); + { + PyObject *value = _tmp_1; + Py_DECREF(value); + } + { + PyObject *value = _tmp_2; + Py_DECREF(value); + } + STACK_SHRINK(2); + DISPATCH(); + } + TARGET(BINARY_OP_INPLACE_ADD_UNICODE) { PyObject *_tmp_2 = PEEK(2); PyObject *_tmp_1 = PEEK(1); @@ -3698,12 +3743,17 @@ 
_Py_DECREF_SPECIALIZED(right, _PyUnicode_ExactDealloc); if (*target_local == NULL) goto pop_2_error; } - next_instr += 1; - NEXTOPARG(); - next_instr++; + { + uint16_t word = read_u16(next_instr + 1); + #ifndef NDEBUG + opcode = _Py_OPCODE(word); + #endif + oparg = _Py_OPARG(word); + } { // The STORE_FAST is already done; oparg is dead. } + next_instr += 2; STACK_SHRINK(2); DISPATCH(); } @@ -3732,9 +3782,13 @@ jump = sign_ish & when_to_jump_mask; _tmp_2 = (PyObject *)jump; } - next_instr += 2; - NEXTOPARG(); - next_instr++; + { + uint16_t word = read_u16(next_instr + 2); + #ifndef NDEBUG + opcode = _Py_OPCODE(word); + #endif + oparg = _Py_OPARG(word); + } { size_t jump = (size_t)_tmp_2; assert(opcode == POP_JUMP_IF_FALSE || opcode == POP_JUMP_IF_TRUE); @@ -3742,6 +3796,7 @@ JUMPBY(oparg); } } + next_instr += 3; STACK_SHRINK(2); DISPATCH(); } @@ -3771,9 +3826,13 @@ jump = sign_ish & when_to_jump_mask; _tmp_2 = (PyObject *)jump; } - next_instr += 2; - NEXTOPARG(); - next_instr++; + { + uint16_t word = read_u16(next_instr + 2); + #ifndef NDEBUG + opcode = _Py_OPCODE(word); + #endif + oparg = _Py_OPARG(word); + } { size_t jump = (size_t)_tmp_2; assert(opcode == POP_JUMP_IF_FALSE || opcode == POP_JUMP_IF_TRUE); @@ -3781,6 +3840,7 @@ JUMPBY(oparg); } } + next_instr += 3; STACK_SHRINK(2); DISPATCH(); } @@ -3807,9 +3867,13 @@ jump = res ^ invert; _tmp_2 = (PyObject *)jump; } - next_instr += 2; - NEXTOPARG(); - next_instr++; + { + uint16_t word = read_u16(next_instr + 2); + #ifndef NDEBUG + opcode = _Py_OPCODE(word); + #endif + oparg = _Py_OPARG(word); + } { size_t jump = (size_t)_tmp_2; assert(opcode == POP_JUMP_IF_FALSE || opcode == POP_JUMP_IF_TRUE); @@ -3817,21 +3881,7 @@ JUMPBY(oparg); } } - STACK_SHRINK(2); - DISPATCH(); - } - - TARGET(END_FOR) { - PyObject *_tmp_2 = PEEK(2); - PyObject *_tmp_1 = PEEK(1); - { - PyObject *value = _tmp_1; - Py_DECREF(value); - } - { - PyObject *value = _tmp_2; - Py_DECREF(value); - } + next_instr += 3; STACK_SHRINK(2); DISPATCH(); } 
From 248ecfede45e5e9f4d85189042b164107a129c6f Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 6 Dec 2022 18:29:28 -0800 Subject: [PATCH 26/26] Rip out support for 'super' from the generator --- Tools/cases_generator/generate_cases.py | 85 ++++--------------------- Tools/cases_generator/parser.py | 22 +------ 2 files changed, 12 insertions(+), 95 deletions(-) diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 3984d7bf39355a..cdcbea4a2c56ed 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -260,27 +260,13 @@ def write_body(self, out: Formatter, cache_adjust: int) -> None: @dataclasses.dataclass -class SuperOrMacroInstruction: - """Common fields for super- and macro instructions.""" +class MacroInstruction: + """A macro instruction.""" name: str stack: list[StackEffect] initial_sp: int final_sp: int - - -@dataclasses.dataclass -class SuperInstruction(SuperOrMacroInstruction): - """A super-instruction.""" - - super: parser.Super - parts: list[Component] - - -@dataclasses.dataclass -class MacroInstruction(SuperOrMacroInstruction): - """A macro instruction.""" - macro: parser.Macro parts: list[Component | parser.CacheEffect] @@ -312,8 +298,6 @@ def error(self, msg: str, node: parser.Node) -> None: self.errors += 1 instrs: dict[str, Instruction] # Includes ops - supers: dict[str, parser.Super] - super_instrs: dict[str, SuperInstruction] macros: dict[str, parser.Macro] macro_instrs: dict[str, MacroInstruction] families: dict[str, parser.Family] @@ -345,15 +329,12 @@ def parse(self) -> None: # Parse from start psr.setpos(start) self.instrs = {} - self.supers = {} self.macros = {} self.families = {} while thing := psr.definition(): match thing: case parser.InstDef(name=name): self.instrs[name] = Instruction(thing) - case parser.Super(name): - self.supers[name] = thing case parser.Macro(name): self.macros[name] = thing case parser.Family(name): @@ -365,7 +346,7 @@ 
def parse(self) -> None: print( f"Read {len(self.instrs)} instructions/ops, " - f"{len(self.supers)} supers, {len(self.macros)} macros, " + f"{len(self.macros)} macros, " f"and {len(self.families)} families from {self.filename}", file=sys.stderr, ) @@ -378,7 +359,7 @@ def analyze(self) -> None: self.find_predictions() self.map_families() self.check_families() - self.analyze_supers_and_macros() + self.analyze_macros() def find_predictions(self) -> None: """Find the instructions that need PREDICTED() labels.""" @@ -444,26 +425,12 @@ def check_families(self) -> None: family, ) - def analyze_supers_and_macros(self) -> None: - """Analyze each super- and macro instruction.""" - self.super_instrs = {} + def analyze_macros(self) -> None: + """Analyze each macro instruction.""" self.macro_instrs = {} - for name, super in self.supers.items(): - self.super_instrs[name] = self.analyze_super(super) for name, macro in self.macros.items(): self.macro_instrs[name] = self.analyze_macro(macro) - def analyze_super(self, super: parser.Super) -> SuperInstruction: - components = self.check_super_components(super) - stack, initial_sp = self.stack_analysis(components) - sp = initial_sp - parts: list[Component] = [] - for instr in components: - part, sp = self.analyze_instruction(instr, stack, sp) - parts.append(part) - final_sp = sp - return SuperInstruction(super.name, stack, initial_sp, final_sp, super, parts) - def analyze_macro(self, macro: parser.Macro) -> MacroInstruction: components = self.check_macro_components(macro) stack, initial_sp = self.stack_analysis(components) @@ -494,15 +461,6 @@ def analyze_instruction( sp += 1 return Component(instr, input_mapping, output_mapping), sp - def check_super_components(self, super: parser.Super) -> list[Instruction]: - components: list[Instruction] = [] - for op in super.ops: - if op.name not in self.instrs: - self.error(f"Unknown instruction {op.name!r}", super) - else: - components.append(self.instrs[op.name]) - return components - def 
check_macro_components( self, macro: parser.Macro ) -> list[InstructionOrCacheEffect]: @@ -522,9 +480,7 @@ def check_macro_components( def stack_analysis( self, components: typing.Iterable[InstructionOrCacheEffect] ) -> tuple[list[StackEffect], int]: - """Analyze a super-instruction or macro. - - Print an error if there's a cache effect (which we don't support yet). + """Analyze a macro. Return the list of variable names and the initial stack pointer. """ @@ -576,12 +532,6 @@ def write_instructions(self) -> None: self.out.emit(f"PREDICT({prediction});") self.out.emit(f"DISPATCH();") - # Write and count super-instructions - n_supers = 0 - for sup in self.super_instrs.values(): - n_supers += 1 - self.write_super(sup) - # Write and count macro instructions n_macros = 0 for macro in self.macro_instrs.values(): @@ -589,27 +539,14 @@ def write_instructions(self) -> None: self.write_macro(macro) print( - f"Wrote {n_instrs} instructions, {n_supers} supers, " + f"Wrote {n_instrs} instructions " f"and {n_macros} macros to {self.output_filename}", file=sys.stderr, ) - def write_super(self, sup: SuperInstruction) -> None: - """Write code for a super-instruction.""" - with self.wrap_super_or_macro(sup): - first = True - for comp in sup.parts: - if not first: - self.out.emit("NEXTOPARG();") - self.out.emit("next_instr++;") - first = False - comp.write_body(self.out, 0) - if comp.instr.cache_offset: - self.out.emit(f"next_instr += {comp.instr.cache_offset};") - def write_macro(self, mac: MacroInstruction) -> None: """Write code for a macro instruction.""" - with self.wrap_super_or_macro(mac): + with self.wrap_macro(mac): cache_adjust = 0 for part in mac.parts: match part: @@ -623,8 +560,8 @@ def write_macro(self, mac: MacroInstruction) -> None: self.out.emit(f"next_instr += {cache_adjust};") @contextlib.contextmanager - def wrap_super_or_macro(self, up: SuperOrMacroInstruction): - """Shared boilerplate for super- and macro instructions.""" + def wrap_macro(self, up: 
MacroInstruction): + """Boilerplate for macro instructions.""" # TODO: Somewhere (where?) make it so that if one instruction # has an output that is input to another, and the variable names # and types match and don't conflict with other instructions, diff --git a/Tools/cases_generator/parser.py b/Tools/cases_generator/parser.py index d802c733dfd10c..cf5f29f28c592d 100644 --- a/Tools/cases_generator/parser.py +++ b/Tools/cases_generator/parser.py @@ -99,12 +99,6 @@ class InstDef(Node): block: Block -@dataclass -class Super(Node): - name: str - ops: list[OpName] - - @dataclass class Macro(Node): name: str @@ -120,11 +114,9 @@ class Family(Node): class Parser(PLexer): @contextual - def definition(self) -> InstDef | Super | Macro | Family | None: + def definition(self) -> InstDef | Macro | Family | None: if inst := self.inst_def(): return inst - if super := self.super_def(): - return super if macro := self.macro_def(): return macro if family := self.family_def(): @@ -224,18 +216,6 @@ def stack_effect(self) -> StackEffect | None: type = self.require(lx.IDENTIFIER).text return StackEffect(tkn.text, type) - @contextual - def super_def(self) -> Super | None: - if (tkn := self.expect(lx.IDENTIFIER)) and tkn.text == "super": - if self.expect(lx.LPAREN): - if tkn := self.expect(lx.IDENTIFIER): - if self.expect(lx.RPAREN): - if self.expect(lx.EQUALS): - if ops := self.ops(): - self.require(lx.SEMI) - res = Super(tkn.text, ops) - return res - def ops(self) -> list[OpName] | None: if op := self.op(): ops = [op]