From a08909dc765156a81adc296457f146909c068102 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 15 Nov 2023 14:27:54 -0800 Subject: [PATCH 01/14] Add executor_cases.c.h dependency for ceval.o --- Makefile.pre.in | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile.pre.in b/Makefile.pre.in index 2174ec3ac56158..3d766425abba34 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1609,6 +1609,7 @@ Python/ceval.o: \ $(srcdir)/Python/ceval_macros.h \ $(srcdir)/Python/condvar.h \ $(srcdir)/Python/generated_cases.c.h \ + $(srcdir)/Python/executor_cases.c.h \ $(srcdir)/Python/opcode_targets.h Python/flowgraph.o: \ From 4c2914bff6bd7b133ac5016fb2f2342a21a8871e Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 15 Nov 2023 10:46:06 -0800 Subject: [PATCH 02/14] Clean up flags.py --- Tools/cases_generator/flags.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Tools/cases_generator/flags.py b/Tools/cases_generator/flags.py index 0066c9e74512c3..808c9e82bbce07 100644 --- a/Tools/cases_generator/flags.py +++ b/Tools/cases_generator/flags.py @@ -53,7 +53,7 @@ def makes_escaping_api_call(instr: parsing.InstDef) -> bool: if "CALL_INTRINSIC" in instr.name: - return True; + return True tkns = iter(instr.tokens) for tkn in tkns: if tkn.kind != lx.IDENTIFIER: @@ -79,6 +79,7 @@ def makes_escaping_api_call(instr: parsing.InstDef) -> bool: return True return False + @dataclasses.dataclass class InstructionFlags: """Construct and manipulate instruction flags""" @@ -124,9 +125,7 @@ def fromInstruction(instr: parsing.InstDef) -> "InstructionFlags": or variable_used(instr, "exception_unwind") or variable_used(instr, "resume_with_error") ), - HAS_ESCAPES_FLAG=( - makes_escaping_api_call(instr) - ), + HAS_ESCAPES_FLAG=makes_escaping_api_call(instr), ) @staticmethod From 053a0a22936e8885333751546a1d9e100b7b6486 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 15 Nov 2023 10:47:50 -0800 Subject: [PATCH 03/14] Clean up parsing.py --- Tools/cases_generator/parsing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py index 49459be68ae5e8..d36bd52b022ea9 100644 --- a/Tools/cases_generator/parsing.py +++ b/Tools/cases_generator/parsing.py @@ -105,7 +105,7 @@ class OpName(Node): @dataclass class InstHeader(Node): - annotations : list[str] + annotations: list[str] kind: Literal["inst", "op"] name: str inputs: list[InputEffect] @@ -114,7 +114,7 @@ class InstHeader(Node): @dataclass class InstDef(Node): - annotations : list[str] + annotations: list[str] kind: Literal["inst", "op"] name: str inputs: list[InputEffect] From b838435c80dd6eb33c0d3e83bb11f3cd14dab66a Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 15 Nov 2023 15:32:13 -0800 Subject: [PATCH 04/14] Add back printing optimized uops --- Python/optimizer.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index e14ad89bbe2921..5d1ef8a683c250 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -325,7 +325,8 @@ uop_dealloc(_PyUOpExecutorObject *self) { } static const char * -uop_name(int index) { +uop_name(int index) +{ if (index <= MAX_REAL_OPCODE) { return _PyOpcode_OpName[index]; } @@ -832,6 +833,24 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) assert(dest == -1); executor->base.execute = _PyUopExecute; _Py_ExecutorInit((_PyExecutorObject *)executor, dependencies); +#ifdef Py_DEBUG + char *python_lltrace = Py_GETENV("PYTHON_LLTRACE"); + int lltrace = 0; + if (python_lltrace != NULL && *python_lltrace >= '0') { + lltrace = *python_lltrace - '0'; // TODO: Parse an int and all that + } + if (lltrace >= 2) { + printf("Optimized executor (length %d):\n", length); + for (int i = 0; i < length; i++) { + printf("%4d %s(%d, %d, %" PRIu64 ")\n", + i, + uop_name(executor->trace[i].opcode), + executor->trace[i].oparg, + executor->trace[i].target, + executor->trace[i].operand); + } + } +#endif return (_PyExecutorObject *)executor; } From b28effa9f7b43ed892cc31bb2872708d64ec0588 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 15 Nov 2023 15:37:24 -0800 Subject: [PATCH 05/14] Hacky way to make FOR_ITER a viable uop --- Include/internal/pycore_opcode_metadata.h | 86 +++++++++++++---------- Python/abstract_interp_cases.c.h | 10 +++ Python/bytecodes.c | 29 +++++++- Python/executor_cases.c.h | 49 +++++++++++++ Python/optimizer.c | 1 + Tools/cases_generator/flags.py | 2 +- Tools/cases_generator/generate_cases.py | 2 +- Tools/cases_generator/instructions.py | 2 +- 8 files changed, 137 insertions(+), 44 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 4d98b23df5d927..1442350411c90a 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -81,45 +81,46 @@ #define _IS_NONE 353 #define _SPECIALIZE_FOR_ITER 354 #define _FOR_ITER 355 -#define _ITER_CHECK_LIST 356 -#define _ITER_JUMP_LIST 357 -#define _GUARD_NOT_EXHAUSTED_LIST 358 -#define _ITER_NEXT_LIST 359 -#define _ITER_CHECK_TUPLE 360 -#define _ITER_JUMP_TUPLE 361 -#define _GUARD_NOT_EXHAUSTED_TUPLE 362 -#define _ITER_NEXT_TUPLE 363 -#define _ITER_CHECK_RANGE 364 -#define _ITER_JUMP_RANGE 365 -#define _GUARD_NOT_EXHAUSTED_RANGE 366 -#define _ITER_NEXT_RANGE 367 -#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 368 -#define _GUARD_KEYS_VERSION 369 -#define _LOAD_ATTR_METHOD_WITH_VALUES 370 -#define _LOAD_ATTR_METHOD_NO_DICT 371 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 372 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 373 -#define _CHECK_ATTR_METHOD_LAZY_DICT 374 -#define _LOAD_ATTR_METHOD_LAZY_DICT 375 -#define _SPECIALIZE_CALL 376 -#define _CALL 377 -#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 378 -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 379 -#define _CHECK_PEP_523 380 -#define _CHECK_FUNCTION_EXACT_ARGS 381 -#define _CHECK_STACK_SPACE 382 -#define _INIT_CALL_PY_EXACT_ARGS 383 -#define _PUSH_FRAME 384 -#define _SPECIALIZE_BINARY_OP 385 -#define _BINARY_OP 386 -#define _GUARD_IS_TRUE_POP 387 -#define _GUARD_IS_FALSE_POP 388 -#define _GUARD_IS_NONE_POP 389 -#define _GUARD_IS_NOT_NONE_POP 390 -#define _JUMP_TO_TOP 391 -#define _SAVE_RETURN_OFFSET 392 -#define _INSERT 393 -#define _CHECK_VALIDITY 394 +#define _FOR_ITER_TIER_TWO 356 +#define _ITER_CHECK_LIST 357 +#define _ITER_JUMP_LIST 358 +#define _GUARD_NOT_EXHAUSTED_LIST 359 +#define _ITER_NEXT_LIST 360 +#define _ITER_CHECK_TUPLE 361 +#define _ITER_JUMP_TUPLE 362 +#define _GUARD_NOT_EXHAUSTED_TUPLE 363 +#define _ITER_NEXT_TUPLE 364 +#define _ITER_CHECK_RANGE 365 +#define _ITER_JUMP_RANGE 366 +#define _GUARD_NOT_EXHAUSTED_RANGE 367 +#define _ITER_NEXT_RANGE 368 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 369 +#define _GUARD_KEYS_VERSION 370 +#define _LOAD_ATTR_METHOD_WITH_VALUES 371 +#define _LOAD_ATTR_METHOD_NO_DICT 372 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 373 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 374 +#define _CHECK_ATTR_METHOD_LAZY_DICT 375 +#define _LOAD_ATTR_METHOD_LAZY_DICT 376 +#define _SPECIALIZE_CALL 377 +#define _CALL 378 +#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 379 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 380 +#define _CHECK_PEP_523 381 +#define _CHECK_FUNCTION_EXACT_ARGS 382 +#define _CHECK_STACK_SPACE 383 +#define _INIT_CALL_PY_EXACT_ARGS 384 +#define _PUSH_FRAME 385 +#define _SPECIALIZE_BINARY_OP 386 +#define _BINARY_OP 387 +#define _GUARD_IS_TRUE_POP 388 +#define _GUARD_IS_FALSE_POP 389 +#define _GUARD_IS_NONE_POP 390 +#define _GUARD_IS_NOT_NONE_POP 391 +#define _JUMP_TO_TOP 392 +#define _SAVE_RETURN_OFFSET 393 +#define _INSERT 394 +#define _CHECK_VALIDITY 395 extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump); #ifdef NEED_OPCODE_METADATA @@ -543,6 +544,8 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case _FOR_ITER: return 1; + case _FOR_ITER_TIER_TWO: + return 1; case FOR_ITER: return 1; case INSTRUMENTED_FOR_ITER: @@ -1181,6 +1184,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case _FOR_ITER: return 2; + case _FOR_ITER_TIER_TWO: + return 2; case FOR_ITER: return 2; case INSTRUMENTED_FOR_ITER: @@ -1676,6 +1681,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [GET_YIELD_FROM_ITER] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [_SPECIALIZE_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [_FOR_ITER] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [_FOR_ITER_TIER_TWO] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [_ITER_CHECK_LIST] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, @@ -1906,6 +1912,7 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN [MATCH_KEYS] = { .nuops = 1, .uops = { { MATCH_KEYS, 0, 0 } } }, [GET_ITER] = { .nuops = 1, .uops = { { GET_ITER, 0, 0 } } }, [GET_YIELD_FROM_ITER] = { .nuops = 1, .uops = { { GET_YIELD_FROM_ITER, 0, 0 } } }, + [FOR_ITER] = { .nuops = 1, .uops = { { _FOR_ITER, 0, 0 } } }, [FOR_ITER_LIST] = { .nuops = 3, .uops = { { _ITER_CHECK_LIST, 0, 0 }, { _ITER_JUMP_LIST, 0, 0 }, { _ITER_NEXT_LIST, 0, 0 } } }, [FOR_ITER_TUPLE] = { .nuops = 3, .uops = { { _ITER_CHECK_TUPLE, 0, 0 }, { _ITER_JUMP_TUPLE, 0, 0 }, { _ITER_NEXT_TUPLE, 0, 0 } } }, [FOR_ITER_RANGE] = { .nuops = 3, .uops = { { _ITER_CHECK_RANGE, 0, 0 }, { _ITER_JUMP_RANGE, 0, 0 }, { _ITER_NEXT_RANGE, 0, 0 } } }, @@ -2005,6 +2012,7 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = { [_IS_NONE] = "_IS_NONE", [_SPECIALIZE_FOR_ITER] = "_SPECIALIZE_FOR_ITER", [_FOR_ITER] = "_FOR_ITER", + [_FOR_ITER_TIER_TWO] = "_FOR_ITER_TIER_TWO", [_ITER_CHECK_LIST] = "_ITER_CHECK_LIST", [_ITER_JUMP_LIST] = "_ITER_JUMP_LIST", [_GUARD_NOT_EXHAUSTED_LIST] = "_GUARD_NOT_EXHAUSTED_LIST", diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index a2f6aa8def8f69..28338f53ea7fb9 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -242,6 +242,10 @@ break; } + case _SPECIALIZE_UNPACK_SEQUENCE: { + break; + } + case _UNPACK_SEQUENCE: { STACK_SHRINK(1); STACK_GROW(oparg); @@ -624,6 +628,12 @@ break; } + case _FOR_ITER_TIER_TWO: { + STACK_GROW(1); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); + break; + } + case _ITER_CHECK_LIST: { break; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 8a7dcb8416eb8c..da58cb75e607ed 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2368,7 +2368,7 @@ dummy_func( goto enter_tier_one; } - replaced op(_POP_JUMP_IF_FALSE, (unused/1, cond -- )) { + replaced op(_POP_JUMP_IF_FALSE, (unused/1, cond -- )) { assert(PyBool_Check(cond)); int flag = Py_IsFalse(cond); #if ENABLE_SPECIALIZATION @@ -2512,7 +2512,7 @@ dummy_func( #endif /* ENABLE_SPECIALIZATION */ } - op(_FOR_ITER, (iter -- iter, next)) { + replaced op(_FOR_ITER, (iter -- iter, next)) { /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */ next = (*Py_TYPE(iter)->tp_iternext)(iter); if (next == NULL) { @@ -2535,6 +2535,31 @@ dummy_func( // Common case: no jump, leave it to the code generator } + op(_FOR_ITER_TIER_TWO, (iter -- iter, next)) { + /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */ + next = (*Py_TYPE(iter)->tp_iternext)(iter); + if (next == NULL) { + if (_PyErr_Occurred(tstate)) { + if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) { + GOTO_ERROR(error); + } + _PyErr_Clear(tstate); + } + /* iterator ended normally */ + Py_DECREF(iter); + STACK_SHRINK(1); + /* HACK: Emulate DEOPT_IF to jump over END_FOR */ + _PyFrame_SetStackPointer(frame, stack_pointer); + frame->instr_ptr += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; + assert(frame->instr_ptr[-1].op.code == END_FOR || + frame->instr_ptr[-1].op.code == INSTRUMENTED_END_FOR); + Py_DECREF(current_executor); + OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); + goto enter_tier_one; + } + // Common case: no jump, leave it to the code generator + } + macro(FOR_ITER) = _SPECIALIZE_FOR_ITER + _FOR_ITER; inst(INSTRUMENTED_FOR_ITER, (unused/1 -- )) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 4e29fb9f0fa93d..4f2f73ee76d5ef 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -866,6 +866,24 @@ break; } + case _SPECIALIZE_UNPACK_SEQUENCE: { + PyObject *seq; + seq = stack_pointer[-1]; + uint16_t counter = (uint16_t)operand; + #if ENABLE_SPECIALIZATION + if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + next_instr = this_instr; + _Py_Specialize_UnpackSequence(seq, next_instr, oparg); + DISPATCH_SAME_OPARG(); + } + STAT_INC(UNPACK_SEQUENCE, deferred); + DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + #endif /* ENABLE_SPECIALIZATION */ + (void)seq; + (void)counter; + break; + } + case _UNPACK_SEQUENCE: { PyObject *seq; seq = stack_pointer[-1]; @@ -2101,6 +2119,37 @@ break; } + case _FOR_ITER_TIER_TWO: { + PyObject *iter; + PyObject *next; + iter = stack_pointer[-1]; + /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */ + next = (*Py_TYPE(iter)->tp_iternext)(iter); + if (next == NULL) { + if (_PyErr_Occurred(tstate)) { + if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) { + GOTO_ERROR(error); + } + _PyErr_Clear(tstate); + } + /* iterator ended normally */ + Py_DECREF(iter); + STACK_SHRINK(1); + /* HACK: Emulate DEOPT_IF to jump over END_FOR */ + _PyFrame_SetStackPointer(frame, stack_pointer); + frame->instr_ptr += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; + assert(frame->instr_ptr[-1].op.code == END_FOR || + frame->instr_ptr[-1].op.code == INSTRUMENTED_END_FOR); + Py_DECREF(current_executor); + OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); + goto enter_tier_one; + } + // Common case: no jump, leave it to the code generator + STACK_GROW(1); + stack_pointer[-1] = next; + break; + } + case _ITER_CHECK_LIST: { PyObject *iter; iter = stack_pointer[-1]; diff --git a/Python/optimizer.c b/Python/optimizer.c index 5d1ef8a683c250..5c9f965aeefa85 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -392,6 +392,7 @@ _PyUop_Replacements[OPCODE_METADATA_SIZE] = { [_ITER_JUMP_RANGE] = _GUARD_NOT_EXHAUSTED_RANGE, [_ITER_JUMP_LIST] = _GUARD_NOT_EXHAUSTED_LIST, [_ITER_JUMP_TUPLE] = _GUARD_NOT_EXHAUSTED_TUPLE, + [_FOR_ITER] = _FOR_ITER_TIER_TWO, }; static const uint16_t diff --git a/Tools/cases_generator/flags.py b/Tools/cases_generator/flags.py index 808c9e82bbce07..bf76112159e38e 100644 --- a/Tools/cases_generator/flags.py +++ b/Tools/cases_generator/flags.py @@ -175,7 +175,7 @@ def variable_used_unspecialized(node: parsing.Node, name: str) -> bool: tokens: list[lx.Token] = [] skipping = False for i, token in enumerate(node.tokens): - if token.kind == "MACRO": + if token.kind == "CMACRO": text = "".join(token.text.split()) # TODO: Handle nested #if if text == "#if": diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index d1dbfeae8d74f6..ba45e3a625072e 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -658,7 +658,7 @@ def write_macro_expansions( if not part.instr.is_viable_uop() and "replaced" not in part.instr.annotations: # This note just reminds us about macros that cannot # be expanded to Tier 2 uops. It is not an error. - # It is sometimes emitted for macros that have a + # Suppress it using 'replaced op(...)' for macros having # manual translation in translate_bytecode_to_trace() # in Python/optimizer.c. if len(parts) > 1 or part.instr.name != name: diff --git a/Tools/cases_generator/instructions.py b/Tools/cases_generator/instructions.py index 9039ac5c6f127e..457221a0e15f75 100644 --- a/Tools/cases_generator/instructions.py +++ b/Tools/cases_generator/instructions.py @@ -115,7 +115,7 @@ def __init__(self, inst: parsing.InstDef): def is_viable_uop(self) -> bool: """Whether this instruction is viable as a uop.""" dprint: typing.Callable[..., None] = lambda *args, **kwargs: None - if "FRAME" in self.name: + if self.name == "_FOR_ITER_TIER_TWO": dprint = print if self.name == "_EXIT_TRACE": From de8f199a17d2daec020fff97f0661e2a7c0f29ff Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 15 Nov 2023 16:03:13 -0800 Subject: [PATCH 06/14] _SPECIALIZE_UNPACK_SEQUENCE is TIER_ONE_ONLY --- Python/abstract_interp_cases.c.h | 4 ---- Python/bytecodes.c | 1 + Python/executor_cases.c.h | 18 ------------------ Python/generated_cases.c.h | 1 + 4 files changed, 2 insertions(+), 22 deletions(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 28338f53ea7fb9..0d7fbe8a39a5d4 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -242,10 +242,6 @@ break; } - case _SPECIALIZE_UNPACK_SEQUENCE: { - break; - } - case _UNPACK_SEQUENCE: { STACK_SHRINK(1); STACK_GROW(oparg); diff --git a/Python/bytecodes.c b/Python/bytecodes.c index da58cb75e607ed..64e511c7106f2a 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1210,6 +1210,7 @@ dummy_func( }; specializing op(_SPECIALIZE_UNPACK_SEQUENCE, (counter/1, seq -- seq)) { + TIER_ONE_ONLY #if ENABLE_SPECIALIZATION if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { next_instr = this_instr; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 4f2f73ee76d5ef..0652d7a3ab8b3f 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -866,24 +866,6 @@ break; } - case _SPECIALIZE_UNPACK_SEQUENCE: { - PyObject *seq; - seq = stack_pointer[-1]; - uint16_t counter = (uint16_t)operand; - #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { - next_instr = this_instr; - _Py_Specialize_UnpackSequence(seq, next_instr, oparg); - DISPATCH_SAME_OPARG(); - } - STAT_INC(UNPACK_SEQUENCE, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); - #endif /* ENABLE_SPECIALIZATION */ - (void)seq; - (void)counter; - break; - } - case _UNPACK_SEQUENCE: { PyObject *seq; seq = stack_pointer[-1]; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index fe0cbfe6330e51..a74529d88557be 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1731,6 +1731,7 @@ seq = stack_pointer[-1]; { uint16_t counter = read_u16(&this_instr[1].cache); + TIER_ONE_ONLY #if ENABLE_SPECIALIZATION if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { next_instr = this_instr; From 5c5d8bd5d71e285a950cd40f6c0cb0c3cc575fef Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 15 Nov 2023 16:14:17 -0800 Subject: [PATCH 07/14] NEWS --- .../2023-11-15-16-14-10.gh-issue-106529.Y48ax9.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-11-15-16-14-10.gh-issue-106529.Y48ax9.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-11-15-16-14-10.gh-issue-106529.Y48ax9.rst b/Misc/NEWS.d/next/Core and Builtins/2023-11-15-16-14-10.gh-issue-106529.Y48ax9.rst new file mode 100644 index 00000000000000..b2a34ac735cdeb --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-11-15-16-14-10.gh-issue-106529.Y48ax9.rst @@ -0,0 +1 @@ +Enable translating unspecialized ``FOR_ITER`` to Tier 2. From 36e9ada4d03af2207babd57bfbfd490ed0f9b5fb Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 16 Nov 2023 12:34:16 -0800 Subject: [PATCH 08/14] Double max trace length to 256 --- Include/internal/pycore_uops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_uops.h b/Include/internal/pycore_uops.h index 0ecbd2dfd1af73..8ab9aaf4108079 100644 --- a/Include/internal/pycore_uops.h +++ b/Include/internal/pycore_uops.h @@ -10,7 +10,7 @@ extern "C" { #include "pycore_frame.h" // _PyInterpreterFrame -#define _Py_UOP_MAX_TRACE_LENGTH 128 +#define _Py_UOP_MAX_TRACE_LENGTH 256 typedef struct { uint16_t opcode; From def1830fad23042b498b21263305a7051790991d Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 16 Nov 2023 15:24:02 -0800 Subject: [PATCH 09/14] Move stuff around to suit the JIT branch --- Include/internal/pycore_opcode_metadata.h | 2 +- Python/bytecodes.c | 6 ++---- Python/ceval.c | 3 +-- Python/ceval_macros.h | 2 ++ Python/executor_cases.c.h | 6 ++---- 5 files changed, 8 insertions(+), 11 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 1442350411c90a..d6e348ae951c55 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1784,7 +1784,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [_JUMP_TO_TOP] = { true, INSTR_FMT_IX, HAS_EVAL_BREAK_FLAG }, [_SET_IP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [_SAVE_RETURN_OFFSET] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, - [_EXIT_TRACE] = { true, INSTR_FMT_IX, 0 }, + [_EXIT_TRACE] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, [_INSERT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [_CHECK_VALIDITY] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, }; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 64e511c7106f2a..20afb1f9197d8c 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2550,13 +2550,10 @@ dummy_func( Py_DECREF(iter); STACK_SHRINK(1); /* HACK: Emulate DEOPT_IF to jump over END_FOR */ - _PyFrame_SetStackPointer(frame, stack_pointer); frame->instr_ptr += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; assert(frame->instr_ptr[-1].op.code == END_FOR || frame->instr_ptr[-1].op.code == INSTRUMENTED_END_FOR); - Py_DECREF(current_executor); - OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); - goto enter_tier_one; + goto exit_trace; } // Common case: no jump, leave it to the code generator } @@ -4034,6 +4031,7 @@ dummy_func( op(_EXIT_TRACE, (--)) { TIER_TWO_ONLY + frame->instr_ptr = CURRENT_TARGET() + _PyCode_CODE(_PyFrame_GetCode(frame)); GOTO_TIER_ONE(); } diff --git a/Python/ceval.c b/Python/ceval.c index d684c72cc9e302..f1add9f8cf17a9 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1067,7 +1067,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int UOP_STAT_INC(opcode, miss); frame->return_offset = 0; // Dispatch to frame->instr_ptr _PyFrame_SetStackPointer(frame, stack_pointer); - frame->instr_ptr = next_uop[-1].target + _PyCode_CODE((PyCodeObject *)frame->f_executable); + frame->instr_ptr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame)); Py_DECREF(current_executor); // Fall through // Jump here from ENTER_EXECUTOR @@ -1078,7 +1078,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int // Jump here from _EXIT_TRACE exit_trace: _PyFrame_SetStackPointer(frame, stack_pointer); - frame->instr_ptr = next_uop[-1].target + _PyCode_CODE((PyCodeObject *)frame->f_executable); Py_DECREF(current_executor); OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); goto enter_tier_one; diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 546adbe5f438d1..cd6edeb0734a11 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -397,3 +397,5 @@ stack_pointer = _PyFrame_GetStackPointer(frame); #define GOTO_TIER_TWO() goto enter_tier_two; #define GOTO_TIER_ONE() goto exit_trace; + +#define CURRENT_TARGET() (next_uop[-1].target) diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 0652d7a3ab8b3f..ccc72d3427e7ac 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2118,13 +2118,10 @@ Py_DECREF(iter); STACK_SHRINK(1); /* HACK: Emulate DEOPT_IF to jump over END_FOR */ - _PyFrame_SetStackPointer(frame, stack_pointer); frame->instr_ptr += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; assert(frame->instr_ptr[-1].op.code == END_FOR || frame->instr_ptr[-1].op.code == INSTRUMENTED_END_FOR); - Py_DECREF(current_executor); - OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); - goto enter_tier_one; + goto exit_trace; } // Common case: no jump, leave it to the code generator STACK_GROW(1); @@ -3276,6 +3273,7 @@ case _EXIT_TRACE: { TIER_TWO_ONLY + frame->instr_ptr = CURRENT_TARGET() + _PyCode_CODE(_PyFrame_GetCode(frame)); GOTO_TIER_ONE(); break; } From 70968183bf7dbc5c487d9a20d426ba997de0fc34 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 17 Nov 2023 12:47:24 -0800 Subject: [PATCH 10/14] Clean up _FOR_ITER_TIER_TWO using DEOPT_IF(true) --- Include/internal/pycore_opcode_metadata.h | 2 +- Python/bytecodes.c | 7 ++----- Python/executor_cases.c.h | 7 ++----- Python/optimizer.c | 6 ++++++ 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index d6e348ae951c55..1a2c4956849011 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1681,7 +1681,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [GET_YIELD_FROM_ITER] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [_SPECIALIZE_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [_FOR_ITER] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [_FOR_ITER_TIER_TWO] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [_FOR_ITER_TIER_TWO] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [_ITER_CHECK_LIST] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 20afb1f9197d8c..abc51d82e9409f 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2549,11 +2549,8 @@ dummy_func( /* iterator ended normally */ Py_DECREF(iter); STACK_SHRINK(1); - /* HACK: Emulate DEOPT_IF to jump over END_FOR */ - frame->instr_ptr += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; - assert(frame->instr_ptr[-1].op.code == END_FOR || - frame->instr_ptr[-1].op.code == INSTRUMENTED_END_FOR); - goto exit_trace; + /* The translator sets the deopt target just past END_FOR */ + DEOPT_IF(true); } // Common case: no jump, leave it to the code generator } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index ccc72d3427e7ac..8d13c524794008 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2117,11 +2117,8 @@ /* iterator ended normally */ Py_DECREF(iter); STACK_SHRINK(1); - /* HACK: Emulate DEOPT_IF to jump over END_FOR */ - frame->instr_ptr += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; - assert(frame->instr_ptr[-1].op.code == END_FOR || - frame->instr_ptr[-1].op.code == INSTRUMENTED_END_FOR); - goto exit_trace; + /* The translator sets the deopt target just past END_FOR */ + DEOPT_IF(true, _FOR_ITER_TIER_TWO); } // Common case: no jump, leave it to the code generator STACK_GROW(1); diff --git a/Python/optimizer.c b/Python/optimizer.c index 5c9f965aeefa85..2df1d266e67f95 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -621,6 +621,12 @@ translate_bytecode_to_trace( } if (_PyUop_Replacements[uop]) { uop = _PyUop_Replacements[uop]; + if (uop == _FOR_ITER_TIER_TWO) { + target += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; + _Py_CODEUNIT word = _PyCode_CODE(code)[target-1]; + assert(word.op.code == END_FOR || + word.op.code == INSTRUMENTED_END_FOR); + } } break; case OPARG_CACHE_1: From 58521059780ff091c118f3b89789831fcf822937 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 17 Nov 2023 13:16:47 -0800 Subject: [PATCH 11/14] Add test --- Lib/test/test_capi/test_misc.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index fe5c36c0c0dec9..21a5cd3326d707 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2808,6 +2808,36 @@ def testfunc(n): uops = {opname for opname, _, _ in ex} self.assertIn("_GUARD_IS_FALSE_POP", uops) + def test_for_iter_tier_two(self): + class MyIter: + def __init__(self, n): + self.n = n + def __iter__(self): + return self + def __next__(self): + self.n -= 1 + if self.n < 0: + raise StopIteration + return self.n + + def testfunc(n, m): + x = 0 + for i in range(m): + for j in MyIter(n): + x += 1000*i + j + return x + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + x = testfunc(10, 10) + + self.assertEqual(x, sum(range(10)) * 10010) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_FOR_ITER_TIER_TWO", uops) + if __name__ == "__main__": unittest.main() From 4ac68b3dfee0b632eaa2bfb62e3667799c095b6f Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 17 Nov 2023 14:06:17 -0800 Subject: [PATCH 12/14] Revert debug change to is_viable_uop() --- Tools/cases_generator/instructions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/cases_generator/instructions.py b/Tools/cases_generator/instructions.py index 457221a0e15f75..9039ac5c6f127e 100644 --- a/Tools/cases_generator/instructions.py +++ b/Tools/cases_generator/instructions.py @@ -115,7 +115,7 @@ def __init__(self, inst: parsing.InstDef): def is_viable_uop(self) -> bool: """Whether this instruction is viable as a uop.""" dprint: typing.Callable[..., None] = lambda *args, **kwargs: None - if self.name == "_FOR_ITER_TIER_TWO": + if "FRAME" in self.name: dprint = print if self.name == "_EXIT_TRACE": From 95b1a01ea6f2b39b7bc96d2634b74f27cb153e00 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 17 Nov 2023 14:07:03 -0800 Subject: [PATCH 13/14] Avoid debug-only local variable 'word' --- Python/optimizer.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 2df1d266e67f95..4278da3598e4f0 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -623,9 +623,8 @@ translate_bytecode_to_trace( uop = _PyUop_Replacements[uop]; if (uop == _FOR_ITER_TIER_TWO) { target += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; - _Py_CODEUNIT word = _PyCode_CODE(code)[target-1]; - assert(word.op.code == END_FOR || - word.op.code == INSTRUMENTED_END_FOR); + assert(_PyCode_CODE(code)[target-1].op.code == END_FOR || + _PyCode_CODE(code)[target-1].op.code == INSTRUMENTED_END_FOR); } } break; From 4c720287aa9910c89a0e7ea4b3ae76224c0f9892 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 17 Nov 2023 14:12:38 -0800 Subject: [PATCH 14/14] Revert changes to _EXIT_TRACE logic --- Include/internal/pycore_opcode_metadata.h | 2 +- Python/bytecodes.c | 1 - Python/ceval.c | 1 + Python/ceval_macros.h | 2 -- Python/executor_cases.c.h | 1 - 5 files changed, 2 insertions(+), 5 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 1a2c4956849011..4e45725d393479 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1784,7 +1784,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [_JUMP_TO_TOP] = { true, INSTR_FMT_IX, HAS_EVAL_BREAK_FLAG }, [_SET_IP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [_SAVE_RETURN_OFFSET] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, - [_EXIT_TRACE] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, + [_EXIT_TRACE] = { true, INSTR_FMT_IX, 0 }, [_INSERT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [_CHECK_VALIDITY] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, }; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index abc51d82e9409f..06baa85589b168 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4028,7 +4028,6 @@ dummy_func( op(_EXIT_TRACE, (--)) { TIER_TWO_ONLY - frame->instr_ptr = CURRENT_TARGET() + _PyCode_CODE(_PyFrame_GetCode(frame)); GOTO_TIER_ONE(); } diff --git a/Python/ceval.c b/Python/ceval.c index f1add9f8cf17a9..390de32f8e1b15 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1078,6 +1078,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int // Jump here from _EXIT_TRACE exit_trace: _PyFrame_SetStackPointer(frame, stack_pointer); + frame->instr_ptr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame)); Py_DECREF(current_executor); OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); goto enter_tier_one; diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index cd6edeb0734a11..546adbe5f438d1 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -397,5 +397,3 @@ stack_pointer = _PyFrame_GetStackPointer(frame); #define GOTO_TIER_TWO() goto enter_tier_two; #define GOTO_TIER_ONE() goto exit_trace; - -#define CURRENT_TARGET() (next_uop[-1].target) diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 8d13c524794008..ae662b20e4403f 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3270,7 +3270,6 @@ case _EXIT_TRACE: { TIER_TWO_ONLY - frame->instr_ptr = CURRENT_TARGET() + _PyCode_CODE(_PyFrame_GetCode(frame)); GOTO_TIER_ONE(); break; }