diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h
index 10457afc180a00..2f891a676d2b54 100644
--- a/Include/cpython/optimizer.h
+++ b/Include/cpython/optimizer.h
@@ -14,7 +14,7 @@ typedef struct {
 typedef struct _PyExecutorObject {
     PyObject_VAR_HEAD
     /* WARNING: execute consumes a reference to self. This is necessary to allow executors to tail call into each other. */
-    struct _PyInterpreterFrame *(*execute)(struct _PyExecutorObject *self, struct _PyInterpreterFrame *frame, PyObject **stack_pointer);
+    _Py_CODEUNIT *(*execute)(struct _PyExecutorObject *self, struct _PyInterpreterFrame *frame, PyObject **stack_pointer);
     _PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */
     /* Data needed by the executor goes here, but is opaque to the VM */
 } _PyExecutorObject;
@@ -40,7 +40,7 @@ PyAPI_FUNC(_PyOptimizerObject *) PyUnstable_GetOptimizer(void);
 
 PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int offset);
 
-struct _PyInterpreterFrame *
+_Py_CODEUNIT *
 _PyOptimizer_BackEdge(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNIT *dest, PyObject **stack_pointer);
 
 extern _PyOptimizerObject _PyOptimizer_Default;
diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h
index fb5c0465a10021..d0d452f1ef5dc5 100644
--- a/Include/internal/pycore_opcode_metadata.h
+++ b/Include/internal/pycore_opcode_metadata.h
@@ -1372,7 +1372,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = {
     [JUMP_BACKWARD] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG },
     [JUMP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG },
     [JUMP_NO_INTERRUPT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG },
-    [ENTER_EXECUTOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG },
+    [ENTER_EXECUTOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG },
     [POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG },
     [POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG },
     [IS_NONE] = { true, INSTR_FMT_IX, 0 },
diff --git a/Include/internal/pycore_uops.h b/Include/internal/pycore_uops.h
index 249f5c010e0092..4e664632ec73dc 100644
--- a/Include/internal/pycore_uops.h
+++ b/Include/internal/pycore_uops.h
@@ -23,7 +23,7 @@ typedef struct {
     _PyUOpInstruction trace[1];
 } _PyUOpExecutorObject;
 
-_PyInterpreterFrame *_PyUopExecute(
+_Py_CODEUNIT *_PyUopExecute(
     _PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject **stack_pointer);
 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-09-07-04-50-19.gh-issue-108866.xbJ-9a.rst b/Misc/NEWS.d/next/Core and Builtins/2023-09-07-04-50-19.gh-issue-108866.xbJ-9a.rst
new file mode 100644
index 00000000000000..96606924d4a3ec
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-09-07-04-50-19.gh-issue-108866.xbJ-9a.rst
@@ -0,0 +1,3 @@
+Change the API and contract of ``_PyExecutorObject`` to return the
+next_instr pointer, instead of the frame, and to always execute at least one
+instruction.
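The header change above redefines the executor contract: ``execute`` now returns the code unit at which the tier-1 interpreter should resume (or NULL with an exception set), instead of a frame, and must make progress before returning; the interpreter re-fetches the current frame from the thread state afterwards. A minimal sketch of an executor under the new contract follows. The ``_TrivialExecutorObject`` type and ``trivial_execute`` are hypothetical illustrations, not part of this patch; they mirror ``counter_execute`` in Python/optimizer.c below:

typedef struct {
    _PyExecutorObject executor;   /* base struct holding the execute pointer */
    _Py_CODEUNIT *jump_target;    /* resolved when the executor is created */
} _TrivialExecutorObject;

static _Py_CODEUNIT *
trivial_execute(_PyExecutorObject *self, _PyInterpreterFrame *frame,
                PyObject **stack_pointer)
{
    _Py_CODEUNIT *target = ((_TrivialExecutorObject *)self)->jump_target;
    /* Returning the jump target means the back edge itself has been
     * executed, satisfying the "at least one instruction" guarantee. */
    _PyFrame_SetStackPointer(frame, stack_pointer);
    Py_DECREF(self);   /* per the header comment, execute consumes self */
    return target;     /* becomes next_instr in the tier-1 interpreter */
}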
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 8820b52774671b..354cffca27333f 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -2218,23 +2218,25 @@ dummy_func(
             JUMPBY(1-oparg);
             #if ENABLE_SPECIALIZATION
             here[1].cache += (1 << OPTIMIZER_BITS_IN_COUNTER);
-            if (here[1].cache > tstate->interp->optimizer_backedge_threshold &&
-                // Double-check that the opcode isn't instrumented or something:
-                here->op.code == JUMP_BACKWARD &&
-                // _PyOptimizer_BackEdge is going to change frame->prev_instr,
-                // which breaks line event calculations:
-                next_instr->op.code != INSTRUMENTED_LINE
-            )
-            {
+            if (here[1].cache > tstate->interp->optimizer_backedge_threshold) {
+                /* We should not be here if the code has been instrumented,
+                 * or already has an attached executor */
+                assert(here->op.code != ENTER_EXECUTOR);
+                assert(here->op.code != INSTRUMENTED_JUMP_BACKWARD);
                 OBJECT_STAT_INC(optimization_attempts);
-                frame = _PyOptimizer_BackEdge(frame, here, next_instr, stack_pointer);
-                if (frame == NULL) {
-                    frame = tstate->current_frame;
+                _Py_CODEUNIT *src = here;
+                while(oparg > 255) {
+                    oparg >>= 8;
+                    src--;
+                    assert(src->op.code == EXTENDED_ARG);
+                }
+                next_instr = _PyOptimizer_BackEdge(frame, src, next_instr, stack_pointer);
+                frame = tstate->current_frame;
+                if (next_instr == NULL) {
                     goto resume_with_error;
                 }
-                assert(frame == tstate->current_frame);
+                stack_pointer = _PyFrame_GetStackPointer(frame);
                 here[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) -1);
-                goto resume_frame;
             }
             #endif  /* ENABLE_SPECIALIZATION */
         }
@@ -2251,19 +2253,15 @@ dummy_func(
 
         inst(ENTER_EXECUTOR, (--)) {
             CHECK_EVAL_BREAKER();
-
             PyCodeObject *code = _PyFrame_GetCode(frame);
             _PyExecutorObject *executor = (_PyExecutorObject *)code->co_executors->executors[oparg&255];
-            int original_oparg = executor->vm_data.oparg | (oparg & 0xfffff00);
-            JUMPBY(1-original_oparg);
-            frame->prev_instr = next_instr - 1;
             Py_INCREF(executor);
-            frame = executor->execute(executor, frame, stack_pointer);
-            if (frame == NULL) {
-                frame = tstate->current_frame;
+            next_instr = executor->execute(executor, frame, stack_pointer);
+            frame = tstate->current_frame;
+            if (next_instr == NULL) {
                 goto resume_with_error;
             }
-            goto resume_frame;
+            stack_pointer = _PyFrame_GetStackPointer(frame);
         }
 
         inst(POP_JUMP_IF_FALSE, (cond -- )) {
@@ -3821,10 +3819,9 @@ dummy_func(
         }
 
         op(EXIT_TRACE, (--)) {
-            frame->prev_instr--; // Back up to just before destination
            _PyFrame_SetStackPointer(frame, stack_pointer);
            Py_DECREF(self);
-            return frame;
+            return frame->prev_instr;
         }
 
         op(INSERT, (unused[oparg], top -- top, unused[oparg])) {
diff --git a/Python/executor.c b/Python/executor.c
index ac9104223da8ff..c37f5ffab4ea09 100644
--- a/Python/executor.c
+++ b/Python/executor.c
@@ -34,7 +34,7 @@
 
 #define ENABLE_SPECIALIZATION 0
 
-_PyInterpreterFrame *
+_Py_CODEUNIT *
 _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject **stack_pointer)
 {
 #ifdef Py_DEBUG
@@ -122,8 +122,7 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
     // On DEOPT_IF we just repeat the last instruction.
     // This presumes nothing was popped from the stack (nor pushed).
     DPRINTF(2, "DEOPT: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
-    frame->prev_instr--; // Back up to just before destination
     _PyFrame_SetStackPointer(frame, stack_pointer);
     Py_DECREF(self);
-    return frame;
+    return frame->prev_instr;
 }
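The rewind loop added to JUMP_BACKWARD above is load-bearing: ``_PyOptimizer_BackEdge`` hands ``src`` to ``insert_executor`` (Python/optimizer.c below), which installs ENTER_EXECUTOR at that position, so ``src`` must point at the *first* code unit of the jump instruction. The loop relies on each byte of oparg beyond the lowest coming from one preceding EXTENDED_ARG unit. Restated as a standalone helper (the name is hypothetical; the logic is the same):

static _Py_CODEUNIT *
rewind_over_extended_args(_Py_CODEUNIT *here, int oparg)
{
    _Py_CODEUNIT *src = here;    /* here points at the JUMP_BACKWARD unit */
    while (oparg > 255) {        /* one EXTENDED_ARG per extra oparg byte */
        oparg >>= 8;
        src--;
        assert(src->op.code == EXTENDED_ARG);
    }
    return src;    /* first code unit of the (possibly prefixed) jump */
}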
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index f4c526a5a8c0a2..0edb6c0d17af5c 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -2874,10 +2874,9 @@
         }
 
         case EXIT_TRACE: {
-            frame->prev_instr--; // Back up to just before destination
             _PyFrame_SetStackPointer(frame, stack_pointer);
             Py_DECREF(self);
-            return frame;
+            return frame->prev_instr;
             break;
         }
 
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index 84f83db128ea50..8e02485532d29b 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -2966,23 +2966,25 @@
             JUMPBY(1-oparg);
             #if ENABLE_SPECIALIZATION
             here[1].cache += (1 << OPTIMIZER_BITS_IN_COUNTER);
-            if (here[1].cache > tstate->interp->optimizer_backedge_threshold &&
-                // Double-check that the opcode isn't instrumented or something:
-                here->op.code == JUMP_BACKWARD &&
-                // _PyOptimizer_BackEdge is going to change frame->prev_instr,
-                // which breaks line event calculations:
-                next_instr->op.code != INSTRUMENTED_LINE
-            )
-            {
+            if (here[1].cache > tstate->interp->optimizer_backedge_threshold) {
+                /* We should not be here if the code has been instrumented,
+                 * or already has an attached executor */
+                assert(here->op.code != ENTER_EXECUTOR);
+                assert(here->op.code != INSTRUMENTED_JUMP_BACKWARD);
                 OBJECT_STAT_INC(optimization_attempts);
-                frame = _PyOptimizer_BackEdge(frame, here, next_instr, stack_pointer);
-                if (frame == NULL) {
-                    frame = tstate->current_frame;
+                _Py_CODEUNIT *src = here;
+                while(oparg > 255) {
+                    oparg >>= 8;
+                    src--;
+                    assert(src->op.code == EXTENDED_ARG);
+                }
+                next_instr = _PyOptimizer_BackEdge(frame, src, next_instr, stack_pointer);
+                frame = tstate->current_frame;
+                if (next_instr == NULL) {
                     goto resume_with_error;
                 }
-                assert(frame == tstate->current_frame);
+                stack_pointer = _PyFrame_GetStackPointer(frame);
                 here[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) -1);
-                goto resume_frame;
             }
             #endif  /* ENABLE_SPECIALIZATION */
             DISPATCH();
@@ -2990,19 +2992,16 @@
 
         TARGET(ENTER_EXECUTOR) {
             CHECK_EVAL_BREAKER();
-
             PyCodeObject *code = _PyFrame_GetCode(frame);
             _PyExecutorObject *executor = (_PyExecutorObject *)code->co_executors->executors[oparg&255];
-            int original_oparg = executor->vm_data.oparg | (oparg & 0xfffff00);
-            JUMPBY(1-original_oparg);
-            frame->prev_instr = next_instr - 1;
             Py_INCREF(executor);
-            frame = executor->execute(executor, frame, stack_pointer);
-            if (frame == NULL) {
-                frame = tstate->current_frame;
+            next_instr = executor->execute(executor, frame, stack_pointer);
+            frame = tstate->current_frame;
+            if (next_instr == NULL) {
                 goto resume_with_error;
             }
-            goto resume_frame;
+            stack_pointer = _PyFrame_GetStackPointer(frame);
+            DISPATCH();
         }
 
         TARGET(POP_JUMP_IF_FALSE) {
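With ENTER_EXECUTOR no longer performing the backward jump itself (its HAS_JUMP_FLAG is dropped in the metadata above), each optimizer below must resolve the loop-head target from the jump instruction it is given. ``counter_optimize`` and ``uop_optimize`` duplicate the same computation; gathered here into one hypothetical helper for clarity:

static _Py_CODEUNIT *
backward_jump_target(_Py_CODEUNIT *instr)
{
    int oparg = instr->op.arg;
    while (instr->op.code == EXTENDED_ARG) {   /* fold prefix bytes in */
        instr++;
        oparg = (oparg << 8) | instr->op.arg;
    }
    assert(instr->op.code == JUMP_BACKWARD);
    /* Skip the jump and its counter cache (the +2 code units implied by
     * JUMPBY(1-oparg) in JUMP_BACKWARD), then back up oparg units. */
    return instr + 2 - oparg;
}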
diff --git a/Python/optimizer.c b/Python/optimizer.c
index c6d0f9e5bf22d7..9e57316115303a 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -153,10 +153,10 @@ PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer)
     Py_DECREF(old);
 }
 
-_PyInterpreterFrame *
+_Py_CODEUNIT *
 _PyOptimizer_BackEdge(_PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNIT *dest, PyObject **stack_pointer)
 {
-    assert(src->op.code == JUMP_BACKWARD);
+    assert(src->op.code == EXTENDED_ARG || src->op.code == JUMP_BACKWARD);
     PyCodeObject *code = (PyCodeObject *)frame->f_executable;
     assert(PyCode_Check(code));
     PyInterpreterState *interp = _PyInterpreterState_GET();
@@ -165,7 +165,7 @@ _PyOptimizer_BackEdge(_PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNI
     }
     _PyOptimizerObject *opt = interp->optimizer;
     _PyExecutorObject *executor = NULL;
-    int err = opt->optimize(opt, code, dest, &executor, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
+    int err = opt->optimize(opt, code, src, &executor, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
     if (err <= 0) {
         assert(executor == NULL);
         if (err < 0) {
@@ -186,13 +186,11 @@ _PyOptimizer_BackEdge(_PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNI
         goto jump_to_destination;
     }
     insert_executor(code, src, index, executor);
-    assert(frame->prev_instr == src);
-    frame->prev_instr = dest - 1;
     return executor->execute(executor, frame, stack_pointer);
 jump_to_destination:
     frame->prev_instr = dest - 1;
     _PyFrame_SetStackPointer(frame, stack_pointer);
-    return frame;
+    return dest;
 }
 
 _PyExecutorObject *
@@ -241,14 +239,13 @@ static PyTypeObject CounterExecutor_Type = {
     .tp_dealloc = (destructor)counter_dealloc,
 };
 
-static _PyInterpreterFrame *
+static _Py_CODEUNIT *
 counter_execute(_PyExecutorObject *self, _PyInterpreterFrame *frame, PyObject **stack_pointer)
 {
     ((_PyCounterExecutorObject *)self)->optimizer->count++;
     _PyFrame_SetStackPointer(frame, stack_pointer);
-    frame->prev_instr = ((_PyCounterExecutorObject *)self)->next_instr - 1;
     Py_DECREF(self);
-    return frame;
+    return ((_PyCounterExecutorObject *)self)->next_instr;
 }
 
 static int
@@ -266,8 +263,14 @@ counter_optimize(
     }
     executor->executor.execute = counter_execute;
     Py_INCREF(self);
+    int oparg = instr->op.arg;
+    while (instr->op.code == EXTENDED_ARG) {
+        instr++;
+        oparg = (oparg << 8) | instr->op.arg;
+    }
+    assert(instr->op.code == JUMP_BACKWARD);
     executor->optimizer = (_PyCounterOptimizerObject *)self;
-    executor->next_instr = instr;
+    executor->next_instr = instr + 2 - oparg;
     *exec_ptr = (_PyExecutorObject *)executor;
     return 1;
 }
@@ -880,6 +883,14 @@ uop_optimize(
     _PyExecutorObject **exec_ptr,
     int curr_stackentries)
 {
+    /* Do backwards jump before handing to trace generation */
+    int oparg = instr->op.arg;
+    while (instr->op.code == EXTENDED_ARG) {
+        instr++;
+        oparg = (oparg << 8) | instr->op.arg;
+    }
+    assert(instr->op.code == JUMP_BACKWARD);
+    instr += 2 - oparg;
     _PyUOpInstruction trace[_Py_UOP_MAX_TRACE_LENGTH];
     int trace_length = translate_bytecode_to_trace(code, instr, trace, _Py_UOP_MAX_TRACE_LENGTH);
     if (trace_length <= 0) {
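Putting the pieces together: a hypothetical ``optimize`` callback under the revised contract, reusing the ``_TrivialExecutorObject``/``trivial_execute`` and ``backward_jump_target`` sketches above (``TrivialExecutor_Type`` is likewise assumed, not part of this patch). Note that ``instr`` is now the jump instruction (possibly its first EXTENDED_ARG), where the old contract passed the destination:

static int
trivial_optimize(_PyOptimizerObject *self, PyCodeObject *code,
                 _Py_CODEUNIT *instr, _PyExecutorObject **exec_ptr,
                 int curr_stackentries)
{
    _TrivialExecutorObject *executor =
        PyObject_New(_TrivialExecutorObject, &TrivialExecutor_Type);
    if (executor == NULL) {
        return -1;              /* error; exception set by PyObject_New */
    }
    executor->executor.execute = trivial_execute;
    executor->jump_target = backward_jump_target(instr);
    *exec_ptr = (_PyExecutorObject *)executor;
    return 1;                   /* executor produced */
}

Returning 1 hands the executor to ``insert_executor``; 0 declines to optimize, and a negative value reports an error, matching the ``err <= 0`` handling in ``_PyOptimizer_BackEdge`` above.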