From 2ec65dc331054cf68f7a3b740c19db7f64533ff5 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 9 Oct 2024 11:37:19 +0100 Subject: [PATCH 01/53] Initial implementation of tagged stackrefs for GIL build. Requires fixes to immortal objects. --- Include/internal/pycore_stackref.h | 97 +++++++++++++++++++++++++----- Include/refcount.h | 93 +++++++++++++++++++++++++--- Python/bytecodes.c | 10 +-- Python/ceval.c | 4 -- Python/ceval_macros.h | 11 ---- Python/executor_cases.c.h | 10 +-- Python/generated_cases.c.h | 10 +-- Python/sysmodule.c | 3 + 8 files changed, 188 insertions(+), 50 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 7d1eb11aa5ecb8..1805eee642c8f6 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -54,12 +54,13 @@ typedef union _PyStackRef { } _PyStackRef; +#ifdef Py_GIL_DISABLED + #define Py_TAG_DEFERRED (1) #define Py_TAG_PTR ((uintptr_t)0) #define Py_TAG_BITS ((uintptr_t)1) -#ifdef Py_GIL_DISABLED static const _PyStackRef PyStackRef_NULL = { .bits = Py_TAG_DEFERRED}; #define PyStackRef_IsNull(stackref) ((stackref).bits == PyStackRef_NULL.bits) @@ -153,36 +154,104 @@ PyStackRef_AsStrongReference(_PyStackRef stackref) return PyStackRef_FromPyObjectSteal(PyStackRef_AsPyObjectSteal(stackref)); } - #else // Py_GIL_DISABLED // With GIL + +#define Py_TAG_BITS 1 +#define Py_TAG_REFCNT 1 +#define BITS_TO_PTR(REF) ((PyObject *)((REF).bits)) +#define BITS_TO_PTR_MASKED(REF) ((PyObject *)(((REF).bits) & (~Py_TAG_BITS))) + static const _PyStackRef PyStackRef_NULL = { .bits = 0 }; -#define PyStackRef_IsNull(stackref) ((stackref).bits == 0) -#define PyStackRef_True ((_PyStackRef){.bits = (uintptr_t)&_Py_TrueStruct }) -#define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) }) -#define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) }) -#define PyStackRef_AsPyObjectBorrow(stackref) ((PyObject *)(stackref).bits) +#define 
PyStackRef_IsNull(ref) ((ref).bits == 0) +#define PyStackRef_True ((_PyStackRef){.bits = ((uintptr_t)&_Py_TrueStruct) | Py_TAG_REFCNT }) +#define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) | Py_TAG_REFCNT }) +#define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) | Py_TAG_REFCNT }) -#define PyStackRef_AsPyObjectSteal(stackref) PyStackRef_AsPyObjectBorrow(stackref) +static inline int +PyStackRef_HasCount(_PyStackRef ref) +{ + return ref.bits & Py_TAG_REFCNT; +} -#define PyStackRef_FromPyObjectSteal(obj) ((_PyStackRef){.bits = ((uintptr_t)(obj))}) +static inline PyObject * +PyStackRef_AsPyObjectBorrow(_PyStackRef ref) +{ + return BITS_TO_PTR_MASKED(ref); +} -#define PyStackRef_FromPyObjectNew(obj) ((_PyStackRef){ .bits = (uintptr_t)(Py_NewRef(obj)) }) +static inline PyObject * +PyStackRef_AsPyObjectSteal(_PyStackRef ref) +{ + if (PyStackRef_HasCount(ref)) { + return Py_NewRef(BITS_TO_PTR_MASKED(ref)); + } + else { + return BITS_TO_PTR(ref); + } +} -#define PyStackRef_FromPyObjectImmortal(obj) ((_PyStackRef){ .bits = (uintptr_t)(obj) }) +/* We will want to extend this to a larger set of objects in the future */ +#define _Py_IsDeferrable _Py_IsImmortal -#define PyStackRef_CLOSE(stackref) Py_DECREF(PyStackRef_AsPyObjectBorrow(stackref)) +static inline _PyStackRef +PyStackRef_FromPyObjectSteal(PyObject *obj) +{ + assert(obj != NULL); + unsigned int tag = _Py_IsDeferrable(obj) ? 
Py_TAG_REFCNT : 0; + _PyStackRef ref = ((_PyStackRef){.bits = ((uintptr_t)(obj)) | tag}); + return ref; +} + +static inline _PyStackRef +_PyStackRef_FromPyObjectNew(PyObject *obj) +{ + if (_Py_IsDeferrable(obj)) { + return (_PyStackRef){ .bits = ((uintptr_t)obj) | Py_TAG_REFCNT}; + } + Py_INCREF(obj); + _PyStackRef ref = (_PyStackRef){ .bits = (uintptr_t)obj }; + return ref; +} +#define PyStackRef_FromPyObjectNew(obj) _PyStackRef_FromPyObjectNew(_PyObject_CAST(obj)) + +/* Create a new reference from an object with an embedded reference count */ +static inline _PyStackRef +_PyStackRef_FromPyObjectWithCount(PyObject *obj) +{ + return (_PyStackRef){ .bits = (uintptr_t)obj | Py_TAG_REFCNT}; +} +#define PyStackRef_FromPyObjectWithCount(obj) _PyStackRef_FromPyObjectWithCount(_PyObject_CAST(obj)) -#define PyStackRef_DUP(stackref) PyStackRef_FromPyObjectSteal(Py_NewRef(PyStackRef_AsPyObjectBorrow(stackref))) +#define PyStackRef_FromPyObjectImmortal PyStackRef_FromPyObjectWithCount +static inline _PyStackRef +PyStackRef_DUP(_PyStackRef ref) +{ + assert(!PyStackRef_IsNull(ref)); + if (!PyStackRef_HasCount(ref)) { + Py_INCREF_MORTAL(BITS_TO_PTR(ref)); + } + return ref; +} + +static inline void +PyStackRef_CLOSE(_PyStackRef ref) +{ + assert(!PyStackRef_IsNull(ref)); + if (!PyStackRef_HasCount(ref)) { + Py_DECREF_MORTAL(BITS_TO_PTR(ref)); + } +} + #endif // Py_GIL_DISABLED // Note: this is a macro because MSVC (Windows) has trouble inlining it. -#define PyStackRef_Is(a, b) ((a).bits == (b).bits) +#define PyStackRef_Is(a, b) (((a).bits & (~Py_TAG_BITS)) == ((b).bits & (~Py_TAG_BITS))) // Converts a PyStackRef back to a PyObject *, converting the // stackref to a new reference. diff --git a/Include/refcount.h b/Include/refcount.h index 9a4e15065ecab8..82d247da18ffed 100644 --- a/Include/refcount.h +++ b/Include/refcount.h @@ -34,7 +34,7 @@ having all the lower 32 bits set, which will avoid the reference count to go beyond the refcount limit. 
Immortality checks for reference count decreases will be done by checking the bit sign flag in the lower 32 bits. */ -#define _Py_IMMORTAL_REFCNT _Py_CAST(Py_ssize_t, UINT_MAX) +#define _Py_IMMORTAL_REFCNT ((Py_ssize_t)0xb0000000) #else /* @@ -274,6 +274,32 @@ static inline Py_ALWAYS_INLINE void Py_INCREF(PyObject *op) # define Py_INCREF(op) Py_INCREF(_PyObject_CAST(op)) #endif +static inline Py_ALWAYS_INLINE void Py_INCREF_MORTAL(PyObject *op) +{ +#if defined(Py_GIL_DISABLED) + uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); + uint32_t new_local = local + 1; + assert (new_local != 0); + if (_Py_IsOwnedByCurrentThread(op)) { + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, new_local); + } + else { + _Py_atomic_add_ssize(&op->ob_ref_shared, (1 << _Py_REF_SHARED_SHIFT)); + } +#elif SIZEOF_VOID_P > 4 + PY_UINT32_T cur_refcnt = op->ob_refcnt_split[PY_BIG_ENDIAN]; + PY_UINT32_T new_refcnt = cur_refcnt + 1; + op->ob_refcnt_split[PY_BIG_ENDIAN] = new_refcnt; +#else + op->ob_refcnt++; +#endif + _Py_INCREF_STAT_INC(); +#if defined(Py_REF_DEBUG) && !defined(Py_LIMITED_API) + if (!_Py_IsImmortal(op)) { + _Py_INCREF_IncRefTotal(); + } +#endif +} #if !defined(Py_LIMITED_API) && defined(Py_GIL_DISABLED) // Implements Py_DECREF on objects not owned by the current thread. 
@@ -302,6 +328,27 @@ static inline void Py_DECREF(PyObject *op) { #define Py_DECREF(op) Py_DECREF(_PyObject_CAST(op)) #elif defined(Py_GIL_DISABLED) && defined(Py_REF_DEBUG) +static inline void Py_DECREF_MORTAL(const char *filename, int lineno, PyObject *op) +{ + uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); + _Py_DECREF_STAT_INC(); + _Py_DECREF_DecRefTotal(); + if (_Py_IsOwnedByCurrentThread(op)) { + if (local == 0) { + _Py_NegativeRefcount(filename, lineno, op); + } + local--; + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, local); + if (local == 0) { + _Py_MergeZeroLocalRefcount(op); + } + } + else { + _Py_DecRefSharedDebug(op, filename, lineno); + } +} +#define Py_DECREF_MORTAL(op) Py_DECREF_MORTAL(__FILE__, __LINE__, _PyObject_CAST(op)) + static inline void Py_DECREF(const char *filename, int lineno, PyObject *op) { uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); @@ -328,6 +375,22 @@ static inline void Py_DECREF(const char *filename, int lineno, PyObject *op) #define Py_DECREF(op) Py_DECREF(__FILE__, __LINE__, _PyObject_CAST(op)) #elif defined(Py_GIL_DISABLED) +static inline void Py_DECREF_MORTAL(PyObject *op) +{ + uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); + _Py_DECREF_STAT_INC(); + if (_Py_IsOwnedByCurrentThread(op)) { + local--; + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, local); + if (local == 0) { + _Py_MergeZeroLocalRefcount(op); + } + } + else { + _Py_DecRefShared(op); + } +} + static inline void Py_DECREF(PyObject *op) { uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); @@ -350,6 +413,15 @@ static inline void Py_DECREF(PyObject *op) #define Py_DECREF(op) Py_DECREF(_PyObject_CAST(op)) #elif defined(Py_REF_DEBUG) +static inline Py_ALWAYS_INLINE void Py_DECREF_MORTAL(PyObject *op) +{ + _Py_DECREF_STAT_INC(); + _Py_DECREF_DecRefTotal(); + if (--op->ob_refcnt == 0) { + _Py_Dealloc(op); + } +} + static inline void Py_DECREF(const char *filename, int lineno, 
PyObject *op) { if (op->ob_refcnt <= 0) { @@ -359,15 +431,21 @@ static inline void Py_DECREF(const char *filename, int lineno, PyObject *op) _Py_DECREF_IMMORTAL_STAT_INC(); return; } - _Py_DECREF_STAT_INC(); - _Py_DECREF_DecRefTotal(); + Py_DECREF_MORTAL(op); +} +#define Py_DECREF(op) Py_DECREF(__FILE__, __LINE__, _PyObject_CAST(op)) + +#else +static inline Py_ALWAYS_INLINE void Py_DECREF_MORTAL(PyObject *op) +{ + if (!_Py_IsImmortal(op)) { + _Py_DECREF_STAT_INC(); + } if (--op->ob_refcnt == 0) { _Py_Dealloc(op); } } -#define Py_DECREF(op) Py_DECREF(__FILE__, __LINE__, _PyObject_CAST(op)) -#else static inline Py_ALWAYS_INLINE void Py_DECREF(PyObject *op) { // Non-limited C API and limited C API for Python 3.9 and older access @@ -376,10 +454,7 @@ static inline Py_ALWAYS_INLINE void Py_DECREF(PyObject *op) _Py_DECREF_IMMORTAL_STAT_INC(); return; } - _Py_DECREF_STAT_INC(); - if (--op->ob_refcnt == 0) { - _Py_Dealloc(op); - } + Py_DECREF_MORTAL(op); } #define Py_DECREF(op) Py_DECREF(_PyObject_CAST(op)) #endif diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 87cca3fc1d373c..d58a78961fcb83 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3627,11 +3627,12 @@ dummy_func( DEOPT_IF(!PyStackRef_IsNull(null)); DEOPT_IF(callable_o != (PyObject *)&PyUnicode_Type); STAT_INC(CALL, hit); - res = PyStackRef_FromPyObjectSteal(PyObject_Str(arg_o)); + PyObject *str = PyObject_Str(arg_o); DEAD(null); DEAD(callable); PyStackRef_CLOSE(arg); - ERROR_IF(PyStackRef_IsNull(res), error); + ERROR_IF(str == NULL, error); + res = PyStackRef_FromPyObjectSteal(str); } macro(CALL_STR_1) = @@ -3648,11 +3649,12 @@ dummy_func( DEOPT_IF(!PyStackRef_IsNull(null)); DEOPT_IF(callable_o != (PyObject *)&PyTuple_Type); STAT_INC(CALL, hit); - res = PyStackRef_FromPyObjectSteal(PySequence_Tuple(arg_o)); + PyObject *tuple = PySequence_Tuple(arg_o); DEAD(null); DEAD(callable); PyStackRef_CLOSE(arg); - ERROR_IF(PyStackRef_IsNull(res), error); + ERROR_IF(tuple == NULL, error); + res = 
PyStackRef_FromPyObjectSteal(tuple); } macro(CALL_TUPLE_1) = diff --git a/Python/ceval.c b/Python/ceval.c index f4e0add3034707..970f4fe627e3f4 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1765,7 +1765,6 @@ _PyEvalFramePushAndInit_UnTagged(PyThreadState *tstate, _PyStackRef func, PyObject *locals, PyObject *const* args, size_t argcount, PyObject *kwnames, _PyInterpreterFrame *previous) { -#if defined(Py_GIL_DISABLED) size_t kw_count = kwnames == NULL ? 0 : PyTuple_GET_SIZE(kwnames); size_t total_argcount = argcount + kw_count; _PyStackRef *tagged_args_buffer = PyMem_Malloc(sizeof(_PyStackRef) * total_argcount); @@ -1782,9 +1781,6 @@ _PyEvalFramePushAndInit_UnTagged(PyThreadState *tstate, _PyStackRef func, _PyInterpreterFrame *res = _PyEvalFramePushAndInit(tstate, func, locals, (_PyStackRef const *)tagged_args_buffer, argcount, kwnames, previous); PyMem_Free(tagged_args_buffer); return res; -#else - return _PyEvalFramePushAndInit(tstate, func, locals, (_PyStackRef const *)args, argcount, kwnames, previous); -#endif } /* Same as _PyEvalFramePushAndInit but takes an args tuple and kwargs dict. diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index e0e9cc156ed62f..578e7e569d92f7 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -442,25 +442,14 @@ do { \ /* How much scratch space to give stackref to PyObject* conversion. 
*/ #define MAX_STACKREF_SCRATCH 10 -#ifdef Py_GIL_DISABLED #define STACKREFS_TO_PYOBJECTS(ARGS, ARG_COUNT, NAME) \ /* +1 because vectorcall might use -1 to write self */ \ PyObject *NAME##_temp[MAX_STACKREF_SCRATCH+1]; \ PyObject **NAME = _PyObjectArray_FromStackRefArray(ARGS, ARG_COUNT, NAME##_temp + 1); -#else -#define STACKREFS_TO_PYOBJECTS(ARGS, ARG_COUNT, NAME) \ - PyObject **NAME = (PyObject **)ARGS; \ - assert(NAME != NULL); -#endif -#ifdef Py_GIL_DISABLED #define STACKREFS_TO_PYOBJECTS_CLEANUP(NAME) \ /* +1 because we +1 previously */ \ _PyObjectArray_Free(NAME - 1, NAME##_temp); -#else -#define STACKREFS_TO_PYOBJECTS_CLEANUP(NAME) \ - (void)(NAME); -#endif #ifdef Py_GIL_DISABLED #define CONVERSION_FAILED(NAME) ((NAME) == NULL) diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 57e15f33ca7703..6af50aa6642437 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4297,10 +4297,11 @@ } STAT_INC(CALL, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - res = PyStackRef_FromPyObjectSteal(PyObject_Str(arg_o)); + PyObject *str = PyObject_Str(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(arg); - if (PyStackRef_IsNull(res)) JUMP_TO_ERROR(); + if (str == NULL) JUMP_TO_ERROR(); + res = PyStackRef_FromPyObjectSteal(str); stack_pointer[-3] = res; stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); @@ -4329,10 +4330,11 @@ } STAT_INC(CALL, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - res = PyStackRef_FromPyObjectSteal(PySequence_Tuple(arg_o)); + PyObject *tuple = PySequence_Tuple(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(arg); - if (PyStackRef_IsNull(res)) JUMP_TO_ERROR(); + if (tuple == NULL) JUMP_TO_ERROR(); + res = PyStackRef_FromPyObjectSteal(tuple); stack_pointer[-3] = res; stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 7656ce6bb7e313..4ca0a72f37ea37 100644 --- 
a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2976,10 +2976,11 @@ DEOPT_IF(callable_o != (PyObject *)&PyUnicode_Type, CALL); STAT_INC(CALL, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - res = PyStackRef_FromPyObjectSteal(PyObject_Str(arg_o)); + PyObject *str = PyObject_Str(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(arg); - if (PyStackRef_IsNull(res)) goto pop_3_error; + if (str == NULL) goto pop_3_error; + res = PyStackRef_FromPyObjectSteal(str); } // _CHECK_PERIODIC { @@ -3026,10 +3027,11 @@ DEOPT_IF(callable_o != (PyObject *)&PyTuple_Type, CALL); STAT_INC(CALL, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - res = PyStackRef_FromPyObjectSteal(PySequence_Tuple(arg_o)); + PyObject *tuple = PySequence_Tuple(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(arg); - if (PyStackRef_IsNull(res)) goto pop_3_error; + if (tuple == NULL) goto pop_3_error; + res = PyStackRef_FromPyObjectSteal(tuple); } // _CHECK_PERIODIC { diff --git a/Python/sysmodule.c b/Python/sysmodule.c index ac343a8048e008..224121dcaf1f27 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -1965,6 +1965,9 @@ static Py_ssize_t sys_getrefcount_impl(PyObject *module, PyObject *object) /*[clinic end generated code: output=5fd477f2264b85b2 input=bf474efd50a21535]*/ { + if (_Py_IsImmortal(object)) { + return _Py_IMMORTAL_REFCNT; + } return Py_REFCNT(object); } From ee7f3a60cd071f2d003921b0ad8da2e291727bf8 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 11 Oct 2024 09:47:05 +0100 Subject: [PATCH 02/53] Streamline PyStackRef_XCLOSE and PyStackRef_CLEAR. 
--- Include/internal/pycore_stackref.h | 58 +++++++++++++++++++----------- 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 1805eee642c8f6..f03ddb26177292 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -154,6 +154,24 @@ PyStackRef_AsStrongReference(_PyStackRef stackref) return PyStackRef_FromPyObjectSteal(PyStackRef_AsPyObjectSteal(stackref)); } +#define PyStackRef_XCLOSE(stackref) \ + do { \ + _PyStackRef _tmp = (stackref); \ + if (!PyStackRef_IsNull(_tmp)) { \ + PyStackRef_CLOSE(_tmp); \ + } \ + } while (0); + +#define PyStackRef_CLEAR(op) \ + do { \ + _PyStackRef *_tmp_op_ptr = &(op); \ + _PyStackRef _tmp_old_op = (*_tmp_op_ptr); \ + if (!PyStackRef_IsNull(_tmp_old_op)) { \ + *_tmp_op_ptr = PyStackRef_NULL; \ + PyStackRef_CLOSE(_tmp_old_op); \ + } \ + } while (0) + #else // Py_GIL_DISABLED // With GIL @@ -163,9 +181,10 @@ PyStackRef_AsStrongReference(_PyStackRef stackref) #define BITS_TO_PTR(REF) ((PyObject *)((REF).bits)) #define BITS_TO_PTR_MASKED(REF) ((PyObject *)(((REF).bits) & (~Py_TAG_BITS))) -static const _PyStackRef PyStackRef_NULL = { .bits = 0 }; +#define PyStackRef_NULL_BITS Py_TAG_REFCNT +static const _PyStackRef PyStackRef_NULL = { .bits = PyStackRef_NULL_BITS }; -#define PyStackRef_IsNull(ref) ((ref).bits == 0) +#define PyStackRef_IsNull(ref) ((ref).bits == PyStackRef_NULL_BITS) #define PyStackRef_True ((_PyStackRef){.bits = ((uintptr_t)&_Py_TrueStruct) | Py_TAG_REFCNT }) #define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) | Py_TAG_REFCNT }) #define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) | Py_TAG_REFCNT }) @@ -247,6 +266,23 @@ PyStackRef_CLOSE(_PyStackRef ref) } } +static inline void +PyStackRef_XCLOSE(_PyStackRef ref) +{ + if (!PyStackRef_HasCount(ref)) { + assert(!PyStackRef_IsNull(ref)); + Py_DECREF_MORTAL(BITS_TO_PTR(ref)); + } +} + +#define 
PyStackRef_CLEAR(REF) \ + do { \ + _PyStackRef *_tmp_op_ptr = &(REF); \ + _PyStackRef _tmp_old_op = (*_tmp_op_ptr); \ + *_tmp_op_ptr = PyStackRef_NULL; \ + PyStackRef_XCLOSE(_tmp_old_op); \ + } while (0) + #endif // Py_GIL_DISABLED // Note: this is a macro because MSVC (Windows) has trouble inlining it. @@ -259,24 +295,6 @@ PyStackRef_CLOSE(_PyStackRef ref) #define PyStackRef_TYPE(stackref) Py_TYPE(PyStackRef_AsPyObjectBorrow(stackref)) -#define PyStackRef_CLEAR(op) \ - do { \ - _PyStackRef *_tmp_op_ptr = &(op); \ - _PyStackRef _tmp_old_op = (*_tmp_op_ptr); \ - if (!PyStackRef_IsNull(_tmp_old_op)) { \ - *_tmp_op_ptr = PyStackRef_NULL; \ - PyStackRef_CLOSE(_tmp_old_op); \ - } \ - } while (0) - -#define PyStackRef_XCLOSE(stackref) \ - do { \ - _PyStackRef _tmp = (stackref); \ - if (!PyStackRef_IsNull(_tmp)) { \ - PyStackRef_CLOSE(_tmp); \ - } \ - } while (0); - // StackRef type checks From 21d7e87115930763f73fb4ca15867150e3f5d217 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 11 Oct 2024 10:56:18 +0100 Subject: [PATCH 03/53] Fix error handling with not stackref outputs in code generator --- Include/internal/pycore_opcode_metadata.h | 8 ++-- Include/internal/pycore_stackref.h | 1 + Include/internal/pycore_uop_metadata.h | 4 +- Lib/test/test_generated_cases.py | 3 +- Python/bytecodes.c | 13 ++---- Python/ceval.c | 4 ++ Python/executor_cases.c.h | 12 +++-- Python/generated_cases.c.h | 51 +++++++++++++--------- Tools/cases_generator/generators_common.py | 2 +- 9 files changed, 52 insertions(+), 46 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 8fec45b1e8d5c3..2dcc2825bd47ea 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1037,7 +1037,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [BUILD_STRING] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG }, [BUILD_TUPLE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | 
HAS_ERROR_FLAG }, [CACHE] = { true, INSTR_FMT_IX, 0 }, - [CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_ALLOC_AND_ENTER_INIT] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [CALL_BOUND_METHOD_EXACT_ARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, [CALL_BOUND_METHOD_GENERAL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, @@ -1050,9 +1050,9 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [CALL_INTRINSIC_2] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_ISINSTANCE] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [CALL_KW] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [CALL_KW_BOUND_METHOD] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [CALL_KW_BOUND_METHOD] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_KW_NON_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [CALL_KW_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [CALL_KW_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_LEN] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, 
[CALL_LIST_APPEND] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG }, [CALL_METHOD_DESCRIPTOR_FAST] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1107,7 +1107,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [GET_YIELD_FROM_ITER] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [IMPORT_FROM] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [IMPORT_NAME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [INSTRUMENTED_CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [INSTRUMENTED_CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_CALL_FUNCTION_EX] = { true, INSTR_FMT_IX, 0 }, [INSTRUMENTED_CALL_KW] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_END_FOR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index f03ddb26177292..05514ef6e2e6fc 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -269,6 +269,7 @@ PyStackRef_CLOSE(_PyStackRef ref) static inline void PyStackRef_XCLOSE(_PyStackRef ref) { + assert(ref.bits != 0); if (!PyStackRef_HasCount(ref)) { assert(!PyStackRef_IsNull(ref)); Py_DECREF_MORTAL(BITS_TO_PTR(ref)); diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index fd41e9a5fe862b..fe89d8d860e5d3 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -227,7 +227,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CALL_STR_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | 
HAS_ESCAPES_FLAG, [_CALL_TUPLE_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CHECK_AND_ALLOCATE_OBJECT] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG, - [_CREATE_INIT_FRAME] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, + [_CREATE_INIT_FRAME] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_EXIT_INIT_CHECK] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_CALL_BUILTIN_CLASS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_BUILTIN_O] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -241,7 +241,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CALL_METHOD_DESCRIPTOR_NOARGS] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_METHOD_DESCRIPTOR_FAST] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_MAYBE_EXPAND_METHOD_KW] = HAS_ARG_FLAG, - [_PY_FRAME_KW] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, + [_PY_FRAME_KW] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CHECK_FUNCTION_VERSION_KW] = HAS_ARG_FLAG | HAS_EXIT_FLAG, [_CHECK_METHOD_VERSION_KW] = HAS_ARG_FLAG | HAS_EXIT_FLAG, [_EXPAND_METHOD_KW] = HAS_ARG_FLAG, diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index cd3718b80612bd..819b77bf999038 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -1255,8 +1255,7 @@ def test_push_then_error(self): b = 1; if (cond) { stack_pointer[0] = a; - stack_pointer[1] = b; - stack_pointer += 2; + stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); goto error; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6ffeaa4da5433d..b3450bdd919fba 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3325,13 +3325,11 @@ dummy_func( tstate, callable[0], locals, args, total_args, NULL, frame ); + ERROR_IF(new_frame == NULL, error); // Manipulate stack directly since we 
leave using DISPATCH_INLINED(). - SYNC_SP(); // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. - if (new_frame == NULL) { - ERROR_NO_POP(); - } + SYNC_SP(); frame->return_offset = (uint16_t)(next_instr - this_instr); DISPATCH_INLINED(new_frame); } @@ -3695,10 +3693,9 @@ dummy_func( DEAD(self); init_frame = _PyEvalFramePushAndInit( tstate, init[0], NULL, args-1, oparg+1, NULL, shim); - SYNC_SP(); if (init_frame == NULL) { _PyEval_FrameClearAndPop(tstate, shim); - ERROR_NO_POP(); + ERROR_IF(true, error); } frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL; /* Account for pushing the extra frame. @@ -4258,12 +4255,10 @@ dummy_func( args, positional_args, kwnames_o, frame ); PyStackRef_CLOSE(kwnames); + ERROR_IF(new_frame == NULL, error); // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. SYNC_SP(); - if (new_frame == NULL) { - ERROR_NO_POP(); - } } op(_CHECK_FUNCTION_VERSION_KW, (func_version/2, callable[1], self_or_null[1], unused[oparg], kwnames -- callable[1], self_or_null[1], unused[oparg], kwnames)) { diff --git a/Python/ceval.c b/Python/ceval.c index 970f4fe627e3f4..75e7cfe7570cea 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -146,6 +146,10 @@ dump_stack(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer) if (ptr != stack_base) { printf(", "); } + if (PyStackRef_IsNull(*ptr)) { + printf(""); + continue; + } PyObject *obj = PyStackRef_AsPyObjectBorrow(*ptr); if (obj == NULL) { printf(""); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index b4c497709c8941..4b9a8a62abddf6 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4408,18 +4408,18 @@ init_frame = _PyEvalFramePushAndInit( tstate, init[0], NULL, args-1, oparg+1, NULL, shim); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[-2 - oparg].bits = (uintptr_t)init_frame; - stack_pointer += -1 - oparg; - assert(WITHIN_STACK_BOUNDS()); if (init_frame 
== NULL) { _PyEval_FrameClearAndPop(tstate, shim); - JUMP_TO_ERROR(); + if (true) JUMP_TO_ERROR(); } frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL; /* Account for pushing the extra frame. * We don't check recursion depth here, * as it will be checked after start_frame */ tstate->py_recursion_remaining--; + stack_pointer[-2 - oparg].bits = (uintptr_t)init_frame; + stack_pointer += -1 - oparg; + assert(WITHIN_STACK_BOUNDS()); break; } @@ -5096,14 +5096,12 @@ ); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(kwnames); + if (new_frame == NULL) JUMP_TO_ERROR(); // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. stack_pointer[-3 - oparg].bits = (uintptr_t)new_frame; stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); - if (new_frame == NULL) { - JUMP_TO_ERROR(); - } break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 127586198ea31d..8ded9d63b153bd 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -925,14 +925,16 @@ args, total_args, NULL, frame ); stack_pointer = _PyFrame_GetStackPointer(frame); - // Manipulate stack directly since we leave using DISPATCH_INLINED(). - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); - // The frame has stolen all the arguments from the stack, - // so there is no need to clean them up. if (new_frame == NULL) { + stack_pointer += -2 - oparg; + assert(WITHIN_STACK_BOUNDS()); goto error; } + // Manipulate stack directly since we leave using DISPATCH_INLINED(). + // The frame has stolen all the arguments from the stack, + // so there is no need to clean them up. 
+ stack_pointer += -2 - oparg; + assert(WITHIN_STACK_BOUNDS()); frame->return_offset = (uint16_t)(next_instr - this_instr); DISPATCH_INLINED(new_frame); } @@ -1072,12 +1074,13 @@ init_frame = _PyEvalFramePushAndInit( tstate, init[0], NULL, args-1, oparg+1, NULL, shim); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[-2 - oparg].bits = (uintptr_t)init_frame; - stack_pointer += -1 - oparg; - assert(WITHIN_STACK_BOUNDS()); if (init_frame == NULL) { _PyEval_FrameClearAndPop(tstate, shim); - goto error; + if (true) { + stack_pointer += -2 - oparg; + assert(WITHIN_STACK_BOUNDS()); + goto error; + } } frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL; /* Account for pushing the extra frame. @@ -1092,7 +1095,7 @@ // Eventually this should be the only occurrence of this code. assert(tstate->interp->eval_frame == NULL); _PyInterpreterFrame *temp = new_frame; - stack_pointer += -1; + stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); assert(new_frame->previous == frame || new_frame->previous->previous == frame); @@ -2092,14 +2095,16 @@ ); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(kwnames); + if (new_frame == NULL) { + stack_pointer += -3 - oparg; + assert(WITHIN_STACK_BOUNDS()); + goto error; + } // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. stack_pointer[-3 - oparg].bits = (uintptr_t)new_frame; stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); - if (new_frame == NULL) { - goto error; - } } // _SAVE_RETURN_OFFSET { @@ -2270,14 +2275,16 @@ ); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(kwnames); + if (new_frame == NULL) { + stack_pointer += -3 - oparg; + assert(WITHIN_STACK_BOUNDS()); + goto error; + } // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. 
stack_pointer[-3 - oparg].bits = (uintptr_t)new_frame; stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); - if (new_frame == NULL) { - goto error; - } } // _SAVE_RETURN_OFFSET { @@ -4438,14 +4445,16 @@ args, total_args, NULL, frame ); stack_pointer = _PyFrame_GetStackPointer(frame); - // Manipulate stack directly since we leave using DISPATCH_INLINED(). - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); - // The frame has stolen all the arguments from the stack, - // so there is no need to clean them up. if (new_frame == NULL) { + stack_pointer += -2 - oparg; + assert(WITHIN_STACK_BOUNDS()); goto error; } + // Manipulate stack directly since we leave using DISPATCH_INLINED(). + // The frame has stolen all the arguments from the stack, + // so there is no need to clean them up. + stack_pointer += -2 - oparg; + assert(WITHIN_STACK_BOUNDS()); frame->return_offset = (uint16_t)(next_instr - this_instr); DISPATCH_INLINED(new_frame); } diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index 0bfa1a3b56fbc2..955c88fd052e6d 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -190,7 +190,7 @@ def error_if( self.out.emit(";\n") else: self.out.emit("{\n") - storage.copy().flush(self.out) + storage.copy().stack.flush(self.out) self.out.emit("goto ") self.out.emit(label) self.out.emit(";\n") From 2636c049d78030d07fe41fbe72b12dc58b230052 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 14 Oct 2024 14:20:13 +0100 Subject: [PATCH 04/53] Attempt to fix _BINARY_OP_INPLACE_ADD_UNICODE and use tag references in LOAD_CONST --- Include/internal/pycore_code.h | 2 +- Include/internal/pycore_opcode_metadata.h | 4 ++-- Include/internal/pycore_uop_metadata.h | 2 +- Python/bytecodes.c | 17 +++++++-------- Python/executor_cases.c.h | 17 +++++++-------- Python/generated_cases.c.h | 25 ++++++++++++++--------- 6 files changed, 35 insertions(+), 32 deletions(-) diff 
--git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 57e0a14bb9b5bd..01fb9ea67307c4 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -317,7 +317,7 @@ extern void _PyCode_Clear_Executors(PyCodeObject *code); // gh-115999 tracks progress on addressing this. #define ENABLE_SPECIALIZATION 0 #else -#define ENABLE_SPECIALIZATION 1 +#define ENABLE_SPECIALIZATION 0 #endif /* Specialization functions */ diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 2dcc2825bd47ea..de413be7b34d92 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1149,7 +1149,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [LOAD_ATTR_WITH_HINT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, [LOAD_BUILD_CLASS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_COMMON_CONSTANT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, - [LOAD_CONST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_PURE_FLAG }, + [LOAD_CONST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG }, [LOAD_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, [LOAD_FAST_AND_CLEAR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, @@ -1187,7 +1187,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [RESERVED] = { true, INSTR_FMT_IX, 0 }, [RESUME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [RESUME_CHECK] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, - [RETURN_CONST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG }, + [RETURN_CONST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_ESCAPES_FLAG }, 
[RETURN_GENERATOR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [RETURN_VALUE] = { true, INSTR_FMT_IX, 0 }, [SEND] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index fe89d8d860e5d3..e74bb2f0bbffdb 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -35,7 +35,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG, [_LOAD_FAST_AND_CLEAR] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, - [_LOAD_CONST] = HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_PURE_FLAG, + [_LOAD_CONST] = HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_STORE_FAST_0] = HAS_LOCAL_FLAG, [_STORE_FAST_1] = HAS_LOCAL_FLAG, [_STORE_FAST_2] = HAS_LOCAL_FLAG, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 7a3cea112299cd..9ae637825c3834 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -256,7 +256,7 @@ dummy_func( } pure inst(LOAD_CONST, (-- value)) { - value = PyStackRef_FromPyObjectNew(GETITEM(FRAME_CO_CONSTS, oparg)); + value = _PyStackRef_FromPyObjectWithCount(GETITEM(FRAME_CO_CONSTS, oparg)); } replicate(8) inst(STORE_FAST, (value --)) { @@ -611,9 +611,6 @@ dummy_func( // specializations, but there is no output. // At the end we just skip over the STORE_FAST. op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right --)) { - PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); - int next_oparg; #if TIER_ONE assert(next_instr->op.code == STORE_FAST); @@ -635,12 +632,12 @@ dummy_func( * only the locals reference, so PyUnicode_Append knows * that the string is safe to mutate. 
*/ - assert(Py_REFCNT(left_o) >= 2); - PyStackRef_CLOSE(left); - DEAD(left); - PyObject *temp = PyStackRef_AsPyObjectBorrow(*target_local); - PyUnicode_Append(&temp, right_o); - *target_local = PyStackRef_FromPyObjectSteal(temp); + PyObject *left_o = PyStackRef_AsPyObjectSteal(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + PyStackRef_CLEAR(*target_local); + assert(Py_REFCNT(left_o) >= 1); + PyUnicode_Append(&left_o, right_o); + *target_local = PyStackRef_FromPyObjectSteal(left_o); PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); DEAD(right); ERROR_IF(PyStackRef_IsNull(*target_local), error); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 3d84a93e906730..02e7514045d182 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -203,7 +203,9 @@ case _LOAD_CONST: { _PyStackRef value; oparg = CURRENT_OPARG(); - value = PyStackRef_FromPyObjectNew(GETITEM(FRAME_CO_CONSTS, oparg)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = _PyStackRef_FromPyObjectWithCount(GETITEM(FRAME_CO_CONSTS, oparg)); + stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -737,8 +739,6 @@ _PyStackRef left; right = stack_pointer[-1]; left = stack_pointer[-2]; - PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); int next_oparg; #if TIER_ONE assert(next_instr->op.code == STORE_FAST); @@ -763,11 +763,12 @@ * only the locals reference, so PyUnicode_Append knows * that the string is safe to mutate. 
*/ - assert(Py_REFCNT(left_o) >= 2); - PyStackRef_CLOSE(left); - PyObject *temp = PyStackRef_AsPyObjectBorrow(*target_local); - PyUnicode_Append(&temp, right_o); - *target_local = PyStackRef_FromPyObjectSteal(temp); + PyObject *left_o = PyStackRef_AsPyObjectSteal(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + PyStackRef_CLEAR(*target_local); + assert(Py_REFCNT(left_o) >= 1); + PyUnicode_Append(&left_o, right_o); + *target_local = PyStackRef_FromPyObjectSteal(left_o); PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); if (PyStackRef_IsNull(*target_local)) JUMP_TO_ERROR(); #if TIER_ONE diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index ca1134e928f11c..f0b591c85ba3b5 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -183,8 +183,6 @@ /* Skip 1 cache entry */ // _BINARY_OP_INPLACE_ADD_UNICODE { - PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); int next_oparg; #if TIER_ONE assert(next_instr->op.code == STORE_FAST); @@ -206,11 +204,12 @@ * only the locals reference, so PyUnicode_Append knows * that the string is safe to mutate. 
*/ - assert(Py_REFCNT(left_o) >= 2); - PyStackRef_CLOSE(left); - PyObject *temp = PyStackRef_AsPyObjectBorrow(*target_local); - PyUnicode_Append(&temp, right_o); - *target_local = PyStackRef_FromPyObjectSteal(temp); + PyObject *left_o = PyStackRef_AsPyObjectSteal(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + PyStackRef_CLEAR(*target_local); + assert(Py_REFCNT(left_o) >= 1); + PyUnicode_Append(&left_o, right_o); + *target_local = PyStackRef_FromPyObjectSteal(left_o); PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); if (PyStackRef_IsNull(*target_local)) goto pop_2_error; #if TIER_ONE @@ -4899,7 +4898,9 @@ _PyStackRef res; // _LOAD_CONST { - value = PyStackRef_FromPyObjectNew(GETITEM(FRAME_CO_CONSTS, oparg)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = _PyStackRef_FromPyObjectWithCount(GETITEM(FRAME_CO_CONSTS, oparg)); + stack_pointer = _PyFrame_GetStackPointer(frame); } // _RETURN_VALUE_EVENT { @@ -5909,7 +5910,9 @@ next_instr += 1; INSTRUCTION_STATS(LOAD_CONST); _PyStackRef value; - value = PyStackRef_FromPyObjectNew(GETITEM(FRAME_CO_CONSTS, oparg)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = _PyStackRef_FromPyObjectWithCount(GETITEM(FRAME_CO_CONSTS, oparg)); + stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -6967,7 +6970,9 @@ _PyStackRef res; // _LOAD_CONST { - value = PyStackRef_FromPyObjectNew(GETITEM(FRAME_CO_CONSTS, oparg)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = _PyStackRef_FromPyObjectWithCount(GETITEM(FRAME_CO_CONSTS, oparg)); + stack_pointer = _PyFrame_GetStackPointer(frame); } // _RETURN_VALUE { From a79aa478d91ba212e6f2c7cc141a5489a13b7f13 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 14 Oct 2024 16:25:05 +0100 Subject: [PATCH 05/53] Show frame locals in lltrace --- Include/internal/pycore_code.h | 2 +- Python/bytecodes.c | 2 +- Python/ceval.c | 61 
+++++++++++++++++++++------------- Python/executor_cases.c.h | 2 +- Python/generated_cases.c.h | 6 ++-- 5 files changed, 44 insertions(+), 29 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 01fb9ea67307c4..57e0a14bb9b5bd 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -317,7 +317,7 @@ extern void _PyCode_Clear_Executors(PyCodeObject *code); // gh-115999 tracks progress on addressing this. #define ENABLE_SPECIALIZATION 0 #else -#define ENABLE_SPECIALIZATION 0 +#define ENABLE_SPECIALIZATION 1 #endif /* Specialization functions */ diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 444aab823a5a55..8952551927f7be 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -256,7 +256,7 @@ dummy_func( } pure inst(LOAD_CONST, (-- value)) { - value = _PyStackRef_FromPyObjectWithCount(GETITEM(FRAME_CO_CONSTS, oparg)); + value = _PyStackRef_FromPyObjectNew(GETITEM(FRAME_CO_CONSTS, oparg)); } replicate(8) inst(STORE_FAST, (value --)) { diff --git a/Python/ceval.c b/Python/ceval.c index 75e7cfe7570cea..ac70379d822d54 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -136,39 +136,54 @@ #ifdef LLTRACE +static void +dump_item(_PyStackRef item) +{ + if (PyStackRef_IsNull(item)) { + printf("<NULL>"); + return; + } + PyObject *obj = PyStackRef_AsPyObjectBorrow(item); + if (obj == NULL) { + printf("<nil>"); + return; + } + if ( + obj == Py_None + || PyBool_Check(obj) + || PyLong_CheckExact(obj) + || PyFloat_CheckExact(obj) + || PyUnicode_CheckExact(obj) + ) { + if (PyObject_Print(obj, stdout, 0) == 0) { + return; + } + PyErr_Clear(); + } + // Don't call __repr__(), it might recurse into the interpreter.
+ printf("<%s at %p>", Py_TYPE(obj)->tp_name, (void *)(item.bits)); +} + static void dump_stack(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer) { + _PyStackRef *locals_base = _PyFrame_GetLocalsArray(frame); _PyStackRef *stack_base = _PyFrame_Stackbase(frame); PyObject *exc = PyErr_GetRaisedException(); + printf(" locals=["); + for (_PyStackRef *ptr = locals_base; ptr < stack_base; ptr++) { + if (ptr != locals_base) { + printf(", "); + } + dump_item(*ptr); + } + printf("]\n"); printf(" stack=["); for (_PyStackRef *ptr = stack_base; ptr < stack_pointer; ptr++) { if (ptr != stack_base) { printf(", "); } - if (PyStackRef_IsNull(*ptr)) { - printf("<NULL>"); - continue; - } - PyObject *obj = PyStackRef_AsPyObjectBorrow(*ptr); - if (obj == NULL) { - printf("<nil>"); - continue; - } - if ( - obj == Py_None - || PyBool_Check(obj) - || PyLong_CheckExact(obj) - || PyFloat_CheckExact(obj) - || PyUnicode_CheckExact(obj) - ) { - if (PyObject_Print(obj, stdout, 0) == 0) { - continue; - } - PyErr_Clear(); - } - // Don't call __repr__(), it might recurse into the interpreter.
- printf("<%s at %p>", Py_TYPE(obj)->tp_name, (void *)(ptr->bits)); + dump_item(*ptr); } printf("]\n"); fflush(stdout); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 9d0b34c1f12c14..48386d2788150e 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -204,7 +204,7 @@ _PyStackRef value; oparg = CURRENT_OPARG(); _PyFrame_SetStackPointer(frame, stack_pointer); - value = _PyStackRef_FromPyObjectWithCount(GETITEM(FRAME_CO_CONSTS, oparg)); + value = _PyStackRef_FromPyObjectNew(GETITEM(FRAME_CO_CONSTS, oparg)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer[0] = value; stack_pointer += 1; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 6efc287e1b22b8..1e82918bc9847f 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4899,7 +4899,7 @@ // _LOAD_CONST { _PyFrame_SetStackPointer(frame, stack_pointer); - value = _PyStackRef_FromPyObjectWithCount(GETITEM(FRAME_CO_CONSTS, oparg)); + value = _PyStackRef_FromPyObjectNew(GETITEM(FRAME_CO_CONSTS, oparg)); stack_pointer = _PyFrame_GetStackPointer(frame); } // _RETURN_VALUE_EVENT @@ -5911,7 +5911,7 @@ INSTRUCTION_STATS(LOAD_CONST); _PyStackRef value; _PyFrame_SetStackPointer(frame, stack_pointer); - value = _PyStackRef_FromPyObjectWithCount(GETITEM(FRAME_CO_CONSTS, oparg)); + value = _PyStackRef_FromPyObjectNew(GETITEM(FRAME_CO_CONSTS, oparg)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer[0] = value; stack_pointer += 1; @@ -6971,7 +6971,7 @@ // _LOAD_CONST { _PyFrame_SetStackPointer(frame, stack_pointer); - value = _PyStackRef_FromPyObjectWithCount(GETITEM(FRAME_CO_CONSTS, oparg)); + value = _PyStackRef_FromPyObjectNew(GETITEM(FRAME_CO_CONSTS, oparg)); stack_pointer = _PyFrame_GetStackPointer(frame); } // _RETURN_VALUE From a90e6440ba63697db03e9cb241ccf6bded5fc441 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 15 Oct 2024 10:41:58 +0100 Subject: [PATCH 06/53] Fix _PyFrame_Copy and add some 
asserts to make reference heap safe --- Include/internal/pycore_frame.h | 20 ++++++++++------ Include/internal/pycore_stackref.h | 35 ++++++++++++++++++++++++++++ Python/bytecodes.c | 17 ++++++++++++++ Python/executor_cases.c.h | 37 ++++++++++++++++++++++++++++++ Python/frame.c | 23 +++++++++---------- Python/generated_cases.c.h | 20 ++++++++++++++++ Tools/cases_generator/analyzer.py | 2 ++ 7 files changed, 135 insertions(+), 19 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index c9ac3819d0390b..fda6c7d79aff47 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -124,17 +124,23 @@ _PyFrame_NumSlotsForCodeObject(PyCodeObject *code) static inline void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame *dest) { - *dest = *src; + dest->f_executable = PyStackRef_HeapSafe(src->f_executable); + // Don't leave a dangling pointer to the old frame when creating generators + // and coroutines: + dest->previous = NULL; + dest->f_funcobj = PyStackRef_HeapSafe(src->f_funcobj); + dest->f_globals = src->f_globals; + dest->f_builtins = src->f_builtins; + dest->f_locals = src->f_locals; + dest->frame_obj = src->frame_obj; + dest->instr_ptr = src->instr_ptr; assert(src->stackpointer != NULL); int stacktop = (int)(src->stackpointer - src->localsplus); - assert(stacktop >= _PyFrame_GetCode(src)->co_nlocalsplus); + assert(stacktop >= 0); dest->stackpointer = dest->localsplus + stacktop; - for (int i = 1; i < stacktop; i++) { - dest->localsplus[i] = src->localsplus[i]; + for (int i = 0; i < stacktop; i++) { + dest->localsplus[i] = PyStackRef_HeapSafe(src->localsplus[i]); } - // Don't leave a dangling pointer to the old frame when creating generators - // and coroutines: - dest->previous = NULL; #ifdef Py_GIL_DISABLED PyCodeObject *co = _PyFrame_GetCode(dest); diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 335b2935fe9fdf..7d4811736bbfab 100644 --- 
a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -154,6 +154,18 @@ PyStackRef_DUP(_PyStackRef stackref) return stackref; } +static inline int +PyStackRef_IsHeapSafe(_PyStackRef ref) +{ + return 1; +} + +static inline _PyStackRef +PyStackRef_HeapSafe(_PyStackRef ref) +{ + return ref; +} + // Convert a possibly deferred reference to a strong reference. static inline _PyStackRef PyStackRef_AsStrongReference(_PyStackRef stackref) @@ -264,6 +276,29 @@ PyStackRef_DUP(_PyStackRef ref) return ref; } +static inline int +PyStackRef_IsHeapSafe(_PyStackRef ref) +{ + return ( + PyStackRef_IsNull(ref) || + !PyStackRef_HasCount(ref) || + _Py_IsImmortal(PyStackRef_AsPyObjectBorrow(ref)) + ); +} + +static inline _PyStackRef +PyStackRef_HeapSafe(_PyStackRef ref) +{ + if (PyStackRef_HasCount(ref)) { + PyObject *obj = BITS_TO_PTR_MASKED(ref); + if (obj != NULL && !_Py_IsImmortal(obj)) { + Py_INCREF_MORTAL(obj); + ref.bits = (uintptr_t)obj; + } + } + return ref; +} + static inline void PyStackRef_CLOSE(_PyStackRef ref) { diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 8952551927f7be..a4f142c119bd4c 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -260,6 +260,10 @@ dummy_func( } replicate(8) inst(STORE_FAST, (value --)) { + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value) + ); SETLOCAL(oparg, value); DEAD(value); } @@ -269,6 +273,10 @@ dummy_func( }; inst(STORE_FAST_LOAD_FAST, (value1 -- value2)) { + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value1) + ); uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; SETLOCAL(oparg1, value1); @@ -277,6 +285,14 @@ dummy_func( } inst(STORE_FAST_STORE_FAST, (value2, value1 --)) { + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value1) + ); + assert( + ((_PyFrame_GetCode(frame)->co_flags & 
(CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value2) + ); uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; SETLOCAL(oparg1, value1); @@ -981,6 +997,7 @@ dummy_func( assert(frame != &entry_frame); #endif _PyStackRef temp = retval; + assert(PyStackRef_IsHeapSafe(temp)); DEAD(retval); SAVE_STACK(); assert(EMPTY()); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 48386d2788150e..4565d8d4307acc 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -217,6 +217,10 @@ oparg = 0; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value) + ); SETLOCAL(oparg, value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -228,6 +232,10 @@ oparg = 1; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value) + ); SETLOCAL(oparg, value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -239,6 +247,10 @@ oparg = 2; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value) + ); SETLOCAL(oparg, value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -250,6 +262,10 @@ oparg = 3; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value) + ); SETLOCAL(oparg, value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -261,6 +277,10 @@ oparg = 4; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value) + ); SETLOCAL(oparg, value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ 
-272,6 +292,10 @@ oparg = 5; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value) + ); SETLOCAL(oparg, value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -283,6 +307,10 @@ oparg = 6; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value) + ); SETLOCAL(oparg, value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -294,6 +322,10 @@ oparg = 7; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value) + ); SETLOCAL(oparg, value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -304,6 +336,10 @@ _PyStackRef value; oparg = CURRENT_OPARG(); value = stack_pointer[-1]; + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value) + ); SETLOCAL(oparg, value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -1258,6 +1294,7 @@ assert(frame != &entry_frame); #endif _PyStackRef temp = retval; + assert(PyStackRef_IsHeapSafe(temp)); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); diff --git a/Python/frame.c b/Python/frame.c index 35e6c2d0a93333..f0fb0f6f2aee77 100644 --- a/Python/frame.c +++ b/Python/frame.c @@ -52,23 +52,19 @@ take_ownership(PyFrameObject *f, _PyInterpreterFrame *frame) assert(frame->owner != FRAME_OWNED_BY_CSTACK); assert(frame->owner != FRAME_OWNED_BY_FRAME_OBJECT); assert(frame->owner != FRAME_CLEARED); - Py_ssize_t size = ((char*)frame->stackpointer) - (char *)frame; - memcpy((_PyInterpreterFrame *)f->_f_frame_data, frame, size); - frame = (_PyInterpreterFrame *)f->_f_frame_data; - frame->stackpointer = (_PyStackRef *)(((char *)frame) + size); 
- frame->f_executable = PyStackRef_DUP(frame->f_executable); - f->f_frame = frame; - frame->owner = FRAME_OWNED_BY_FRAME_OBJECT; - if (_PyFrame_IsIncomplete(frame)) { + _PyInterpreterFrame *new_frame = (_PyInterpreterFrame *)f->_f_frame_data; + _PyFrame_Copy(frame, new_frame); + f->f_frame = new_frame; + new_frame->owner = FRAME_OWNED_BY_FRAME_OBJECT; + if (_PyFrame_IsIncomplete(new_frame)) { // This may be a newly-created generator or coroutine frame. Since it's // dead anyways, just pretend that the first RESUME ran: - PyCodeObject *code = _PyFrame_GetCode(frame); - frame->instr_ptr = _PyCode_CODE(code) + code->_co_firsttraceable + 1; + PyCodeObject *code = _PyFrame_GetCode(new_frame); + new_frame->instr_ptr = _PyCode_CODE(code) + code->_co_firsttraceable + 1; } - assert(!_PyFrame_IsIncomplete(frame)); + assert(!_PyFrame_IsIncomplete(new_frame)); assert(f->f_back == NULL); _PyInterpreterFrame *prev = _PyFrame_GetFirstComplete(frame->previous); - frame->previous = NULL; if (prev) { assert(prev->owner != FRAME_OWNED_BY_CSTACK); /* Link PyFrameObjects.f_back and remove link through _PyInterpreterFrame.previous */ @@ -116,6 +112,9 @@ _PyFrame_ClearExceptCode(_PyInterpreterFrame *frame) PyFrameObject *f = frame->frame_obj; frame->frame_obj = NULL; if (Py_REFCNT(f) > 1) { + // Take ownership takes the reference to the executable, + // so we need to incref it. 
+ PyStackRef_AsPyObjectNew(frame->f_executable); take_ownership(f, frame); Py_DECREF(f); return; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 1e82918bc9847f..d68e607a99d652 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4922,6 +4922,7 @@ assert(frame != &entry_frame); #endif _PyStackRef temp = retval; + assert(PyStackRef_IsHeapSafe(temp)); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -4967,6 +4968,7 @@ assert(frame != &entry_frame); #endif _PyStackRef temp = retval; + assert(PyStackRef_IsHeapSafe(temp)); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -6981,6 +6983,7 @@ assert(frame != &entry_frame); #endif _PyStackRef temp = retval; + assert(PyStackRef_IsHeapSafe(temp)); _PyFrame_SetStackPointer(frame, stack_pointer); assert(EMPTY()); _Py_LeaveRecursiveCallPy(tstate); @@ -7043,6 +7046,7 @@ assert(frame != &entry_frame); #endif _PyStackRef temp = retval; + assert(PyStackRef_IsHeapSafe(temp)); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -7501,6 +7505,10 @@ INSTRUCTION_STATS(STORE_FAST); _PyStackRef value; value = stack_pointer[-1]; + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value) + ); SETLOCAL(oparg, value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -7514,6 +7522,10 @@ _PyStackRef value1; _PyStackRef value2; value1 = stack_pointer[-1]; + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value1) + ); uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; SETLOCAL(oparg1, value1); @@ -7530,6 +7542,14 @@ _PyStackRef value1; value1 = stack_pointer[-1]; value2 = stack_pointer[-2]; + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + 
PyStackRef_IsHeapSafe(value1) + ); + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value2) + ); uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; SETLOCAL(oparg1, value1); diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 381ad3a4e2082c..9d7aaa988bf467 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -546,7 +546,9 @@ def has_error_without_pop(op: parser.InstDef) -> bool: "PyStackRef_FromPyObjectImmortal", "PyStackRef_FromPyObjectNew", "PyStackRef_FromPyObjectSteal", + "PyStackRef_HasCount", "PyStackRef_Is", + "PyStackRef_IsHeapSafe", "PyStackRef_IsNull", "PyStackRef_None", "PyStackRef_TYPE", From 83e0323be5ea7a757400cda9e9e289c6ba124cbc Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 16 Oct 2024 18:19:47 +0100 Subject: [PATCH 07/53] Improve efficiency of _PyStackRef_FromPyObjectNew a bit --- Include/internal/pycore_stackref.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 7d4811736bbfab..a1c5fba0db8cb7 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -249,7 +249,7 @@ _PyStackRef_FromPyObjectNew(PyObject *obj) if (_Py_IsDeferrable(obj)) { return (_PyStackRef){ .bits = ((uintptr_t)obj) | Py_TAG_REFCNT}; } - Py_INCREF(obj); + Py_INCREF_MORTAL(obj); _PyStackRef ref = (_PyStackRef){ .bits = (uintptr_t)obj }; return ref; } From f03f745930554105f98a248723d1350b84462e26 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 29 Oct 2024 18:28:26 +0000 Subject: [PATCH 08/53] Make making heap safe more efficient --- Include/internal/pycore_stackref.h | 27 ++++++++++++++++----------- Include/refcount.h | 4 +++- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 
a1c5fba0db8cb7..007443762788af 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -195,18 +195,19 @@ PyStackRef_AsStrongReference(_PyStackRef stackref) // With GIL -#define Py_TAG_BITS 1 +#define Py_TAG_BITS 3 #define Py_TAG_REFCNT 1 +#define Py_TAG_IMMORTAL 3 #define BITS_TO_PTR(REF) ((PyObject *)((REF).bits)) #define BITS_TO_PTR_MASKED(REF) ((PyObject *)(((REF).bits) & (~Py_TAG_BITS))) -#define PyStackRef_NULL_BITS Py_TAG_REFCNT +#define PyStackRef_NULL_BITS Py_TAG_IMMORTAL static const _PyStackRef PyStackRef_NULL = { .bits = PyStackRef_NULL_BITS }; #define PyStackRef_IsNull(ref) ((ref).bits == PyStackRef_NULL_BITS) -#define PyStackRef_True ((_PyStackRef){.bits = ((uintptr_t)&_Py_TrueStruct) | Py_TAG_REFCNT }) -#define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) | Py_TAG_REFCNT }) -#define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) | Py_TAG_REFCNT }) +#define PyStackRef_True ((_PyStackRef){.bits = ((uintptr_t)&_Py_TrueStruct) | Py_TAG_IMMORTAL }) +#define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) | Py_TAG_IMMORTAL }) +#define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) | Py_TAG_IMMORTAL }) static inline int PyStackRef_HasCount(_PyStackRef ref) @@ -214,6 +215,12 @@ PyStackRef_HasCount(_PyStackRef ref) return ref.bits & Py_TAG_REFCNT; } +static inline int +PyStackRef_HasCountAndMortal(_PyStackRef ref) +{ + return (ref.bits & Py_TAG_BITS) == Py_TAG_REFCNT; +} + static inline PyObject * PyStackRef_AsPyObjectBorrow(_PyStackRef ref) { @@ -238,7 +245,7 @@ static inline _PyStackRef PyStackRef_FromPyObjectSteal(PyObject *obj) { assert(obj != NULL); - unsigned int tag = _Py_IsDeferrable(obj) ? Py_TAG_REFCNT : 0; + unsigned int tag = _Py_IsImmortal(obj) ? 
Py_TAG_IMMORTAL : 0; _PyStackRef ref = ((_PyStackRef){.bits = ((uintptr_t)(obj)) | tag}); return ref; } @@ -289,12 +296,10 @@ PyStackRef_IsHeapSafe(_PyStackRef ref) static inline _PyStackRef PyStackRef_HeapSafe(_PyStackRef ref) { - if (PyStackRef_HasCount(ref)) { + if (PyStackRef_HasCountAndMortal(ref)) { PyObject *obj = BITS_TO_PTR_MASKED(ref); - if (obj != NULL && !_Py_IsImmortal(obj)) { - Py_INCREF_MORTAL(obj); - ref.bits = (uintptr_t)obj; - } + Py_INCREF_MORTAL(obj); + ref.bits = (uintptr_t)obj; } return ref; } diff --git a/Include/refcount.h b/Include/refcount.h index 1a5265e27b0c01..39d606ffbc6b84 100644 --- a/Include/refcount.h +++ b/Include/refcount.h @@ -377,7 +377,9 @@ static inline void Py_DECREF_MORTAL(const char *filename, int lineno, PyObject * _Py_NegativeRefcount(filename, lineno, op); } _Py_DECREF_STAT_INC(); - _Py_DECREF_DecRefTotal(); + if (!_Py_IsImmortal(op)) { + _Py_DECREF_DecRefTotal(); + } if (--op->ob_refcnt == 0) { _Py_Dealloc(op); } From ff9d044ca863c1b982bb12eb6834c1d1dffcc2c4 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 2 Dec 2024 15:18:45 +0000 Subject: [PATCH 09/53] Fix up after merge --- Include/internal/pycore_frame.h | 3 +- Include/internal/pycore_stackref.h | 62 +++++++++++++++++++++++------- Python/ceval.c | 55 ++++++++++++++++++++++---- Python/ceval_macros.h | 1 + Python/frame.c | 4 +- 5 files changed, 101 insertions(+), 24 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 1d6c51dc24dbac..1e434e9ed43d52 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -157,6 +157,7 @@ static inline void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame * dest->stackpointer = dest->localsplus + stacktop; for (int i = 0; i < stacktop; i++) { dest->localsplus[i] = PyStackRef_HeapSafe(src->localsplus[i]); + PyStackRef_CheckValid(dest->localsplus[i]); } #ifdef Py_GIL_DISABLED @@ -408,7 +409,7 @@ _PyFrame_PushTrampolineUnchecked(PyThreadState 
*tstate, PyCodeObject *code, int PyAPI_FUNC(_PyInterpreterFrame *) _PyEvalFramePushAndInit(PyThreadState *tstate, _PyStackRef func, - PyObject *locals, _PyStackRef const* args, + PyObject *locals, _PyStackRef const *args, size_t argcount, PyObject *kwnames, _PyInterpreterFrame *previous); diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 0f83d2e7ad96f0..9d60ac698d3bb2 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -200,22 +200,49 @@ PyStackRef_AsStrongReference(_PyStackRef stackref) // With GIL #define Py_TAG_BITS 3 +#define Py_NULL_BIT 2 #define Py_TAG_REFCNT 1 -#define Py_TAG_IMMORTAL 3 #define BITS_TO_PTR(REF) ((PyObject *)((REF).bits)) #define BITS_TO_PTR_MASKED(REF) ((PyObject *)(((REF).bits) & (~Py_TAG_BITS))) -#define PyStackRef_NULL_BITS Py_TAG_IMMORTAL +#define PyStackRef_NULL_BITS (Py_TAG_REFCNT | Py_NULL_BIT) static const _PyStackRef PyStackRef_NULL = { .bits = PyStackRef_NULL_BITS }; #define PyStackRef_IsNull(ref) ((ref).bits == PyStackRef_NULL_BITS) -#define PyStackRef_True ((_PyStackRef){.bits = ((uintptr_t)&_Py_TrueStruct) | Py_TAG_IMMORTAL }) -#define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) | Py_TAG_IMMORTAL }) -#define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) | Py_TAG_IMMORTAL }) +#define PyStackRef_True ((_PyStackRef){.bits = ((uintptr_t)&_Py_TrueStruct) | Py_TAG_REFCNT }) +#define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) | Py_TAG_REFCNT }) +#define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) | Py_TAG_REFCNT }) -#define PyStackRef_IsTrue(ref) ((ref).bits == (((uintptr_t)&_Py_TrueStruct) | Py_TAG_IMMORTAL)) -#define PyStackRef_IsFalse(ref) ((ref).bits == (((uintptr_t)&_Py_NoneStruct) | Py_TAG_IMMORTAL)) -#define PyStackRef_IsNone(ref) ((ref).bits == (((uintptr_t)&_Py_NoneStruct) | Py_TAG_IMMORTAL)) +// #define PyStackRef_IsTrue(ref) ((ref).bits == 
(((uintptr_t)&_Py_TrueStruct) | Py_TAG_REFCNT)) +// #define PyStackRef_IsFalse(ref) ((ref).bits == (((uintptr_t)&_Py_FalseStruct) | Py_TAG_REFCNT)) +// #define PyStackRef_IsNone(ref) ((ref).bits == (((uintptr_t)&_Py_NoneStruct) | Py_TAG_REFCNT)) + +/* We should be able to guarantee that the tag bits are set for immortal objects */ + +#define PyStackRef_IsTrue(ref) (((ref).bits & (~Py_TAG_BITS)) == ((uintptr_t)&_Py_TrueStruct)) +#define PyStackRef_IsFalse(ref) (((ref).bits & (~Py_TAG_BITS)) == ((uintptr_t)&_Py_FalseStruct)) +// #define PyStackRef_IsNone(ref) (((ref).bits & (~Py_TAG_BITS)) == ((uintptr_t)&_Py_NoneStruct)) + + +static inline void PyStackRef_CheckValid(_PyStackRef ref) { + int tag = ref.bits & Py_TAG_BITS; + if (tag == PyStackRef_NULL_BITS) { + assert(ref.bits == PyStackRef_NULL_BITS); + } + else if (tag == 0) { + assert(!_Py_IsImmortal(BITS_TO_PTR_MASKED(ref))); + } +} + +static inline int +PyStackRef_IsNone(_PyStackRef ref) +{ + if ((ref.bits & (~Py_TAG_BITS)) == ((uintptr_t)&_Py_NoneStruct)) { + assert ((ref.bits & Py_TAG_BITS) == Py_TAG_REFCNT); + return 1; + } + return 0; +} static inline int PyStackRef_HasCount(_PyStackRef ref) @@ -224,7 +251,7 @@ PyStackRef_HasCount(_PyStackRef ref) } static inline int -PyStackRef_HasCountAndMortal(_PyStackRef ref) +PyStackRef_HasCountAndNotNull(_PyStackRef ref) { return (ref.bits & Py_TAG_BITS) == Py_TAG_REFCNT; } @@ -253,8 +280,9 @@ static inline _PyStackRef PyStackRef_FromPyObjectSteal(PyObject *obj) { assert(obj != NULL); - unsigned int tag = _Py_IsImmortal(obj) ? Py_TAG_IMMORTAL : 0; + unsigned int tag = _Py_IsImmortal(obj) ? 
Py_TAG_REFCNT : 0; _PyStackRef ref = ((_PyStackRef){.bits = ((uintptr_t)(obj)) | tag}); + PyStackRef_CheckValid(ref); return ref; } @@ -272,6 +300,7 @@ _PyStackRef_FromPyObjectNew(PyObject *obj) } Py_INCREF_MORTAL(obj); _PyStackRef ref = (_PyStackRef){ .bits = (uintptr_t)obj }; + PyStackRef_CheckValid(ref); return ref; } #define PyStackRef_FromPyObjectNew(obj) _PyStackRef_FromPyObjectNew(_PyObject_CAST(obj)) @@ -310,11 +339,16 @@ PyStackRef_IsHeapSafe(_PyStackRef ref) static inline _PyStackRef PyStackRef_HeapSafe(_PyStackRef ref) { - if (PyStackRef_HasCountAndMortal(ref)) { - PyObject *obj = BITS_TO_PTR_MASKED(ref); - Py_INCREF_MORTAL(obj); - ref.bits = (uintptr_t)obj; + if (!PyStackRef_HasCountAndNotNull(ref)) { + return ref; + } + PyObject *obj = BITS_TO_PTR_MASKED(ref); + if (_Py_IsImmortal(obj)) { + return ref; } + Py_INCREF_MORTAL(obj); + ref.bits = (uintptr_t)obj; + PyStackRef_CheckValid(ref); return ref; } diff --git a/Python/ceval.c b/Python/ceval.c index 64652816f4c237..c181f762fba2ef 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1515,6 +1515,7 @@ initialize_locals(PyThreadState *tstate, PyFunctionObject *func, } assert(PyStackRef_IsNull(localsplus[i])); localsplus[i] = PyStackRef_FromPyObjectSteal(kwdict); + PyStackRef_CheckValid(localsplus[i]); } else { kwdict = NULL; @@ -1531,6 +1532,7 @@ initialize_locals(PyThreadState *tstate, PyFunctionObject *func, for (j = 0; j < n; j++) { assert(PyStackRef_IsNull(localsplus[j])); localsplus[j] = args[j]; + PyStackRef_CheckValid(localsplus[j]); } /* Pack other positional arguments into the *args argument */ @@ -1654,6 +1656,7 @@ initialize_locals(PyThreadState *tstate, PyFunctionObject *func, goto kw_fail; } localsplus[j] = value_stackref; + PyStackRef_CheckValid(localsplus[j]); } } @@ -1689,6 +1692,7 @@ initialize_locals(PyThreadState *tstate, PyFunctionObject *func, if (PyStackRef_AsPyObjectBorrow(localsplus[m+i]) == NULL) { PyObject *def = defs[i]; localsplus[m+i] = PyStackRef_FromPyObjectNew(def); + 
PyStackRef_CheckValid(localsplus[m+i]); } } } @@ -1708,6 +1712,7 @@ initialize_locals(PyThreadState *tstate, PyFunctionObject *func, } if (def) { localsplus[i] = PyStackRef_FromPyObjectSteal(def); + PyStackRef_CheckValid(localsplus[i]); continue; } } @@ -1835,12 +1840,27 @@ _PyEvalFramePushAndInit_Ex(PyThreadState *tstate, _PyStackRef func, PyStackRef_CLOSE(func); goto error; } + size_t total_args = nargs + PyDict_GET_SIZE(kwargs); + for (size_t i = 0; i < total_args; i++) { + ((_PyStackRef *)newargs)[i] = PyStackRef_FromPyObjectSteal(newargs[i]); + } } else { - newargs = &PyTuple_GET_ITEM(callargs, 0); - /* We need to incref all our args since the new frame steals the references. */ - for (Py_ssize_t i = 0; i < nargs; ++i) { - Py_INCREF(PyTuple_GET_ITEM(callargs, i)); + if (nargs <= 8) { + PyObject *stack_array[8]; + newargs = stack_array; + } + else { + newargs = PyMem_Malloc(sizeof(PyObject *) *nargs); + if (newargs == NULL) { + PyErr_NoMemory(); + PyStackRef_CLOSE(func); + goto error; + } + } + /* We need to tag all our args since the new frame steals the references. */ + for (Py_ssize_t i = 0; i < nargs; i++) { + ((_PyStackRef *)newargs)[i] = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(callargs, i)); } } _PyInterpreterFrame *new_frame = _PyEvalFramePushAndInit( @@ -1850,6 +1870,9 @@ _PyEvalFramePushAndInit_Ex(PyThreadState *tstate, _PyStackRef func, if (has_dict) { _PyStack_UnpackDict_FreeNoDecRef(newargs, kwnames); } + else if (nargs > 8) { + PyMem_Free((void *)newargs); + } /* No need to decref func here because the reference has been stolen by _PyEvalFramePushAndInit. 
*/ @@ -1868,21 +1891,39 @@ _PyEval_Vector(PyThreadState *tstate, PyFunctionObject *func, PyObject* const* args, size_t argcount, PyObject *kwnames) { + size_t total_args = argcount; + if (kwnames) { + total_args += PyTuple_GET_SIZE(kwnames); + } + _PyStackRef *arguments; + if (total_args <= 8) { + _PyStackRef stack_array[8]; + arguments = stack_array; + } + else { + arguments = PyMem_Malloc(sizeof(_PyStackRef) * total_args); + if (arguments == NULL) { + return PyErr_NoMemory(); + } + } /* _PyEvalFramePushAndInit consumes the references * to func, locals and all its arguments */ Py_XINCREF(locals); for (size_t i = 0; i < argcount; i++) { - Py_INCREF(args[i]); + arguments[i] = _PyStackRef_FromPyObjectNew(args[i]); } if (kwnames) { Py_ssize_t kwcount = PyTuple_GET_SIZE(kwnames); for (Py_ssize_t i = 0; i < kwcount; i++) { - Py_INCREF(args[i+argcount]); + arguments[i+argcount] = _PyStackRef_FromPyObjectNew(args[i+argcount]); } } _PyInterpreterFrame *frame = _PyEvalFramePushAndInit( tstate, PyStackRef_FromPyObjectNew(func), locals, - (_PyStackRef const *)args, argcount, kwnames, NULL); + arguments, argcount, kwnames, NULL); + if (total_args > 8) { + PyMem_Free(arguments); + } if (frame == NULL) { return NULL; } diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 61534492fa92b9..75e20e374f4fbc 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -257,6 +257,7 @@ GETITEM(PyObject *v, Py_ssize_t i) { variable would be pointing to already-freed memory. */ #define SETLOCAL(i, value) do { _PyStackRef tmp = GETLOCAL(i); \ GETLOCAL(i) = value; \ + PyStackRef_CheckValid(value); \ PyStackRef_XCLOSE(tmp); } while (0) #define GO_TO_INSTRUCTION(op) goto PREDICT_ID(op) diff --git a/Python/frame.c b/Python/frame.c index 79d3ccb7966a46..592853970bfb2b 100644 --- a/Python/frame.c +++ b/Python/frame.c @@ -60,8 +60,8 @@ take_ownership(PyFrameObject *f, _PyInterpreterFrame *frame) // This may be a newly-created generator or coroutine frame. 
Since it's // dead anyways, just pretend that the first RESUME ran: PyCodeObject *code = _PyFrame_GetCode(new_frame); - frame->instr_ptr = - _PyFrame_GetBytecode(frame) + code->_co_firsttraceable + 1; + new_frame->instr_ptr = + _PyFrame_GetBytecode(new_frame) + code->_co_firsttraceable + 1; } assert(!_PyFrame_IsIncomplete(new_frame)); assert(f->f_back == NULL); From b7a8b5d18094287018e6c01880c410597ec6c422 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 4 Dec 2024 10:56:24 +0000 Subject: [PATCH 10/53] Restore immortality bit --- Include/internal/pycore_frame.h | 6 +-- Include/internal/pycore_stackref.h | 81 ++++++++++++++++-------------- 2 files changed, 46 insertions(+), 41 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 89907bcaea1e0c..1d35075b7486e8 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -142,11 +142,11 @@ _PyFrame_NumSlotsForCodeObject(PyCodeObject *code) static inline void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame *dest) { - dest->f_executable = PyStackRef_HeapSafe(src->f_executable); + dest->f_executable = PyStackRef_MakeHeapSafe(src->f_executable); // Don't leave a dangling pointer to the old frame when creating generators // and coroutines: dest->previous = NULL; - dest->f_funcobj = PyStackRef_HeapSafe(src->f_funcobj); + dest->f_funcobj = PyStackRef_MakeHeapSafe(src->f_funcobj); dest->f_globals = src->f_globals; dest->f_builtins = src->f_builtins; dest->f_locals = src->f_locals; @@ -157,7 +157,7 @@ static inline void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame * assert(stacktop >= 0); dest->stackpointer = dest->localsplus + stacktop; for (int i = 0; i < stacktop; i++) { - dest->localsplus[i] = PyStackRef_HeapSafe(src->localsplus[i]); + dest->localsplus[i] = PyStackRef_MakeHeapSafe(src->localsplus[i]); PyStackRef_CheckValid(dest->localsplus[i]); } diff --git a/Include/internal/pycore_stackref.h 
b/Include/internal/pycore_stackref.h index 9d60ac698d3bb2..3eb67674bc7bb7 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -165,7 +165,7 @@ PyStackRef_IsHeapSafe(_PyStackRef ref) } static inline _PyStackRef -PyStackRef_HeapSafe(_PyStackRef ref) +PyStackRef_MakeHeapSafe(_PyStackRef ref) { return ref; } @@ -200,58 +200,73 @@ PyStackRef_AsStrongReference(_PyStackRef stackref) // With GIL #define Py_TAG_BITS 3 -#define Py_NULL_BIT 2 +#define Py_TAG_IMMORTAL 3 #define Py_TAG_REFCNT 1 #define BITS_TO_PTR(REF) ((PyObject *)((REF).bits)) #define BITS_TO_PTR_MASKED(REF) ((PyObject *)(((REF).bits) & (~Py_TAG_BITS))) -#define PyStackRef_NULL_BITS (Py_TAG_REFCNT | Py_NULL_BIT) +#define PyStackRef_NULL_BITS Py_TAG_IMMORTAL static const _PyStackRef PyStackRef_NULL = { .bits = PyStackRef_NULL_BITS }; #define PyStackRef_IsNull(ref) ((ref).bits == PyStackRef_NULL_BITS) -#define PyStackRef_True ((_PyStackRef){.bits = ((uintptr_t)&_Py_TrueStruct) | Py_TAG_REFCNT }) -#define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) | Py_TAG_REFCNT }) -#define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) | Py_TAG_REFCNT }) +#define PyStackRef_True ((_PyStackRef){.bits = ((uintptr_t)&_Py_TrueStruct) | Py_TAG_IMMORTAL }) +#define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) | Py_TAG_IMMORTAL }) +#define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) | Py_TAG_IMMORTAL }) -// #define PyStackRef_IsTrue(ref) ((ref).bits == (((uintptr_t)&_Py_TrueStruct) | Py_TAG_REFCNT)) -// #define PyStackRef_IsFalse(ref) ((ref).bits == (((uintptr_t)&_Py_FalseStruct) | Py_TAG_REFCNT)) -// #define PyStackRef_IsNone(ref) ((ref).bits == (((uintptr_t)&_Py_NoneStruct) | Py_TAG_REFCNT)) +// #define PyStackRef_IsTrue(ref) ((ref).bits == (((uintptr_t)&_Py_TrueStruct) | Py_TAG_IMMORTAL)) +// #define PyStackRef_IsFalse(ref) ((ref).bits == (((uintptr_t)&_Py_FalseStruct) | Py_TAG_IMMORTAL)) +// 
#define /* We should be able to guarantee that the tag bits are set for immortal objects */ #define PyStackRef_IsTrue(ref) (((ref).bits & (~Py_TAG_BITS)) == ((uintptr_t)&_Py_TrueStruct)) #define PyStackRef_IsFalse(ref) (((ref).bits & (~Py_TAG_BITS)) == ((uintptr_t)&_Py_FalseStruct)) -// #define PyStackRef_IsNone(ref) (((ref).bits & (~Py_TAG_BITS)) == ((uintptr_t)&_Py_NoneStruct)) static inline void PyStackRef_CheckValid(_PyStackRef ref) { int tag = ref.bits & Py_TAG_BITS; - if (tag == PyStackRef_NULL_BITS) { - assert(ref.bits == PyStackRef_NULL_BITS); - } - else if (tag == 0) { - assert(!_Py_IsImmortal(BITS_TO_PTR_MASKED(ref))); + PyObject *obj = BITS_TO_PTR_MASKED(ref); + switch (tag) { + case 0: + assert(!_Py_IsImmortal(obj)); + break; + case Py_TAG_REFCNT: + /* Can be immortal if object was made immortal after reference came into existence */ + assert(obj != NULL && obj != Py_True && obj != Py_False && obj != Py_None); + break; + case Py_TAG_IMMORTAL: + assert(obj == NULL || _Py_IsImmortal(obj)); + break; + default: + assert(0); } } +#ifdef Py_DEBUG static inline int PyStackRef_IsNone(_PyStackRef ref) { if ((ref.bits & (~Py_TAG_BITS)) == ((uintptr_t)&_Py_NoneStruct)) { - assert ((ref.bits & Py_TAG_BITS) == Py_TAG_REFCNT); + assert ((ref.bits & Py_TAG_BITS) == Py_TAG_IMMORTAL); return 1; } return 0; } +#else + +#define PyStackRef_IsNone(REF) ((REF).bits == (((uintptr_t)&_Py_NoneStruct) | Py_TAG_IMMORTAL)) + +#endif + static inline int PyStackRef_HasCount(_PyStackRef ref) { return ref.bits & Py_TAG_REFCNT; } -static inline int -PyStackRef_HasCountAndNotNull(_PyStackRef ref) +static inline bool +PyStackRef_HasCountAndMortal(_PyStackRef ref) { return (ref.bits & Py_TAG_BITS) == Py_TAG_REFCNT; } @@ -280,7 +295,7 @@ static inline _PyStackRef PyStackRef_FromPyObjectSteal(PyObject *obj) { assert(obj != NULL); - unsigned int tag = _Py_IsImmortal(obj) ? Py_TAG_REFCNT : 0; + unsigned int tag = _Py_IsImmortal(obj) ? 
Py_TAG_IMMORTAL : 0; _PyStackRef ref = ((_PyStackRef){.bits = ((uintptr_t)(obj)) | tag}); PyStackRef_CheckValid(ref); return ref; @@ -295,8 +310,8 @@ PyStackRef_FromPyObjectSteal(PyObject *obj) static inline _PyStackRef _PyStackRef_FromPyObjectNew(PyObject *obj) { - if (_Py_IsDeferrable(obj)) { - return (_PyStackRef){ .bits = ((uintptr_t)obj) | Py_TAG_REFCNT}; + if (_Py_IsImmortal(obj)) { + return (_PyStackRef){ .bits = ((uintptr_t)obj) | Py_TAG_IMMORTAL}; } Py_INCREF_MORTAL(obj); _PyStackRef ref = (_PyStackRef){ .bits = (uintptr_t)obj }; @@ -307,14 +322,11 @@ _PyStackRef_FromPyObjectNew(PyObject *obj) /* Create a new reference from an object with an embedded reference count */ static inline _PyStackRef -_PyStackRef_FromPyObjectWithCount(PyObject *obj) +PyStackRef_FromPyObjectImmortal(PyObject *obj) { - return (_PyStackRef){ .bits = (uintptr_t)obj | Py_TAG_REFCNT}; + assert(_Py_IsImmortal(obj)); + return (_PyStackRef){ .bits = (uintptr_t)obj | Py_TAG_IMMORTAL}; } -#define PyStackRef_FromPyObjectWithCount(obj) _PyStackRef_FromPyObjectWithCount(_PyObject_CAST(obj)) - -#define PyStackRef_FromPyObjectImmortal PyStackRef_FromPyObjectWithCount - static inline _PyStackRef PyStackRef_DUP(_PyStackRef ref) @@ -326,26 +338,19 @@ PyStackRef_DUP(_PyStackRef ref) return ref; } -static inline int +static inline bool PyStackRef_IsHeapSafe(_PyStackRef ref) { - return ( - PyStackRef_IsNull(ref) || - !PyStackRef_HasCount(ref) || - _Py_IsImmortal(PyStackRef_AsPyObjectBorrow(ref)) - ); + return !PyStackRef_HasCountAndMortal(ref); } static inline _PyStackRef -PyStackRef_HeapSafe(_PyStackRef ref) +PyStackRef_MakeHeapSafe(_PyStackRef ref) { - if (!PyStackRef_HasCountAndNotNull(ref)) { + if (!PyStackRef_HasCountAndMortal(ref)) { return ref; } PyObject *obj = BITS_TO_PTR_MASKED(ref); - if (_Py_IsImmortal(obj)) { - return ref; - } Py_INCREF_MORTAL(obj); ref.bits = (uintptr_t)obj; PyStackRef_CheckValid(ref); From 49d0d4273850c30f6c97c1b9b23ae68545eca38c Mon Sep 17 00:00:00 2001 From: Mark 
Shannon Date: Wed, 4 Dec 2024 11:13:08 +0000 Subject: [PATCH 11/53] Fix validity check --- Include/internal/pycore_stackref.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 3eb67674bc7bb7..d656b1f0a02143 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -223,6 +223,8 @@ static const _PyStackRef PyStackRef_NULL = { .bits = PyStackRef_NULL_BITS }; #define PyStackRef_IsFalse(ref) (((ref).bits & (~Py_TAG_BITS)) == ((uintptr_t)&_Py_FalseStruct)) +#ifdef Py_DEBUG + static inline void PyStackRef_CheckValid(_PyStackRef ref) { int tag = ref.bits & Py_TAG_BITS; PyObject *obj = BITS_TO_PTR_MASKED(ref); @@ -242,7 +244,6 @@ static inline void PyStackRef_CheckValid(_PyStackRef ref) { } } -#ifdef Py_DEBUG static inline int PyStackRef_IsNone(_PyStackRef ref) { @@ -255,6 +256,7 @@ PyStackRef_IsNone(_PyStackRef ref) #else +#define PyStackRef_CheckValid(REF) ((void)0) #define PyStackRef_IsNone(REF) ((REF).bits == (((uintptr_t)&_Py_NoneStruct) | Py_TAG_IMMORTAL)) #endif From 3a12d188a43862a188d27c0eccbaf8b8b1a0b89d Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 5 Dec 2024 14:25:39 +0000 Subject: [PATCH 12/53] Add PyStackRef_IsMortal --- Include/internal/pycore_stackref.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index d656b1f0a02143..1aff47a7c1ef65 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -267,12 +267,20 @@ PyStackRef_HasCount(_PyStackRef ref) return ref.bits & Py_TAG_REFCNT; } +/* Does this ref have an embedded refcount and refer to a mortal object (NULL is not mortal) */ static inline bool PyStackRef_HasCountAndMortal(_PyStackRef ref) { return (ref.bits & Py_TAG_BITS) == Py_TAG_REFCNT; } +/* Does this ref refer to a mortal object (NULL is not mortal) */ +static inline bool 
+PyStackRef_IsMortal(_PyStackRef ref) +{ + return (ref.bits & Py_TAG_BITS) != Py_TAG_IMMORTAL; +} + static inline PyObject * PyStackRef_AsPyObjectBorrow(_PyStackRef ref) { From 497fb9fc13c16c35a21564f48c0ad2796609af69 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 5 Dec 2024 15:28:35 +0000 Subject: [PATCH 13/53] Reduce overhead of stackrefs a tiny bit for mortal objects --- Include/internal/pycore_stackref.h | 15 ++++++++--- Python/bytecodes.c | 40 ++++++++++++--------------- Python/executor_cases.c.h | 43 +++++++++++++----------------- Python/generated_cases.c.h | 34 +++++++++++------------ Python/optimizer_bytecodes.c | 2 +- Tools/cases_generator/analyzer.py | 3 ++- 6 files changed, 65 insertions(+), 72 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 1aff47a7c1ef65..8424ac8183fb02 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -261,6 +261,7 @@ PyStackRef_IsNone(_PyStackRef ref) #endif +/* Does this ref have an embedded refcount */ static inline int PyStackRef_HasCount(_PyStackRef ref) { @@ -298,9 +299,6 @@ PyStackRef_AsPyObjectSteal(_PyStackRef ref) } } -/* We will want to extend this to a larger set of objects in the future */ -#define _Py_IsDeferrable _Py_IsImmortal - static inline _PyStackRef PyStackRef_FromPyObjectSteal(PyObject *obj) { @@ -311,6 +309,16 @@ PyStackRef_FromPyObjectSteal(PyObject *obj) return ref; } +static inline _PyStackRef +PyStackRef_FromPyObjectStealMortal(PyObject *obj) +{ + assert(obj != NULL); + assert(!_Py_IsImmortal(obj)); + _PyStackRef ref = ((_PyStackRef){.bits = ((uintptr_t)(obj)) }); + PyStackRef_CheckValid(ref); + return ref; +} + // Check if a stackref is exactly the same as another stackref, including the // the deferred bit. This can only be used safely if you know that the deferred // bits of `a` and `b` match. 
@@ -320,6 +328,7 @@ PyStackRef_FromPyObjectSteal(PyObject *obj) static inline _PyStackRef _PyStackRef_FromPyObjectNew(PyObject *obj) { + assert(obj != NULL); if (_Py_IsImmortal(obj)) { return (_PyStackRef){ .bits = ((uintptr_t)obj) | Py_TAG_IMMORTAL}; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 7c7053257de7ab..0b67e7b0390787 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -837,7 +837,7 @@ dummy_func( PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); DEAD(sub_st); PyStackRef_CLOSE(str_st); - res = PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectImmortal(res_o); } inst(BINARY_SUBSCR_TUPLE_INT, (unused/1, tuple_st, sub_st -- res)) { @@ -1662,8 +1662,7 @@ dummy_func( int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); DEOPT_IF(!increfed); #else - Py_INCREF(res_o); - res = PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectNew(res_o); #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; @@ -1679,8 +1678,7 @@ dummy_func( int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); DEOPT_IF(!increfed); #else - Py_INCREF(res_o); - res = PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectNew(res_o); #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; @@ -1718,7 +1716,7 @@ dummy_func( if (cell == NULL) { ERROR_NO_POP(); } - SETLOCAL(oparg, PyStackRef_FromPyObjectSteal(cell)); + SETLOCAL(oparg, PyStackRef_FromPyObjectStealMortal(cell)); } inst(DELETE_DEREF, (--)) { @@ -1800,17 +1798,18 @@ dummy_func( } inst(BUILD_TUPLE, (values[oparg] -- tup)) { + assert(oparg != 0); PyObject *tup_o = _PyTuple_FromStackRefSteal(values, oparg); INPUTS_DEAD(); ERROR_IF(tup_o == NULL, error); - tup = PyStackRef_FromPyObjectSteal(tup_o); + tup = PyStackRef_FromPyObjectStealMortal(tup_o); } inst(BUILD_LIST, (values[oparg] -- list)) { PyObject *list_o = _PyList_FromStackRefSteal(values, oparg); INPUTS_DEAD(); ERROR_IF(list_o == NULL, 
error); - list = PyStackRef_FromPyObjectSteal(list_o); + list = PyStackRef_FromPyObjectStealMortal(list_o); } inst(LIST_EXTEND, (list_st, unused[oparg-1], iterable_st -- list_st, unused[oparg-1])) { @@ -1860,7 +1859,7 @@ dummy_func( Py_DECREF(set_o); ERROR_IF(true, error); } - set = PyStackRef_FromPyObjectSteal(set_o); + set = PyStackRef_FromPyObjectStealMortal(set_o); } inst(BUILD_MAP, (values[oparg*2] -- map)) { @@ -1876,7 +1875,7 @@ dummy_func( STACKREFS_TO_PYOBJECTS_CLEANUP(values_o); DECREF_INPUTS(); ERROR_IF(map_o == NULL, error); - map = PyStackRef_FromPyObjectSteal(map_o); + map = PyStackRef_FromPyObjectStealMortal(map_o); } inst(SETUP_ANNOTATIONS, (--)) { @@ -2158,9 +2157,8 @@ dummy_func( PyObject *attr_o = *value_ptr; DEOPT_IF(attr_o == NULL); STAT_INC(LOAD_ATTR, hit); - Py_INCREF(attr_o); null = PyStackRef_NULL; - attr = PyStackRef_FromPyObjectSteal(attr_o); + attr = PyStackRef_FromPyObjectNew(attr_o); DECREF_INPUTS(); } @@ -2188,8 +2186,7 @@ dummy_func( PyObject *attr_o = ep->me_value; DEOPT_IF(attr_o == NULL); STAT_INC(LOAD_ATTR, hit); - Py_INCREF(attr_o); - attr = PyStackRef_FromPyObjectSteal(attr_o); + attr = PyStackRef_FromPyObjectNew(attr_o); null = PyStackRef_NULL; DECREF_INPUTS(); } @@ -2222,8 +2219,7 @@ dummy_func( attr_o = ep->me_value; DEOPT_IF(attr_o == NULL); STAT_INC(LOAD_ATTR, hit); - Py_INCREF(attr_o); - attr = PyStackRef_FromPyObjectSteal(attr_o); + attr = PyStackRef_FromPyObjectNew(attr_o); null = PyStackRef_NULL; DECREF_INPUTS(); } @@ -3681,7 +3677,7 @@ dummy_func( DEOPT_IF(callable_o != (PyObject *)&PyType_Type); DEAD(callable); STAT_INC(CALL, hit); - res = PyStackRef_FromPyObjectSteal(Py_NewRef(Py_TYPE(arg_o))); + res = PyStackRef_FromPyObjectNew(Py_TYPE(arg_o)); PyStackRef_CLOSE(arg); } @@ -4596,7 +4592,7 @@ dummy_func( frame = tstate->current_frame = prev; LOAD_IP(frame->return_offset); RELOAD_STACK(); - res = PyStackRef_FromPyObjectSteal((PyObject *)gen); + res = PyStackRef_FromPyObjectStealMortal((PyObject *)gen); 
LLTRACE_RESUME_FRAME(); } @@ -4608,7 +4604,7 @@ dummy_func( PyObject *slice_o = PySlice_New(start_o, stop_o, step_o); DECREF_INPUTS(); ERROR_IF(slice_o == NULL, error); - slice = PyStackRef_FromPyObjectSteal(slice_o); + slice = PyStackRef_FromPyObjectStealMortal(slice_o); } inst(CONVERT_VALUE, (value -- result)) { @@ -4955,8 +4951,7 @@ dummy_func( PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys); PyObject *res_o = entries[index].me_value; DEOPT_IF(res_o == NULL); - Py_INCREF(res_o); - res = PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectNew(res_o); null = PyStackRef_NULL; } @@ -4965,8 +4960,7 @@ dummy_func( PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys); PyObject *res_o = entries[index].me_value; DEOPT_IF(res_o == NULL); - Py_INCREF(res_o); - res = PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectNew(res_o); null = PyStackRef_NULL; } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 2a98a44cd9eb9a..cc407a2f0efd13 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1081,7 +1081,7 @@ PyObject *res_o = (PyObject*)&_Py_SINGLETON(strings).ascii[c]; PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); PyStackRef_CLOSE(str_st); - res = PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectImmortal(res_o); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -2006,8 +2006,7 @@ JUMP_TO_JUMP_TARGET(); } #else - Py_INCREF(res_o); - res = PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectNew(res_o); #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; @@ -2040,8 +2039,7 @@ JUMP_TO_JUMP_TARGET(); } #else - Py_INCREF(res_o); - res = PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectNew(res_o); #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; @@ -2077,7 +2075,7 @@ if (cell == NULL) { JUMP_TO_ERROR(); } - SETLOCAL(oparg, PyStackRef_FromPyObjectSteal(cell)); + 
SETLOCAL(oparg, PyStackRef_FromPyObjectStealMortal(cell)); break; } @@ -2207,9 +2205,10 @@ _PyStackRef tup; oparg = CURRENT_OPARG(); values = &stack_pointer[-oparg]; + assert(oparg != 0); PyObject *tup_o = _PyTuple_FromStackRefSteal(values, oparg); if (tup_o == NULL) JUMP_TO_ERROR(); - tup = PyStackRef_FromPyObjectSteal(tup_o); + tup = PyStackRef_FromPyObjectStealMortal(tup_o); stack_pointer[-oparg] = tup; stack_pointer += 1 - oparg; assert(WITHIN_STACK_BOUNDS()); @@ -2223,7 +2222,7 @@ values = &stack_pointer[-oparg]; PyObject *list_o = _PyList_FromStackRefSteal(values, oparg); if (list_o == NULL) JUMP_TO_ERROR(); - list = PyStackRef_FromPyObjectSteal(list_o); + list = PyStackRef_FromPyObjectStealMortal(list_o); stack_pointer[-oparg] = list; stack_pointer += 1 - oparg; assert(WITHIN_STACK_BOUNDS()); @@ -2309,7 +2308,7 @@ Py_DECREF(set_o); if (true) JUMP_TO_ERROR(); } - set = PyStackRef_FromPyObjectSteal(set_o); + set = PyStackRef_FromPyObjectStealMortal(set_o); stack_pointer[-oparg] = set; stack_pointer += 1 - oparg; assert(WITHIN_STACK_BOUNDS()); @@ -2339,7 +2338,7 @@ PyStackRef_CLOSE(values[_i]); } if (map_o == NULL) JUMP_TO_ERROR(); - map = PyStackRef_FromPyObjectSteal(map_o); + map = PyStackRef_FromPyObjectStealMortal(map_o); stack_pointer[-oparg*2] = map; stack_pointer += 1 - oparg*2; assert(WITHIN_STACK_BOUNDS()); @@ -2643,9 +2642,8 @@ JUMP_TO_JUMP_TARGET(); } STAT_INC(LOAD_ATTR, hit); - Py_INCREF(attr_o); null = PyStackRef_NULL; - attr = PyStackRef_FromPyObjectSteal(attr_o); + attr = PyStackRef_FromPyObjectNew(attr_o); PyStackRef_CLOSE(owner); stack_pointer[-1] = attr; break; @@ -2666,9 +2664,8 @@ JUMP_TO_JUMP_TARGET(); } STAT_INC(LOAD_ATTR, hit); - Py_INCREF(attr_o); null = PyStackRef_NULL; - attr = PyStackRef_FromPyObjectSteal(attr_o); + attr = PyStackRef_FromPyObjectNew(attr_o); PyStackRef_CLOSE(owner); stack_pointer[-1] = attr; stack_pointer[0] = null; @@ -2715,8 +2712,7 @@ JUMP_TO_JUMP_TARGET(); } STAT_INC(LOAD_ATTR, hit); - Py_INCREF(attr_o); - attr = 
PyStackRef_FromPyObjectSteal(attr_o); + attr = PyStackRef_FromPyObjectNew(attr_o); null = PyStackRef_NULL; PyStackRef_CLOSE(owner); stack_pointer[-1] = attr; @@ -2770,8 +2766,7 @@ JUMP_TO_JUMP_TARGET(); } STAT_INC(LOAD_ATTR, hit); - Py_INCREF(attr_o); - attr = PyStackRef_FromPyObjectSteal(attr_o); + attr = PyStackRef_FromPyObjectNew(attr_o); null = PyStackRef_NULL; PyStackRef_CLOSE(owner); stack_pointer[-1] = attr; @@ -4441,7 +4436,7 @@ JUMP_TO_JUMP_TARGET(); } STAT_INC(CALL, hit); - res = PyStackRef_FromPyObjectSteal(Py_NewRef(Py_TYPE(arg_o))); + res = PyStackRef_FromPyObjectNew(Py_TYPE(arg_o)); PyStackRef_CLOSE(arg); stack_pointer[-3] = res; stack_pointer += -2; @@ -5525,7 +5520,7 @@ frame = tstate->current_frame = prev; LOAD_IP(frame->return_offset); stack_pointer = _PyFrame_GetStackPointer(frame); - res = PyStackRef_FromPyObjectSteal((PyObject *)gen); + res = PyStackRef_FromPyObjectStealMortal((PyObject *)gen); LLTRACE_RESUME_FRAME(); stack_pointer[0] = res; stack_pointer += 1; @@ -5550,7 +5545,7 @@ PyStackRef_CLOSE(stop); PyStackRef_XCLOSE(step); if (slice_o == NULL) JUMP_TO_ERROR(); - slice = PyStackRef_FromPyObjectSteal(slice_o); + slice = PyStackRef_FromPyObjectStealMortal(slice_o); stack_pointer[-2 - ((oparg == 3) ? 1 : 0)] = slice; stack_pointer += -1 - ((oparg == 3) ? 
1 : 0); assert(WITHIN_STACK_BOUNDS()); @@ -5927,8 +5922,7 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - Py_INCREF(res_o); - res = PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectNew(res_o); null = PyStackRef_NULL; stack_pointer[0] = res; if (oparg & 1) stack_pointer[1] = null; @@ -5949,8 +5943,7 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - Py_INCREF(res_o); - res = PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectNew(res_o); null = PyStackRef_NULL; stack_pointer[0] = res; if (oparg & 1) stack_pointer[1] = null; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 7e82d476c019d2..e1aa9530bd66b3 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -623,7 +623,7 @@ PyObject *res_o = (PyObject*)&_Py_SINGLETON(strings).ascii[c]; PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); PyStackRef_CLOSE(str_st); - res = PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectImmortal(res_o); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -675,7 +675,7 @@ assert(WITHIN_STACK_BOUNDS()); goto error; } - list = PyStackRef_FromPyObjectSteal(list_o); + list = PyStackRef_FromPyObjectStealMortal(list_o); stack_pointer[-oparg] = list; stack_pointer += 1 - oparg; assert(WITHIN_STACK_BOUNDS()); @@ -715,7 +715,7 @@ assert(WITHIN_STACK_BOUNDS()); goto error; } - map = PyStackRef_FromPyObjectSteal(map_o); + map = PyStackRef_FromPyObjectStealMortal(map_o); stack_pointer[-oparg*2] = map; stack_pointer += 1 - oparg*2; assert(WITHIN_STACK_BOUNDS()); @@ -759,7 +759,7 @@ goto error; } } - set = PyStackRef_FromPyObjectSteal(set_o); + set = PyStackRef_FromPyObjectStealMortal(set_o); stack_pointer[-oparg] = set; stack_pointer += 1 - oparg; assert(WITHIN_STACK_BOUNDS()); @@ -789,7 +789,7 @@ assert(WITHIN_STACK_BOUNDS()); goto error; } - slice = PyStackRef_FromPyObjectSteal(slice_o); + slice = 
PyStackRef_FromPyObjectStealMortal(slice_o); stack_pointer[-2 - ((oparg == 3) ? 1 : 0)] = slice; stack_pointer += -1 - ((oparg == 3) ? 1 : 0); assert(WITHIN_STACK_BOUNDS()); @@ -838,13 +838,14 @@ _PyStackRef *values; _PyStackRef tup; values = &stack_pointer[-oparg]; + assert(oparg != 0); PyObject *tup_o = _PyTuple_FromStackRefSteal(values, oparg); if (tup_o == NULL) { stack_pointer += -oparg; assert(WITHIN_STACK_BOUNDS()); goto error; } - tup = PyStackRef_FromPyObjectSteal(tup_o); + tup = PyStackRef_FromPyObjectStealMortal(tup_o); stack_pointer[-oparg] = tup; stack_pointer += 1 - oparg; assert(WITHIN_STACK_BOUNDS()); @@ -3093,7 +3094,7 @@ DEOPT_IF(!PyStackRef_IsNull(null), CALL); DEOPT_IF(callable_o != (PyObject *)&PyType_Type, CALL); STAT_INC(CALL, hit); - res = PyStackRef_FromPyObjectSteal(Py_NewRef(Py_TYPE(arg_o))); + res = PyStackRef_FromPyObjectNew(Py_TYPE(arg_o)); PyStackRef_CLOSE(arg); stack_pointer[-3] = res; stack_pointer += -2; @@ -5423,9 +5424,8 @@ PyObject *attr_o = *value_ptr; DEOPT_IF(attr_o == NULL, LOAD_ATTR); STAT_INC(LOAD_ATTR, hit); - Py_INCREF(attr_o); null = PyStackRef_NULL; - attr = PyStackRef_FromPyObjectSteal(attr_o); + attr = PyStackRef_FromPyObjectNew(attr_o); PyStackRef_CLOSE(owner); } /* Skip 5 cache entries */ @@ -5593,8 +5593,7 @@ PyObject *attr_o = ep->me_value; DEOPT_IF(attr_o == NULL, LOAD_ATTR); STAT_INC(LOAD_ATTR, hit); - Py_INCREF(attr_o); - attr = PyStackRef_FromPyObjectSteal(attr_o); + attr = PyStackRef_FromPyObjectNew(attr_o); null = PyStackRef_NULL; PyStackRef_CLOSE(owner); } @@ -5820,8 +5819,7 @@ attr_o = ep->me_value; DEOPT_IF(attr_o == NULL, LOAD_ATTR); STAT_INC(LOAD_ATTR, hit); - Py_INCREF(attr_o); - attr = PyStackRef_FromPyObjectSteal(attr_o); + attr = PyStackRef_FromPyObjectNew(attr_o); null = PyStackRef_NULL; PyStackRef_CLOSE(owner); } @@ -6183,8 +6181,7 @@ int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); DEOPT_IF(!increfed, LOAD_GLOBAL); #else - Py_INCREF(res_o); - res = 
PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectNew(res_o); #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; @@ -6226,8 +6223,7 @@ int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); DEOPT_IF(!increfed, LOAD_GLOBAL); #else - Py_INCREF(res_o); - res = PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectNew(res_o); #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; @@ -6520,7 +6516,7 @@ if (cell == NULL) { goto error; } - SETLOCAL(oparg, PyStackRef_FromPyObjectSteal(cell)); + SETLOCAL(oparg, PyStackRef_FromPyObjectStealMortal(cell)); DISPATCH(); } @@ -7027,7 +7023,7 @@ frame = tstate->current_frame = prev; LOAD_IP(frame->return_offset); stack_pointer = _PyFrame_GetStackPointer(frame); - res = PyStackRef_FromPyObjectSteal((PyObject *)gen); + res = PyStackRef_FromPyObjectStealMortal((PyObject *)gen); LLTRACE_RESUME_FRAME(); stack_pointer[0] = res; stack_pointer += 1; diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 42bdbd9ca8d0cd..1cae6b4d508abb 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -17,7 +17,7 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; #define sym_new_unknown _Py_uop_sym_new_unknown #define sym_new_not_null _Py_uop_sym_new_not_null #define sym_new_type _Py_uop_sym_new_type -#define sym_is_null _Py_uop_sym_is_null +#define sym_is_null _Py_uop_sym_is_nullGUARD_BUILTINS_VERSION #define sym_new_const _Py_uop_sym_new_const #define sym_new_null _Py_uop_sym_new_null #define sym_matches_type _Py_uop_sym_matches_type diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 6030570f49f90a..37653d0e6dda00 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -384,7 +384,7 @@ def find_assignment_target(node: parser.InstDef, idx: int) -> list[lexer.Token]: """Find the tokens that make up the left-hand side of an assignment""" offset = 0 
for tkn in reversed(node.block.tokens[: idx]): - if tkn.kind in {"SEMI", "LBRACE", "RBRACE"}: + if tkn.kind in {"SEMI", "LBRACE", "RBRACE", "CMACRO"}: return node.block.tokens[idx - offset : idx] offset += 1 return [] @@ -548,6 +548,7 @@ def has_error_without_pop(op: parser.InstDef) -> bool: "PyStackRef_FromPyObjectImmortal", "PyStackRef_FromPyObjectNew", "PyStackRef_FromPyObjectSteal", + "PyStackRef_FromPyObjectStealMortal", "PyStackRef_HasCount", "PyStackRef_IsNone", "PyStackRef_Is", From 806c28ab94c216a3e763a72403e604bf6ba01c8d Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 5 Dec 2024 18:15:48 +0000 Subject: [PATCH 14/53] Attempt to use tagged RC for a bit of extra speed --- Include/internal/pycore_opcode_metadata.h | 13 ++- Include/internal/pycore_stackref.h | 6 + Include/internal/pycore_uop_ids.h | 105 +++++++++-------- Include/internal/pycore_uop_metadata.h | 37 ++++++ Include/opcode_ids.h | 63 +++++----- Lib/_opcode_metadata.py | 63 +++++----- Python/bytecodes.c | 6 + Python/executor_cases.c.h | 134 ++++++++++++++++++++++ Python/flowgraph.c | 7 ++ Python/generated_cases.c.h | 16 +++ Python/opcode_targets.h | 2 +- Python/optimizer_bytecodes.c | 2 +- Python/optimizer_cases.c.h | 9 ++ 13 files changed, 350 insertions(+), 113 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index ed696785599da4..9f3f96c509d59c 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -327,6 +327,8 @@ int _PyOpcode_num_popped(int opcode, int oparg) { return 0; case LOAD_FAST_LOAD_FAST: return 0; + case LOAD_FAST_WITH_COUNT: + return 0; case LOAD_FROM_DICT_OR_DEREF: return 1; case LOAD_FROM_DICT_OR_GLOBALS: @@ -786,6 +788,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { return 1; case LOAD_FAST_LOAD_FAST: return 2; + case LOAD_FAST_WITH_COUNT: + return 1; case LOAD_FROM_DICT_OR_DEREF: return 1; case LOAD_FROM_DICT_OR_GLOBALS: @@ -1555,6 +1559,10 @@ int 
_PyOpcode_max_stack_effect(int opcode, int oparg, int *effect) { *effect = 2; return 0; } + case LOAD_FAST_WITH_COUNT: { + *effect = 1; + return 0; + } case LOAD_FROM_DICT_OR_DEREF: { *effect = 0; return 0; @@ -2080,6 +2088,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [LOAD_FAST_AND_CLEAR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [LOAD_FAST_CHECK] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FAST_LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, + [LOAD_FAST_WITH_COUNT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG }, [LOAD_FROM_DICT_OR_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FROM_DICT_OR_GLOBALS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [LOAD_GLOBAL] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -2286,6 +2295,7 @@ _PyOpcode_macro_expansion[256] = { [LOAD_FAST_AND_CLEAR] = { .nuops = 1, .uops = { { _LOAD_FAST_AND_CLEAR, 0, 0 } } }, [LOAD_FAST_CHECK] = { .nuops = 1, .uops = { { _LOAD_FAST_CHECK, 0, 0 } } }, [LOAD_FAST_LOAD_FAST] = { .nuops = 2, .uops = { { _LOAD_FAST, 5, 0 }, { _LOAD_FAST, 6, 0 } } }, + [LOAD_FAST_WITH_COUNT] = { .nuops = 1, .uops = { { _LOAD_FAST_WITH_COUNT, 0, 0 } } }, [LOAD_FROM_DICT_OR_DEREF] = { .nuops = 1, .uops = { { _LOAD_FROM_DICT_OR_DEREF, 0, 0 } } }, [LOAD_GLOBAL] = { .nuops = 1, .uops = { { _LOAD_GLOBAL, 0, 0 } } }, [LOAD_GLOBAL_BUILTIN] = { .nuops = 3, .uops = { { _GUARD_GLOBALS_VERSION, 1, 1 }, { _GUARD_BUILTINS_VERSION_PUSH_KEYS, 1, 2 }, { _LOAD_GLOBAL_BUILTINS_FROM_KEYS, 1, 3 } } }, @@ -2504,6 +2514,7 @@ const char *_PyOpcode_OpName[266] = { [LOAD_FAST_AND_CLEAR] = "LOAD_FAST_AND_CLEAR", [LOAD_FAST_CHECK] = "LOAD_FAST_CHECK", [LOAD_FAST_LOAD_FAST] = 
"LOAD_FAST_LOAD_FAST", + [LOAD_FAST_WITH_COUNT] = "LOAD_FAST_WITH_COUNT", [LOAD_FROM_DICT_OR_DEREF] = "LOAD_FROM_DICT_OR_DEREF", [LOAD_FROM_DICT_OR_GLOBALS] = "LOAD_FROM_DICT_OR_GLOBALS", [LOAD_GLOBAL] = "LOAD_GLOBAL", @@ -2755,6 +2766,7 @@ const uint8_t _PyOpcode_Deopt[256] = { [LOAD_FAST_AND_CLEAR] = LOAD_FAST_AND_CLEAR, [LOAD_FAST_CHECK] = LOAD_FAST_CHECK, [LOAD_FAST_LOAD_FAST] = LOAD_FAST_LOAD_FAST, + [LOAD_FAST_WITH_COUNT] = LOAD_FAST_WITH_COUNT, [LOAD_FROM_DICT_OR_DEREF] = LOAD_FROM_DICT_OR_DEREF, [LOAD_FROM_DICT_OR_GLOBALS] = LOAD_FROM_DICT_OR_GLOBALS, [LOAD_GLOBAL] = LOAD_GLOBAL, @@ -2833,7 +2845,6 @@ const uint8_t _PyOpcode_Deopt[256] = { #endif // NEED_OPCODE_METADATA #define EXTRA_CASES \ - case 116: \ case 117: \ case 118: \ case 119: \ diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 8424ac8183fb02..209bda6e6be3a6 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -347,6 +347,12 @@ PyStackRef_FromPyObjectImmortal(PyObject *obj) return (_PyStackRef){ .bits = (uintptr_t)obj | Py_TAG_IMMORTAL}; } +static inline _PyStackRef +PyStackRef_WithCount(_PyStackRef ref) +{ + return (_PyStackRef){ .bits = ref.bits | Py_TAG_REFCNT }; +} + static inline _PyStackRef PyStackRef_DUP(_PyStackRef ref) { diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index fab4ce6a25b347..2b213a5b070ce2 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -215,86 +215,95 @@ extern "C" { #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST +#define _LOAD_FAST_WITH_COUNT 440 +#define _LOAD_FAST_WITH_COUNT_0 441 +#define _LOAD_FAST_WITH_COUNT_1 442 +#define _LOAD_FAST_WITH_COUNT_2 443 +#define _LOAD_FAST_WITH_COUNT_3 444 +#define _LOAD_FAST_WITH_COUNT_4 445 +#define _LOAD_FAST_WITH_COUNT_5 446 +#define _LOAD_FAST_WITH_COUNT_6 447 +#define 
_LOAD_FAST_WITH_COUNT_7 448 #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 440 -#define _LOAD_GLOBAL_BUILTINS 441 -#define _LOAD_GLOBAL_BUILTINS_FROM_KEYS 442 -#define _LOAD_GLOBAL_MODULE 443 -#define _LOAD_GLOBAL_MODULE_FROM_KEYS 444 +#define _LOAD_GLOBAL 449 +#define _LOAD_GLOBAL_BUILTINS 450 +#define _LOAD_GLOBAL_BUILTINS_FROM_KEYS 451 +#define _LOAD_GLOBAL_MODULE 452 +#define _LOAD_GLOBAL_MODULE_FROM_KEYS 453 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 445 -#define _LOAD_SMALL_INT_0 446 -#define _LOAD_SMALL_INT_1 447 -#define _LOAD_SMALL_INT_2 448 -#define _LOAD_SMALL_INT_3 449 +#define _LOAD_SMALL_INT 454 +#define _LOAD_SMALL_INT_0 455 +#define _LOAD_SMALL_INT_1 456 +#define _LOAD_SMALL_INT_2 457 +#define _LOAD_SMALL_INT_3 458 #define _LOAD_SPECIAL LOAD_SPECIAL #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 450 +#define _MAKE_CALLARGS_A_TUPLE 459 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 451 +#define _MAKE_WARM 460 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 452 -#define _MAYBE_EXPAND_METHOD_KW 453 -#define _MONITOR_CALL 454 -#define _MONITOR_JUMP_BACKWARD 455 -#define _MONITOR_RESUME 456 +#define _MAYBE_EXPAND_METHOD 461 +#define _MAYBE_EXPAND_METHOD_KW 462 +#define _MONITOR_CALL 463 +#define _MONITOR_JUMP_BACKWARD 464 +#define _MONITOR_RESUME 465 #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 457 -#define _POP_JUMP_IF_TRUE 458 +#define _POP_JUMP_IF_FALSE 466 +#define _POP_JUMP_IF_TRUE 467 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 459 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 468 #define 
_PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 460 +#define _PUSH_FRAME 469 #define _PUSH_NULL PUSH_NULL -#define _PY_FRAME_GENERAL 461 -#define _PY_FRAME_KW 462 -#define _QUICKEN_RESUME 463 -#define _REPLACE_WITH_TRUE 464 +#define _PY_FRAME_GENERAL 470 +#define _PY_FRAME_KW 471 +#define _QUICKEN_RESUME 472 +#define _REPLACE_WITH_TRUE 473 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 465 -#define _SEND 466 -#define _SEND_GEN_FRAME 467 +#define _SAVE_RETURN_OFFSET 474 +#define _SEND 475 +#define _SEND_GEN_FRAME 476 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 468 -#define _STORE_ATTR 469 -#define _STORE_ATTR_INSTANCE_VALUE 470 -#define _STORE_ATTR_SLOT 471 -#define _STORE_ATTR_WITH_HINT 472 +#define _START_EXECUTOR 477 +#define _STORE_ATTR 478 +#define _STORE_ATTR_INSTANCE_VALUE 479 +#define _STORE_ATTR_SLOT 480 +#define _STORE_ATTR_WITH_HINT 481 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 473 -#define _STORE_FAST_0 474 -#define _STORE_FAST_1 475 -#define _STORE_FAST_2 476 -#define _STORE_FAST_3 477 -#define _STORE_FAST_4 478 -#define _STORE_FAST_5 479 -#define _STORE_FAST_6 480 -#define _STORE_FAST_7 481 +#define _STORE_FAST 482 +#define _STORE_FAST_0 483 +#define _STORE_FAST_1 484 +#define _STORE_FAST_2 485 +#define _STORE_FAST_3 486 +#define _STORE_FAST_4 487 +#define _STORE_FAST_5 488 +#define _STORE_FAST_6 489 +#define _STORE_FAST_7 490 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 482 -#define _STORE_SUBSCR 483 +#define _STORE_SLICE 491 +#define _STORE_SUBSCR 492 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP 
SWAP -#define _TIER2_RESUME_CHECK 484 -#define _TO_BOOL 485 +#define _TIER2_RESUME_CHECK 493 +#define _TO_BOOL 494 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -304,13 +313,13 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 486 +#define _UNPACK_SEQUENCE 495 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 486 +#define MAX_UOP_ID 495 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 3becf27422f705..bd6ba7a18f8a2f 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -33,6 +33,15 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_FAST_6] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, [_LOAD_FAST_7] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, [_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_WITH_COUNT_0] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_WITH_COUNT_1] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_WITH_COUNT_2] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_WITH_COUNT_3] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_WITH_COUNT_4] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_WITH_COUNT_5] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_WITH_COUNT_6] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_WITH_COUNT_7] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_WITH_COUNT] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_LOAD_FAST_AND_CLEAR] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_FAST_LOAD_FAST] = 
HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_CONST] = HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, @@ -296,6 +305,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = { [_LOAD_FAST] = 8, + [_LOAD_FAST_WITH_COUNT] = 8, [_LOAD_SMALL_INT] = 4, [_STORE_FAST] = 8, [_INIT_CALL_PY_EXACT_ARGS] = 5, @@ -487,6 +497,15 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_LOAD_FAST_AND_CLEAR] = "_LOAD_FAST_AND_CLEAR", [_LOAD_FAST_CHECK] = "_LOAD_FAST_CHECK", [_LOAD_FAST_LOAD_FAST] = "_LOAD_FAST_LOAD_FAST", + [_LOAD_FAST_WITH_COUNT] = "_LOAD_FAST_WITH_COUNT", + [_LOAD_FAST_WITH_COUNT_0] = "_LOAD_FAST_WITH_COUNT_0", + [_LOAD_FAST_WITH_COUNT_1] = "_LOAD_FAST_WITH_COUNT_1", + [_LOAD_FAST_WITH_COUNT_2] = "_LOAD_FAST_WITH_COUNT_2", + [_LOAD_FAST_WITH_COUNT_3] = "_LOAD_FAST_WITH_COUNT_3", + [_LOAD_FAST_WITH_COUNT_4] = "_LOAD_FAST_WITH_COUNT_4", + [_LOAD_FAST_WITH_COUNT_5] = "_LOAD_FAST_WITH_COUNT_5", + [_LOAD_FAST_WITH_COUNT_6] = "_LOAD_FAST_WITH_COUNT_6", + [_LOAD_FAST_WITH_COUNT_7] = "_LOAD_FAST_WITH_COUNT_7", [_LOAD_FROM_DICT_OR_DEREF] = "_LOAD_FROM_DICT_OR_DEREF", [_LOAD_GLOBAL] = "_LOAD_GLOBAL", [_LOAD_GLOBAL_BUILTINS] = "_LOAD_GLOBAL_BUILTINS", @@ -607,6 +626,24 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _LOAD_FAST: return 0; + case _LOAD_FAST_WITH_COUNT_0: + return 0; + case _LOAD_FAST_WITH_COUNT_1: + return 0; + case _LOAD_FAST_WITH_COUNT_2: + return 0; + case _LOAD_FAST_WITH_COUNT_3: + return 0; + case _LOAD_FAST_WITH_COUNT_4: + return 0; + case _LOAD_FAST_WITH_COUNT_5: + return 0; + case _LOAD_FAST_WITH_COUNT_6: + return 0; + case _LOAD_FAST_WITH_COUNT_7: + return 0; + case _LOAD_FAST_WITH_COUNT: + return 0; case _LOAD_FAST_AND_CLEAR: return 0; case _LOAD_FAST_LOAD_FAST: diff --git a/Include/opcode_ids.h b/Include/opcode_ids.h index ce3d23eaa6d56d..aa363552b80e9b 100644 --- a/Include/opcode_ids.h +++ b/Include/opcode_ids.h @@ -95,37 +95,38 @@ extern "C" { #define LOAD_FAST_AND_CLEAR 82 
#define LOAD_FAST_CHECK 83 #define LOAD_FAST_LOAD_FAST 84 -#define LOAD_FROM_DICT_OR_DEREF 85 -#define LOAD_FROM_DICT_OR_GLOBALS 86 -#define LOAD_GLOBAL 87 -#define LOAD_NAME 88 -#define LOAD_SMALL_INT 89 -#define LOAD_SPECIAL 90 -#define LOAD_SUPER_ATTR 91 -#define MAKE_CELL 92 -#define MAP_ADD 93 -#define MATCH_CLASS 94 -#define POP_JUMP_IF_FALSE 95 -#define POP_JUMP_IF_NONE 96 -#define POP_JUMP_IF_NOT_NONE 97 -#define POP_JUMP_IF_TRUE 98 -#define RAISE_VARARGS 99 -#define RERAISE 100 -#define SEND 101 -#define SET_ADD 102 -#define SET_FUNCTION_ATTRIBUTE 103 -#define SET_UPDATE 104 -#define STORE_ATTR 105 -#define STORE_DEREF 106 -#define STORE_FAST 107 -#define STORE_FAST_LOAD_FAST 108 -#define STORE_FAST_STORE_FAST 109 -#define STORE_GLOBAL 110 -#define STORE_NAME 111 -#define SWAP 112 -#define UNPACK_EX 113 -#define UNPACK_SEQUENCE 114 -#define YIELD_VALUE 115 +#define LOAD_FAST_WITH_COUNT 85 +#define LOAD_FROM_DICT_OR_DEREF 86 +#define LOAD_FROM_DICT_OR_GLOBALS 87 +#define LOAD_GLOBAL 88 +#define LOAD_NAME 89 +#define LOAD_SMALL_INT 90 +#define LOAD_SPECIAL 91 +#define LOAD_SUPER_ATTR 92 +#define MAKE_CELL 93 +#define MAP_ADD 94 +#define MATCH_CLASS 95 +#define POP_JUMP_IF_FALSE 96 +#define POP_JUMP_IF_NONE 97 +#define POP_JUMP_IF_NOT_NONE 98 +#define POP_JUMP_IF_TRUE 99 +#define RAISE_VARARGS 100 +#define RERAISE 101 +#define SEND 102 +#define SET_ADD 103 +#define SET_FUNCTION_ATTRIBUTE 104 +#define SET_UPDATE 105 +#define STORE_ATTR 106 +#define STORE_DEREF 107 +#define STORE_FAST 108 +#define STORE_FAST_LOAD_FAST 109 +#define STORE_FAST_STORE_FAST 110 +#define STORE_GLOBAL 111 +#define STORE_NAME 112 +#define SWAP 113 +#define UNPACK_EX 114 +#define UNPACK_SEQUENCE 115 +#define YIELD_VALUE 116 #define RESUME 149 #define BINARY_OP_ADD_FLOAT 150 #define BINARY_OP_ADD_INT 151 diff --git a/Lib/_opcode_metadata.py b/Lib/_opcode_metadata.py index cda3c340c322f3..43d4ef96ec0344 100644 --- a/Lib/_opcode_metadata.py +++ b/Lib/_opcode_metadata.py @@ -288,37 +288,38 
@@ 'LOAD_FAST_AND_CLEAR': 82, 'LOAD_FAST_CHECK': 83, 'LOAD_FAST_LOAD_FAST': 84, - 'LOAD_FROM_DICT_OR_DEREF': 85, - 'LOAD_FROM_DICT_OR_GLOBALS': 86, - 'LOAD_GLOBAL': 87, - 'LOAD_NAME': 88, - 'LOAD_SMALL_INT': 89, - 'LOAD_SPECIAL': 90, - 'LOAD_SUPER_ATTR': 91, - 'MAKE_CELL': 92, - 'MAP_ADD': 93, - 'MATCH_CLASS': 94, - 'POP_JUMP_IF_FALSE': 95, - 'POP_JUMP_IF_NONE': 96, - 'POP_JUMP_IF_NOT_NONE': 97, - 'POP_JUMP_IF_TRUE': 98, - 'RAISE_VARARGS': 99, - 'RERAISE': 100, - 'SEND': 101, - 'SET_ADD': 102, - 'SET_FUNCTION_ATTRIBUTE': 103, - 'SET_UPDATE': 104, - 'STORE_ATTR': 105, - 'STORE_DEREF': 106, - 'STORE_FAST': 107, - 'STORE_FAST_LOAD_FAST': 108, - 'STORE_FAST_STORE_FAST': 109, - 'STORE_GLOBAL': 110, - 'STORE_NAME': 111, - 'SWAP': 112, - 'UNPACK_EX': 113, - 'UNPACK_SEQUENCE': 114, - 'YIELD_VALUE': 115, + 'LOAD_FAST_WITH_COUNT': 85, + 'LOAD_FROM_DICT_OR_DEREF': 86, + 'LOAD_FROM_DICT_OR_GLOBALS': 87, + 'LOAD_GLOBAL': 88, + 'LOAD_NAME': 89, + 'LOAD_SMALL_INT': 90, + 'LOAD_SPECIAL': 91, + 'LOAD_SUPER_ATTR': 92, + 'MAKE_CELL': 93, + 'MAP_ADD': 94, + 'MATCH_CLASS': 95, + 'POP_JUMP_IF_FALSE': 96, + 'POP_JUMP_IF_NONE': 97, + 'POP_JUMP_IF_NOT_NONE': 98, + 'POP_JUMP_IF_TRUE': 99, + 'RAISE_VARARGS': 100, + 'RERAISE': 101, + 'SEND': 102, + 'SET_ADD': 103, + 'SET_FUNCTION_ATTRIBUTE': 104, + 'SET_UPDATE': 105, + 'STORE_ATTR': 106, + 'STORE_DEREF': 107, + 'STORE_FAST': 108, + 'STORE_FAST_LOAD_FAST': 109, + 'STORE_FAST_STORE_FAST': 110, + 'STORE_GLOBAL': 111, + 'STORE_NAME': 112, + 'SWAP': 113, + 'UNPACK_EX': 114, + 'UNPACK_SEQUENCE': 115, + 'YIELD_VALUE': 116, 'INSTRUMENTED_END_FOR': 237, 'INSTRUMENTED_END_SEND': 238, 'INSTRUMENTED_LOAD_SUPER_ATTR': 239, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 0b67e7b0390787..ec64ff500c903b 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -266,6 +266,12 @@ dummy_func( value = PyStackRef_DUP(GETLOCAL(oparg)); } + replicate(8) pure inst(LOAD_FAST_WITH_COUNT, (-- value)) { + _PyStackRef local = GETLOCAL(oparg); + 
assert(!PyStackRef_IsNull(local)); + value = PyStackRef_WithCount(local); + } + inst(LOAD_FAST_AND_CLEAR, (-- value)) { value = GETLOCAL(oparg); // do not use SETLOCAL here, it decrefs the old value diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index cc407a2f0efd13..39d95e9158519d 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -197,6 +197,140 @@ break; } + case _LOAD_FAST_WITH_COUNT_0: { + _PyStackRef value; + oparg = 0; + assert(oparg == CURRENT_OPARG()); + _PyStackRef local = GETLOCAL(oparg); + assert(!PyStackRef_IsNull(local)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = PyStackRef_WithCount(local); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_WITH_COUNT_1: { + _PyStackRef value; + oparg = 1; + assert(oparg == CURRENT_OPARG()); + _PyStackRef local = GETLOCAL(oparg); + assert(!PyStackRef_IsNull(local)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = PyStackRef_WithCount(local); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_WITH_COUNT_2: { + _PyStackRef value; + oparg = 2; + assert(oparg == CURRENT_OPARG()); + _PyStackRef local = GETLOCAL(oparg); + assert(!PyStackRef_IsNull(local)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = PyStackRef_WithCount(local); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_WITH_COUNT_3: { + _PyStackRef value; + oparg = 3; + assert(oparg == CURRENT_OPARG()); + _PyStackRef local = GETLOCAL(oparg); + assert(!PyStackRef_IsNull(local)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = PyStackRef_WithCount(local); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[0] = 
value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_WITH_COUNT_4: { + _PyStackRef value; + oparg = 4; + assert(oparg == CURRENT_OPARG()); + _PyStackRef local = GETLOCAL(oparg); + assert(!PyStackRef_IsNull(local)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = PyStackRef_WithCount(local); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_WITH_COUNT_5: { + _PyStackRef value; + oparg = 5; + assert(oparg == CURRENT_OPARG()); + _PyStackRef local = GETLOCAL(oparg); + assert(!PyStackRef_IsNull(local)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = PyStackRef_WithCount(local); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_WITH_COUNT_6: { + _PyStackRef value; + oparg = 6; + assert(oparg == CURRENT_OPARG()); + _PyStackRef local = GETLOCAL(oparg); + assert(!PyStackRef_IsNull(local)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = PyStackRef_WithCount(local); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_WITH_COUNT_7: { + _PyStackRef value; + oparg = 7; + assert(oparg == CURRENT_OPARG()); + _PyStackRef local = GETLOCAL(oparg); + assert(!PyStackRef_IsNull(local)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = PyStackRef_WithCount(local); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_WITH_COUNT: { + _PyStackRef value; + oparg = CURRENT_OPARG(); + _PyStackRef local = GETLOCAL(oparg); + assert(!PyStackRef_IsNull(local)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = PyStackRef_WithCount(local); + 
stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _LOAD_FAST_AND_CLEAR: { _PyStackRef value; oparg = CURRENT_OPARG(); diff --git a/Python/flowgraph.c b/Python/flowgraph.c index b1097b64469ecd..b75dbda7b9b413 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -1871,6 +1871,13 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) continue; } break; + case LOAD_FAST: + /* If the local is consumed immediately, we can use an empheral + * reference count */ + if (nextop == LOAD_ATTR) { + INSTR_SET_OP1(inst, LOAD_FAST_WITH_COUNT, oparg); + } + break; } } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index e1aa9530bd66b3..fd6f0bd2f5c6d8 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -5993,6 +5993,22 @@ DISPATCH(); } + TARGET(LOAD_FAST_WITH_COUNT) { + frame->instr_ptr = next_instr; + next_instr += 1; + INSTRUCTION_STATS(LOAD_FAST_WITH_COUNT); + _PyStackRef value; + _PyStackRef local = GETLOCAL(oparg); + assert(!PyStackRef_IsNull(local)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = PyStackRef_WithCount(local); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + DISPATCH(); + } + TARGET(LOAD_FROM_DICT_OR_DEREF) { frame->instr_ptr = next_instr; next_instr += 1; diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index c93941dcac4abf..505c93604f95cc 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -84,6 +84,7 @@ static void *opcode_targets[256] = { &&TARGET_LOAD_FAST_AND_CLEAR, &&TARGET_LOAD_FAST_CHECK, &&TARGET_LOAD_FAST_LOAD_FAST, + &&TARGET_LOAD_FAST_WITH_COUNT, &&TARGET_LOAD_FROM_DICT_OR_DEREF, &&TARGET_LOAD_FROM_DICT_OR_GLOBALS, &&TARGET_LOAD_GLOBAL, @@ -147,7 +148,6 @@ static void *opcode_targets[256] = { &&_unknown_opcode, &&_unknown_opcode, 
&&_unknown_opcode, - &&_unknown_opcode, &&TARGET_RESUME, &&TARGET_BINARY_OP_ADD_FLOAT, &&TARGET_BINARY_OP_ADD_INT, diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 1cae6b4d508abb..42bdbd9ca8d0cd 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -17,7 +17,7 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; #define sym_new_unknown _Py_uop_sym_new_unknown #define sym_new_not_null _Py_uop_sym_new_not_null #define sym_new_type _Py_uop_sym_new_type -#define sym_is_null _Py_uop_sym_is_nullGUARD_BUILTINS_VERSION +#define sym_is_null _Py_uop_sym_is_null #define sym_new_const _Py_uop_sym_new_const #define sym_new_null _Py_uop_sym_new_null #define sym_matches_type _Py_uop_sym_matches_type diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index f77a5aa35bdf82..4e1249279f350d 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -47,6 +47,15 @@ break; } + case _LOAD_FAST_WITH_COUNT: { + _Py_UopsSymbol *value; + value = sym_new_not_null(ctx); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _LOAD_FAST_AND_CLEAR: { _Py_UopsSymbol *value; value = GETLOCAL(oparg); From 0c20416a5b0d841e2e44c2db42305bc357cb1104 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 6 Dec 2024 11:19:09 +0000 Subject: [PATCH 15/53] Revert addition of LOAD_FAST_WITH_COUNT --- Include/internal/pycore_opcode_metadata.h | 13 +-- Include/internal/pycore_uop_ids.h | 105 ++++++++--------- Include/internal/pycore_uop_metadata.h | 37 ------ Include/opcode_ids.h | 63 +++++----- Lib/_opcode_metadata.py | 63 +++++----- Python/bytecodes.c | 6 - Python/executor_cases.c.h | 134 ---------------------- Python/flowgraph.c | 7 -- Python/gc.c | 6 +- Python/generated_cases.c.h | 16 --- Python/opcode_targets.h | 2 +- Python/optimizer_cases.c.h | 9 -- 12 files changed, 114 insertions(+), 347 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h 
b/Include/internal/pycore_opcode_metadata.h index 9f3f96c509d59c..ed696785599da4 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -327,8 +327,6 @@ int _PyOpcode_num_popped(int opcode, int oparg) { return 0; case LOAD_FAST_LOAD_FAST: return 0; - case LOAD_FAST_WITH_COUNT: - return 0; case LOAD_FROM_DICT_OR_DEREF: return 1; case LOAD_FROM_DICT_OR_GLOBALS: @@ -788,8 +786,6 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { return 1; case LOAD_FAST_LOAD_FAST: return 2; - case LOAD_FAST_WITH_COUNT: - return 1; case LOAD_FROM_DICT_OR_DEREF: return 1; case LOAD_FROM_DICT_OR_GLOBALS: @@ -1559,10 +1555,6 @@ int _PyOpcode_max_stack_effect(int opcode, int oparg, int *effect) { *effect = 2; return 0; } - case LOAD_FAST_WITH_COUNT: { - *effect = 1; - return 0; - } case LOAD_FROM_DICT_OR_DEREF: { *effect = 0; return 0; @@ -2088,7 +2080,6 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [LOAD_FAST_AND_CLEAR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [LOAD_FAST_CHECK] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FAST_LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, - [LOAD_FAST_WITH_COUNT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG }, [LOAD_FROM_DICT_OR_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FROM_DICT_OR_GLOBALS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [LOAD_GLOBAL] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -2295,7 +2286,6 @@ _PyOpcode_macro_expansion[256] = { [LOAD_FAST_AND_CLEAR] = { .nuops = 1, .uops = { { _LOAD_FAST_AND_CLEAR, 0, 0 } } }, [LOAD_FAST_CHECK] = { .nuops = 1, .uops = { { _LOAD_FAST_CHECK, 0, 0 } } }, [LOAD_FAST_LOAD_FAST] = { .nuops = 2, 
.uops = { { _LOAD_FAST, 5, 0 }, { _LOAD_FAST, 6, 0 } } }, - [LOAD_FAST_WITH_COUNT] = { .nuops = 1, .uops = { { _LOAD_FAST_WITH_COUNT, 0, 0 } } }, [LOAD_FROM_DICT_OR_DEREF] = { .nuops = 1, .uops = { { _LOAD_FROM_DICT_OR_DEREF, 0, 0 } } }, [LOAD_GLOBAL] = { .nuops = 1, .uops = { { _LOAD_GLOBAL, 0, 0 } } }, [LOAD_GLOBAL_BUILTIN] = { .nuops = 3, .uops = { { _GUARD_GLOBALS_VERSION, 1, 1 }, { _GUARD_BUILTINS_VERSION_PUSH_KEYS, 1, 2 }, { _LOAD_GLOBAL_BUILTINS_FROM_KEYS, 1, 3 } } }, @@ -2514,7 +2504,6 @@ const char *_PyOpcode_OpName[266] = { [LOAD_FAST_AND_CLEAR] = "LOAD_FAST_AND_CLEAR", [LOAD_FAST_CHECK] = "LOAD_FAST_CHECK", [LOAD_FAST_LOAD_FAST] = "LOAD_FAST_LOAD_FAST", - [LOAD_FAST_WITH_COUNT] = "LOAD_FAST_WITH_COUNT", [LOAD_FROM_DICT_OR_DEREF] = "LOAD_FROM_DICT_OR_DEREF", [LOAD_FROM_DICT_OR_GLOBALS] = "LOAD_FROM_DICT_OR_GLOBALS", [LOAD_GLOBAL] = "LOAD_GLOBAL", @@ -2766,7 +2755,6 @@ const uint8_t _PyOpcode_Deopt[256] = { [LOAD_FAST_AND_CLEAR] = LOAD_FAST_AND_CLEAR, [LOAD_FAST_CHECK] = LOAD_FAST_CHECK, [LOAD_FAST_LOAD_FAST] = LOAD_FAST_LOAD_FAST, - [LOAD_FAST_WITH_COUNT] = LOAD_FAST_WITH_COUNT, [LOAD_FROM_DICT_OR_DEREF] = LOAD_FROM_DICT_OR_DEREF, [LOAD_FROM_DICT_OR_GLOBALS] = LOAD_FROM_DICT_OR_GLOBALS, [LOAD_GLOBAL] = LOAD_GLOBAL, @@ -2845,6 +2833,7 @@ const uint8_t _PyOpcode_Deopt[256] = { #endif // NEED_OPCODE_METADATA #define EXTRA_CASES \ + case 116: \ case 117: \ case 118: \ case 119: \ diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 2b213a5b070ce2..fab4ce6a25b347 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -215,95 +215,86 @@ extern "C" { #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST -#define _LOAD_FAST_WITH_COUNT 440 -#define _LOAD_FAST_WITH_COUNT_0 441 -#define _LOAD_FAST_WITH_COUNT_1 442 -#define _LOAD_FAST_WITH_COUNT_2 443 -#define _LOAD_FAST_WITH_COUNT_3 444 -#define 
_LOAD_FAST_WITH_COUNT_4 445 -#define _LOAD_FAST_WITH_COUNT_5 446 -#define _LOAD_FAST_WITH_COUNT_6 447 -#define _LOAD_FAST_WITH_COUNT_7 448 #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 449 -#define _LOAD_GLOBAL_BUILTINS 450 -#define _LOAD_GLOBAL_BUILTINS_FROM_KEYS 451 -#define _LOAD_GLOBAL_MODULE 452 -#define _LOAD_GLOBAL_MODULE_FROM_KEYS 453 +#define _LOAD_GLOBAL 440 +#define _LOAD_GLOBAL_BUILTINS 441 +#define _LOAD_GLOBAL_BUILTINS_FROM_KEYS 442 +#define _LOAD_GLOBAL_MODULE 443 +#define _LOAD_GLOBAL_MODULE_FROM_KEYS 444 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 454 -#define _LOAD_SMALL_INT_0 455 -#define _LOAD_SMALL_INT_1 456 -#define _LOAD_SMALL_INT_2 457 -#define _LOAD_SMALL_INT_3 458 +#define _LOAD_SMALL_INT 445 +#define _LOAD_SMALL_INT_0 446 +#define _LOAD_SMALL_INT_1 447 +#define _LOAD_SMALL_INT_2 448 +#define _LOAD_SMALL_INT_3 449 #define _LOAD_SPECIAL LOAD_SPECIAL #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 459 +#define _MAKE_CALLARGS_A_TUPLE 450 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 460 +#define _MAKE_WARM 451 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 461 -#define _MAYBE_EXPAND_METHOD_KW 462 -#define _MONITOR_CALL 463 -#define _MONITOR_JUMP_BACKWARD 464 -#define _MONITOR_RESUME 465 +#define _MAYBE_EXPAND_METHOD 452 +#define _MAYBE_EXPAND_METHOD_KW 453 +#define _MONITOR_CALL 454 +#define _MONITOR_JUMP_BACKWARD 455 +#define _MONITOR_RESUME 456 #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 466 -#define _POP_JUMP_IF_TRUE 467 +#define _POP_JUMP_IF_FALSE 457 +#define _POP_JUMP_IF_TRUE 458 #define _POP_TOP 
POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 468 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 459 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 469 +#define _PUSH_FRAME 460 #define _PUSH_NULL PUSH_NULL -#define _PY_FRAME_GENERAL 470 -#define _PY_FRAME_KW 471 -#define _QUICKEN_RESUME 472 -#define _REPLACE_WITH_TRUE 473 +#define _PY_FRAME_GENERAL 461 +#define _PY_FRAME_KW 462 +#define _QUICKEN_RESUME 463 +#define _REPLACE_WITH_TRUE 464 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 474 -#define _SEND 475 -#define _SEND_GEN_FRAME 476 +#define _SAVE_RETURN_OFFSET 465 +#define _SEND 466 +#define _SEND_GEN_FRAME 467 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 477 -#define _STORE_ATTR 478 -#define _STORE_ATTR_INSTANCE_VALUE 479 -#define _STORE_ATTR_SLOT 480 -#define _STORE_ATTR_WITH_HINT 481 +#define _START_EXECUTOR 468 +#define _STORE_ATTR 469 +#define _STORE_ATTR_INSTANCE_VALUE 470 +#define _STORE_ATTR_SLOT 471 +#define _STORE_ATTR_WITH_HINT 472 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 482 -#define _STORE_FAST_0 483 -#define _STORE_FAST_1 484 -#define _STORE_FAST_2 485 -#define _STORE_FAST_3 486 -#define _STORE_FAST_4 487 -#define _STORE_FAST_5 488 -#define _STORE_FAST_6 489 -#define _STORE_FAST_7 490 +#define _STORE_FAST 473 +#define _STORE_FAST_0 474 +#define _STORE_FAST_1 475 +#define _STORE_FAST_2 476 +#define _STORE_FAST_3 477 +#define _STORE_FAST_4 478 +#define _STORE_FAST_5 479 +#define _STORE_FAST_6 480 +#define _STORE_FAST_7 481 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 491 -#define _STORE_SUBSCR 492 +#define _STORE_SLICE 482 +#define _STORE_SUBSCR 483 
#define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 493 -#define _TO_BOOL 494 +#define _TIER2_RESUME_CHECK 484 +#define _TO_BOOL 485 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -313,13 +304,13 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 495 +#define _UNPACK_SEQUENCE 486 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 495 +#define MAX_UOP_ID 486 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index bd6ba7a18f8a2f..3becf27422f705 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -33,15 +33,6 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_FAST_6] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, [_LOAD_FAST_7] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, [_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_WITH_COUNT_0] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_WITH_COUNT_1] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_WITH_COUNT_2] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_WITH_COUNT_3] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_WITH_COUNT_4] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_WITH_COUNT_5] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_WITH_COUNT_6] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_WITH_COUNT_7] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_WITH_COUNT] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | 
HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_LOAD_FAST_AND_CLEAR] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_CONST] = HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, @@ -305,7 +296,6 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = { [_LOAD_FAST] = 8, - [_LOAD_FAST_WITH_COUNT] = 8, [_LOAD_SMALL_INT] = 4, [_STORE_FAST] = 8, [_INIT_CALL_PY_EXACT_ARGS] = 5, @@ -497,15 +487,6 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_LOAD_FAST_AND_CLEAR] = "_LOAD_FAST_AND_CLEAR", [_LOAD_FAST_CHECK] = "_LOAD_FAST_CHECK", [_LOAD_FAST_LOAD_FAST] = "_LOAD_FAST_LOAD_FAST", - [_LOAD_FAST_WITH_COUNT] = "_LOAD_FAST_WITH_COUNT", - [_LOAD_FAST_WITH_COUNT_0] = "_LOAD_FAST_WITH_COUNT_0", - [_LOAD_FAST_WITH_COUNT_1] = "_LOAD_FAST_WITH_COUNT_1", - [_LOAD_FAST_WITH_COUNT_2] = "_LOAD_FAST_WITH_COUNT_2", - [_LOAD_FAST_WITH_COUNT_3] = "_LOAD_FAST_WITH_COUNT_3", - [_LOAD_FAST_WITH_COUNT_4] = "_LOAD_FAST_WITH_COUNT_4", - [_LOAD_FAST_WITH_COUNT_5] = "_LOAD_FAST_WITH_COUNT_5", - [_LOAD_FAST_WITH_COUNT_6] = "_LOAD_FAST_WITH_COUNT_6", - [_LOAD_FAST_WITH_COUNT_7] = "_LOAD_FAST_WITH_COUNT_7", [_LOAD_FROM_DICT_OR_DEREF] = "_LOAD_FROM_DICT_OR_DEREF", [_LOAD_GLOBAL] = "_LOAD_GLOBAL", [_LOAD_GLOBAL_BUILTINS] = "_LOAD_GLOBAL_BUILTINS", @@ -626,24 +607,6 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _LOAD_FAST: return 0; - case _LOAD_FAST_WITH_COUNT_0: - return 0; - case _LOAD_FAST_WITH_COUNT_1: - return 0; - case _LOAD_FAST_WITH_COUNT_2: - return 0; - case _LOAD_FAST_WITH_COUNT_3: - return 0; - case _LOAD_FAST_WITH_COUNT_4: - return 0; - case _LOAD_FAST_WITH_COUNT_5: - return 0; - case _LOAD_FAST_WITH_COUNT_6: - return 0; - case _LOAD_FAST_WITH_COUNT_7: - return 0; - case _LOAD_FAST_WITH_COUNT: - return 0; case _LOAD_FAST_AND_CLEAR: return 0; case _LOAD_FAST_LOAD_FAST: diff --git a/Include/opcode_ids.h b/Include/opcode_ids.h index aa363552b80e9b..ce3d23eaa6d56d 100644 --- 
a/Include/opcode_ids.h +++ b/Include/opcode_ids.h @@ -95,38 +95,37 @@ extern "C" { #define LOAD_FAST_AND_CLEAR 82 #define LOAD_FAST_CHECK 83 #define LOAD_FAST_LOAD_FAST 84 -#define LOAD_FAST_WITH_COUNT 85 -#define LOAD_FROM_DICT_OR_DEREF 86 -#define LOAD_FROM_DICT_OR_GLOBALS 87 -#define LOAD_GLOBAL 88 -#define LOAD_NAME 89 -#define LOAD_SMALL_INT 90 -#define LOAD_SPECIAL 91 -#define LOAD_SUPER_ATTR 92 -#define MAKE_CELL 93 -#define MAP_ADD 94 -#define MATCH_CLASS 95 -#define POP_JUMP_IF_FALSE 96 -#define POP_JUMP_IF_NONE 97 -#define POP_JUMP_IF_NOT_NONE 98 -#define POP_JUMP_IF_TRUE 99 -#define RAISE_VARARGS 100 -#define RERAISE 101 -#define SEND 102 -#define SET_ADD 103 -#define SET_FUNCTION_ATTRIBUTE 104 -#define SET_UPDATE 105 -#define STORE_ATTR 106 -#define STORE_DEREF 107 -#define STORE_FAST 108 -#define STORE_FAST_LOAD_FAST 109 -#define STORE_FAST_STORE_FAST 110 -#define STORE_GLOBAL 111 -#define STORE_NAME 112 -#define SWAP 113 -#define UNPACK_EX 114 -#define UNPACK_SEQUENCE 115 -#define YIELD_VALUE 116 +#define LOAD_FROM_DICT_OR_DEREF 85 +#define LOAD_FROM_DICT_OR_GLOBALS 86 +#define LOAD_GLOBAL 87 +#define LOAD_NAME 88 +#define LOAD_SMALL_INT 89 +#define LOAD_SPECIAL 90 +#define LOAD_SUPER_ATTR 91 +#define MAKE_CELL 92 +#define MAP_ADD 93 +#define MATCH_CLASS 94 +#define POP_JUMP_IF_FALSE 95 +#define POP_JUMP_IF_NONE 96 +#define POP_JUMP_IF_NOT_NONE 97 +#define POP_JUMP_IF_TRUE 98 +#define RAISE_VARARGS 99 +#define RERAISE 100 +#define SEND 101 +#define SET_ADD 102 +#define SET_FUNCTION_ATTRIBUTE 103 +#define SET_UPDATE 104 +#define STORE_ATTR 105 +#define STORE_DEREF 106 +#define STORE_FAST 107 +#define STORE_FAST_LOAD_FAST 108 +#define STORE_FAST_STORE_FAST 109 +#define STORE_GLOBAL 110 +#define STORE_NAME 111 +#define SWAP 112 +#define UNPACK_EX 113 +#define UNPACK_SEQUENCE 114 +#define YIELD_VALUE 115 #define RESUME 149 #define BINARY_OP_ADD_FLOAT 150 #define BINARY_OP_ADD_INT 151 diff --git a/Lib/_opcode_metadata.py b/Lib/_opcode_metadata.py index 
43d4ef96ec0344..cda3c340c322f3 100644 --- a/Lib/_opcode_metadata.py +++ b/Lib/_opcode_metadata.py @@ -288,38 +288,37 @@ 'LOAD_FAST_AND_CLEAR': 82, 'LOAD_FAST_CHECK': 83, 'LOAD_FAST_LOAD_FAST': 84, - 'LOAD_FAST_WITH_COUNT': 85, - 'LOAD_FROM_DICT_OR_DEREF': 86, - 'LOAD_FROM_DICT_OR_GLOBALS': 87, - 'LOAD_GLOBAL': 88, - 'LOAD_NAME': 89, - 'LOAD_SMALL_INT': 90, - 'LOAD_SPECIAL': 91, - 'LOAD_SUPER_ATTR': 92, - 'MAKE_CELL': 93, - 'MAP_ADD': 94, - 'MATCH_CLASS': 95, - 'POP_JUMP_IF_FALSE': 96, - 'POP_JUMP_IF_NONE': 97, - 'POP_JUMP_IF_NOT_NONE': 98, - 'POP_JUMP_IF_TRUE': 99, - 'RAISE_VARARGS': 100, - 'RERAISE': 101, - 'SEND': 102, - 'SET_ADD': 103, - 'SET_FUNCTION_ATTRIBUTE': 104, - 'SET_UPDATE': 105, - 'STORE_ATTR': 106, - 'STORE_DEREF': 107, - 'STORE_FAST': 108, - 'STORE_FAST_LOAD_FAST': 109, - 'STORE_FAST_STORE_FAST': 110, - 'STORE_GLOBAL': 111, - 'STORE_NAME': 112, - 'SWAP': 113, - 'UNPACK_EX': 114, - 'UNPACK_SEQUENCE': 115, - 'YIELD_VALUE': 116, + 'LOAD_FROM_DICT_OR_DEREF': 85, + 'LOAD_FROM_DICT_OR_GLOBALS': 86, + 'LOAD_GLOBAL': 87, + 'LOAD_NAME': 88, + 'LOAD_SMALL_INT': 89, + 'LOAD_SPECIAL': 90, + 'LOAD_SUPER_ATTR': 91, + 'MAKE_CELL': 92, + 'MAP_ADD': 93, + 'MATCH_CLASS': 94, + 'POP_JUMP_IF_FALSE': 95, + 'POP_JUMP_IF_NONE': 96, + 'POP_JUMP_IF_NOT_NONE': 97, + 'POP_JUMP_IF_TRUE': 98, + 'RAISE_VARARGS': 99, + 'RERAISE': 100, + 'SEND': 101, + 'SET_ADD': 102, + 'SET_FUNCTION_ATTRIBUTE': 103, + 'SET_UPDATE': 104, + 'STORE_ATTR': 105, + 'STORE_DEREF': 106, + 'STORE_FAST': 107, + 'STORE_FAST_LOAD_FAST': 108, + 'STORE_FAST_STORE_FAST': 109, + 'STORE_GLOBAL': 110, + 'STORE_NAME': 111, + 'SWAP': 112, + 'UNPACK_EX': 113, + 'UNPACK_SEQUENCE': 114, + 'YIELD_VALUE': 115, 'INSTRUMENTED_END_FOR': 237, 'INSTRUMENTED_END_SEND': 238, 'INSTRUMENTED_LOAD_SUPER_ATTR': 239, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index ec64ff500c903b..0b67e7b0390787 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -266,12 +266,6 @@ dummy_func( value = PyStackRef_DUP(GETLOCAL(oparg)); 
} - replicate(8) pure inst(LOAD_FAST_WITH_COUNT, (-- value)) { - _PyStackRef local = GETLOCAL(oparg); - assert(!PyStackRef_IsNull(local)); - value = PyStackRef_WithCount(local); - } - inst(LOAD_FAST_AND_CLEAR, (-- value)) { value = GETLOCAL(oparg); // do not use SETLOCAL here, it decrefs the old value diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 39d95e9158519d..cc407a2f0efd13 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -197,140 +197,6 @@ break; } - case _LOAD_FAST_WITH_COUNT_0: { - _PyStackRef value; - oparg = 0; - assert(oparg == CURRENT_OPARG()); - _PyStackRef local = GETLOCAL(oparg); - assert(!PyStackRef_IsNull(local)); - _PyFrame_SetStackPointer(frame, stack_pointer); - value = PyStackRef_WithCount(local); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - break; - } - - case _LOAD_FAST_WITH_COUNT_1: { - _PyStackRef value; - oparg = 1; - assert(oparg == CURRENT_OPARG()); - _PyStackRef local = GETLOCAL(oparg); - assert(!PyStackRef_IsNull(local)); - _PyFrame_SetStackPointer(frame, stack_pointer); - value = PyStackRef_WithCount(local); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - break; - } - - case _LOAD_FAST_WITH_COUNT_2: { - _PyStackRef value; - oparg = 2; - assert(oparg == CURRENT_OPARG()); - _PyStackRef local = GETLOCAL(oparg); - assert(!PyStackRef_IsNull(local)); - _PyFrame_SetStackPointer(frame, stack_pointer); - value = PyStackRef_WithCount(local); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - break; - } - - case _LOAD_FAST_WITH_COUNT_3: { - _PyStackRef value; - oparg = 3; - assert(oparg == CURRENT_OPARG()); - _PyStackRef local = GETLOCAL(oparg); - assert(!PyStackRef_IsNull(local)); - _PyFrame_SetStackPointer(frame, stack_pointer); - 
value = PyStackRef_WithCount(local); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - break; - } - - case _LOAD_FAST_WITH_COUNT_4: { - _PyStackRef value; - oparg = 4; - assert(oparg == CURRENT_OPARG()); - _PyStackRef local = GETLOCAL(oparg); - assert(!PyStackRef_IsNull(local)); - _PyFrame_SetStackPointer(frame, stack_pointer); - value = PyStackRef_WithCount(local); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - break; - } - - case _LOAD_FAST_WITH_COUNT_5: { - _PyStackRef value; - oparg = 5; - assert(oparg == CURRENT_OPARG()); - _PyStackRef local = GETLOCAL(oparg); - assert(!PyStackRef_IsNull(local)); - _PyFrame_SetStackPointer(frame, stack_pointer); - value = PyStackRef_WithCount(local); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - break; - } - - case _LOAD_FAST_WITH_COUNT_6: { - _PyStackRef value; - oparg = 6; - assert(oparg == CURRENT_OPARG()); - _PyStackRef local = GETLOCAL(oparg); - assert(!PyStackRef_IsNull(local)); - _PyFrame_SetStackPointer(frame, stack_pointer); - value = PyStackRef_WithCount(local); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - break; - } - - case _LOAD_FAST_WITH_COUNT_7: { - _PyStackRef value; - oparg = 7; - assert(oparg == CURRENT_OPARG()); - _PyStackRef local = GETLOCAL(oparg); - assert(!PyStackRef_IsNull(local)); - _PyFrame_SetStackPointer(frame, stack_pointer); - value = PyStackRef_WithCount(local); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - break; - } - - case _LOAD_FAST_WITH_COUNT: { - _PyStackRef value; - oparg = CURRENT_OPARG(); - _PyStackRef local = GETLOCAL(oparg); - 
assert(!PyStackRef_IsNull(local)); - _PyFrame_SetStackPointer(frame, stack_pointer); - value = PyStackRef_WithCount(local); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - break; - } - case _LOAD_FAST_AND_CLEAR: { _PyStackRef value; oparg = CURRENT_OPARG(); diff --git a/Python/flowgraph.c b/Python/flowgraph.c index b75dbda7b9b413..b1097b64469ecd 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -1871,13 +1871,6 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) continue; } break; - case LOAD_FAST: - /* If the local is consumed immediately, we can use an empheral - * reference count */ - if (nextop == LOAD_ATTR) { - INSTR_SET_OP1(inst, LOAD_FAST_WITH_COUNT, oparg); - } - break; } } diff --git a/Python/gc.c b/Python/gc.c index fd29a48518e71b..b999f6a5927c78 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1438,11 +1438,9 @@ frame_move_unvisited(_PyInterpreterFrame *frame, WorkStack *stack, int visited_s while (sp > locals) { sp--; _PyStackRef ref = *sp; - if (!PyStackRef_IsNull(ref)) { + if (PyStackRef_IsMortal(ref)) { PyObject *op = PyStackRef_AsPyObjectBorrow(ref); - if (!_Py_IsImmortal(op)) { - move_unvisited(op, stack, visited_space); - } + move_unvisited(op, stack, visited_space); } } } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index fd6f0bd2f5c6d8..e1aa9530bd66b3 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -5993,22 +5993,6 @@ DISPATCH(); } - TARGET(LOAD_FAST_WITH_COUNT) { - frame->instr_ptr = next_instr; - next_instr += 1; - INSTRUCTION_STATS(LOAD_FAST_WITH_COUNT); - _PyStackRef value; - _PyStackRef local = GETLOCAL(oparg); - assert(!PyStackRef_IsNull(local)); - _PyFrame_SetStackPointer(frame, stack_pointer); - value = PyStackRef_WithCount(local); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); 
- DISPATCH(); - } - TARGET(LOAD_FROM_DICT_OR_DEREF) { frame->instr_ptr = next_instr; next_instr += 1; diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index 505c93604f95cc..c93941dcac4abf 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -84,7 +84,6 @@ static void *opcode_targets[256] = { &&TARGET_LOAD_FAST_AND_CLEAR, &&TARGET_LOAD_FAST_CHECK, &&TARGET_LOAD_FAST_LOAD_FAST, - &&TARGET_LOAD_FAST_WITH_COUNT, &&TARGET_LOAD_FROM_DICT_OR_DEREF, &&TARGET_LOAD_FROM_DICT_OR_GLOBALS, &&TARGET_LOAD_GLOBAL, @@ -148,6 +147,7 @@ static void *opcode_targets[256] = { &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, + &&_unknown_opcode, &&TARGET_RESUME, &&TARGET_BINARY_OP_ADD_FLOAT, &&TARGET_BINARY_OP_ADD_INT, diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 4e1249279f350d..f77a5aa35bdf82 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -47,15 +47,6 @@ break; } - case _LOAD_FAST_WITH_COUNT: { - _Py_UopsSymbol *value; - value = sym_new_not_null(ctx); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - break; - } - case _LOAD_FAST_AND_CLEAR: { _Py_UopsSymbol *value; value = GETLOCAL(oparg); From c88bcbceb11db0c334034c4b7bf95596b16a729b Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 6 Dec 2024 17:47:24 +0000 Subject: [PATCH 16/53] Avoid INCREF + Steal pairs by using New or return stack ref directly --- Include/internal/pycore_ceval.h | 2 +- Include/internal/pycore_long.h | 2 +- Objects/floatobject.c | 27 ++++++++++++----------- Objects/longobject.c | 29 +++++++++++++++++++++++-- Python/bytecodes.c | 38 ++++++++++++++------------------- Python/executor_cases.c.h | 35 ++++++++++++++---------------- Python/generated_cases.c.h | 35 ++++++++++++++---------------- 7 files changed, 92 insertions(+), 76 deletions(-) diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 80bd19a887871c..e33622c1a040e5 100644 --- 
a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -325,7 +325,7 @@ _Py_eval_breaker_bit_is_set(PyThreadState *tstate, uintptr_t bit) void _Py_set_eval_breaker_bit_all(PyInterpreterState *interp, uintptr_t bit); void _Py_unset_eval_breaker_bit_all(PyInterpreterState *interp, uintptr_t bit); -PyAPI_FUNC(PyObject *) _PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef right, double value); +PyAPI_FUNC(_PyStackRef) _PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef right, double value); #ifdef __cplusplus diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index 196b4152280a35..3ba98507b2a590 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -109,7 +109,7 @@ PyAPI_DATA(PyObject*) _PyLong_Rshift(PyObject *, int64_t); // Export for 'math' shared extension PyAPI_DATA(PyObject*) _PyLong_Lshift(PyObject *, int64_t); -PyAPI_FUNC(PyObject*) _PyLong_Add(PyLongObject *left, PyLongObject *right); +PyAPI_FUNC(_PyStackRef) _PyLong_Add(PyLongObject *left, PyLongObject *right); PyAPI_FUNC(PyObject*) _PyLong_Multiply(PyLongObject *left, PyLongObject *right); PyAPI_FUNC(PyObject*) _PyLong_Subtract(PyLongObject *left, PyLongObject *right); diff --git a/Objects/floatobject.c b/Objects/floatobject.c index bcc77287454768..06e2438cad8f0c 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -136,34 +136,37 @@ PyFloat_FromDouble(double fval) #ifdef Py_GIL_DISABLED -PyObject *_PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef right, double value) +_PyStackRef _PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef right, double value) { PyStackRef_CLOSE(left); PyStackRef_CLOSE(right); - return PyFloat_FromDouble(value); + return PyStackRef_FromPyObjectSteal(PyFloat_FromDouble(value)); } #else // Py_GIL_DISABLED -PyObject *_PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef right, double value) +_PyStackRef 
_PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef right, double value) { - PyObject *left_o = PyStackRef_AsPyObjectSteal(left); - PyObject *right_o = PyStackRef_AsPyObjectSteal(right); + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); if (Py_REFCNT(left_o) == 1) { ((PyFloatObject *)left_o)->ob_fval = value; - _Py_DECREF_SPECIALIZED(right_o, _PyFloat_ExactDealloc); - return left_o; + PyStackRef_CLOSE(right); + return left; } else if (Py_REFCNT(right_o) == 1) { ((PyFloatObject *)right_o)->ob_fval = value; - _Py_DECREF_NO_DEALLOC(left_o); - return right_o; + PyStackRef_CLOSE(left); + return right; } else { PyObject *result = PyFloat_FromDouble(value); - _Py_DECREF_NO_DEALLOC(left_o); - _Py_DECREF_NO_DEALLOC(right_o); - return result; + PyStackRef_CLOSE(left); + PyStackRef_CLOSE(right); + if (result == NULL) { + return PyStackRef_NULL; + } + return PyStackRef_FromPyObjectStealMortal(result); } } diff --git a/Objects/longobject.c b/Objects/longobject.c index 4aa35685b509f2..8db51301b63229 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -286,6 +286,27 @@ _PyLong_FromSTwoDigits(stwodigits x) return (PyLongObject*)_PyLong_FromLarge(x); } +/* Create a new int object from a C word-sized int, return a stackref */ +static inline _PyStackRef +_PyLongRef_FromSTwoDigitsRef(stwodigits x) +{ + if (IS_SMALL_INT(x)) { + return PyStackRef_FromPyObjectImmortal(get_small_int((sdigit)x)); + } + assert(x != 0); + PyObject *res; + if (is_medium_int(x)) { + res = _PyLong_FromMedium((sdigit)x); + } + else { + res = _PyLong_FromLarge(x); + } + if (res == NULL) { + return PyStackRef_NULL; + } + return PyStackRef_FromPyObjectStealMortal(res); +} + /* If a freshly-allocated int is already shared, it must be a small integer, so negating it must go to PyLong_FromLong */ Py_LOCAL_INLINE(void) @@ -3812,10 +3833,14 @@ long_add(PyLongObject *a, PyLongObject *b) return z; } -PyObject * +_PyStackRef 
_PyLong_Add(PyLongObject *a, PyLongObject *b) { - return (PyObject*)long_add(a, b); + if (_PyLong_BothAreCompact(a, b)) { + stwodigits z = medium_value(a) + medium_value(b); + return _PyLongRef_FromSTwoDigitsRef(z); + } + return PyStackRef_FromPyObjectSteal((PyObject*)long_add(a, b)); } static PyObject * diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 0b67e7b0390787..3f0906013de721 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -542,12 +542,11 @@ dummy_func( PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); - PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); + res = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); INPUTS_DEAD(); - ERROR_IF(res_o == NULL, error); - res = PyStackRef_FromPyObjectSteal(res_o); + ERROR_IF(PyStackRef_IsNull(res), error); } pure op(_BINARY_OP_SUBTRACT_INT, (left, right -- res)) { @@ -595,10 +594,9 @@ dummy_func( double dres = ((PyFloatObject *)left_o)->ob_fval * ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + res = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); INPUTS_DEAD(); - ERROR_IF(res_o == NULL, error); - res = PyStackRef_FromPyObjectSteal(res_o); + ERROR_IF(PyStackRef_IsNull(res), error); } pure op(_BINARY_OP_ADD_FLOAT, (left, right -- res)) { @@ -609,10 +607,9 @@ dummy_func( double dres = ((PyFloatObject *)left_o)->ob_fval + ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + res = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); INPUTS_DEAD(); - ERROR_IF(res_o == NULL, error); - res = PyStackRef_FromPyObjectSteal(res_o); + ERROR_IF(PyStackRef_IsNull(res), error); } pure op(_BINARY_OP_SUBTRACT_FLOAT, (left, right -- res)) { @@ -623,10 +620,9 @@ dummy_func( double dres = 
((PyFloatObject *)left_o)->ob_fval - ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + res = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); INPUTS_DEAD(); - ERROR_IF(res_o == NULL, error); - res = PyStackRef_FromPyObjectSteal(res_o); + ERROR_IF(PyStackRef_IsNull(res), error); } macro(BINARY_OP_MULTIPLY_FLOAT) = @@ -807,17 +803,18 @@ dummy_func( PyObject *res_o = _PyList_GetItemRef((PyListObject*)list, index); DEOPT_IF(res_o == NULL); STAT_INC(BINARY_SUBSCR, hit); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + DEAD(sub_st); + PyStackRef_CLOSE(list_st); + res = PyStackRef_FromPyObjectSteal(res_o); #else DEOPT_IF(index >= PyList_GET_SIZE(list)); STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = PyList_GET_ITEM(list, index); assert(res_o != NULL); - Py_INCREF(res_o); + res = PyStackRef_FromPyObjectNew(res_o); + DECREF_INPUTS(); #endif - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); - DEAD(sub_st); - PyStackRef_CLOSE(list_st); - res = PyStackRef_FromPyObjectSteal(res_o); } inst(BINARY_SUBSCR_STR_INT, (unused/1, str_st, sub_st -- res)) { @@ -854,11 +851,8 @@ dummy_func( STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = PyTuple_GET_ITEM(tuple, index); assert(res_o != NULL); - Py_INCREF(res_o); - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); - DEAD(sub_st); - PyStackRef_CLOSE(tuple_st); - res = PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectNew(res_o); + DECREF_INPUTS(); } inst(BINARY_SUBSCR_DICT, (unused/1, dict_st, sub_st -- res)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index cc407a2f0efd13..9ba99dd5f1c057 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -683,11 +683,10 @@ PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); - PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, 
(PyLongObject *)right_o); + res = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); - if (res_o == NULL) JUMP_TO_ERROR(); - res = PyStackRef_FromPyObjectSteal(res_o); + if (PyStackRef_IsNull(res)) JUMP_TO_ERROR(); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -766,9 +765,8 @@ double dres = ((PyFloatObject *)left_o)->ob_fval * ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); - if (res_o == NULL) JUMP_TO_ERROR(); - res = PyStackRef_FromPyObjectSteal(res_o); + res = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (PyStackRef_IsNull(res)) JUMP_TO_ERROR(); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -787,9 +785,8 @@ double dres = ((PyFloatObject *)left_o)->ob_fval + ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); - if (res_o == NULL) JUMP_TO_ERROR(); - res = PyStackRef_FromPyObjectSteal(res_o); + res = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (PyStackRef_IsNull(res)) JUMP_TO_ERROR(); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -808,9 +805,8 @@ double dres = ((PyFloatObject *)left_o)->ob_fval - ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); - if (res_o == NULL) JUMP_TO_ERROR(); - res = PyStackRef_FromPyObjectSteal(res_o); + res = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (PyStackRef_IsNull(res)) JUMP_TO_ERROR(); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -1027,6 +1023,9 @@ JUMP_TO_JUMP_TARGET(); } STAT_INC(BINARY_SUBSCR, hit); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + PyStackRef_CLOSE(list_st); + res = 
PyStackRef_FromPyObjectSteal(res_o); #else if (index >= PyList_GET_SIZE(list)) { UOP_STAT_INC(uopcode, miss); @@ -1035,11 +1034,10 @@ STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = PyList_GET_ITEM(list, index); assert(res_o != NULL); - Py_INCREF(res_o); - #endif - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + res = PyStackRef_FromPyObjectNew(res_o); PyStackRef_CLOSE(list_st); - res = PyStackRef_FromPyObjectSteal(res_o); + PyStackRef_CLOSE(sub_st); + #endif stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -1117,10 +1115,9 @@ STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = PyTuple_GET_ITEM(tuple, index); assert(res_o != NULL); - Py_INCREF(res_o); - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + res = PyStackRef_FromPyObjectNew(res_o); PyStackRef_CLOSE(tuple_st); - res = PyStackRef_FromPyObjectSteal(res_o); + PyStackRef_CLOSE(sub_st); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index e1aa9530bd66b3..30e7cab4d56444 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -84,9 +84,8 @@ double dres = ((PyFloatObject *)left_o)->ob_fval + ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); - if (res_o == NULL) goto pop_2_error; - res = PyStackRef_FromPyObjectSteal(res_o); + res = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (PyStackRef_IsNull(res)) goto pop_2_error; } stack_pointer[-2] = res; stack_pointer += -1; @@ -117,11 +116,10 @@ PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); - PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); + res = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); 
PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); - if (res_o == NULL) goto pop_2_error; - res = PyStackRef_FromPyObjectSteal(res_o); + if (PyStackRef_IsNull(res)) goto pop_2_error; } stack_pointer[-2] = res; stack_pointer += -1; @@ -250,9 +248,8 @@ double dres = ((PyFloatObject *)left_o)->ob_fval * ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); - if (res_o == NULL) goto pop_2_error; - res = PyStackRef_FromPyObjectSteal(res_o); + res = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (PyStackRef_IsNull(res)) goto pop_2_error; } stack_pointer[-2] = res; stack_pointer += -1; @@ -321,9 +318,8 @@ double dres = ((PyFloatObject *)left_o)->ob_fval - ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); - if (res_o == NULL) goto pop_2_error; - res = PyStackRef_FromPyObjectSteal(res_o); + res = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (PyStackRef_IsNull(res)) goto pop_2_error; } stack_pointer[-2] = res; stack_pointer += -1; @@ -582,16 +578,18 @@ stack_pointer = _PyFrame_GetStackPointer(frame); DEOPT_IF(res_o == NULL, BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + PyStackRef_CLOSE(list_st); + res = PyStackRef_FromPyObjectSteal(res_o); #else DEOPT_IF(index >= PyList_GET_SIZE(list), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = PyList_GET_ITEM(list, index); assert(res_o != NULL); - Py_INCREF(res_o); - #endif - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + res = PyStackRef_FromPyObjectNew(res_o); PyStackRef_CLOSE(list_st); - res = PyStackRef_FromPyObjectSteal(res_o); + PyStackRef_CLOSE(sub_st); + #endif stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -652,10 +650,9 @@ STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = PyTuple_GET_ITEM(tuple, index); assert(res_o != NULL); - 
Py_INCREF(res_o); - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + res = PyStackRef_FromPyObjectNew(res_o); PyStackRef_CLOSE(tuple_st); - res = PyStackRef_FromPyObjectSteal(res_o); + PyStackRef_CLOSE(sub_st); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); From f5dec96adef1fc25395b0aaea14e6f94d1320181 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 6 Dec 2024 18:16:56 +0000 Subject: [PATCH 17/53] Avoid masking when comparing to True and False --- Include/internal/pycore_stackref.h | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 209bda6e6be3a6..eadf4a222a15fb 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -219,9 +219,9 @@ static const _PyStackRef PyStackRef_NULL = { .bits = PyStackRef_NULL_BITS }; /* We should be able to guarantee that the tag bits are set for immortal objects */ -#define PyStackRef_IsTrue(ref) (((ref).bits & (~Py_TAG_BITS)) == ((uintptr_t)&_Py_TrueStruct)) -#define PyStackRef_IsFalse(ref) (((ref).bits & (~Py_TAG_BITS)) == ((uintptr_t)&_Py_FalseStruct)) - +#define PyStackRef_IsTrue(REF) ((REF).bits == (((uintptr_t)&_Py_TrueStruct) | Py_TAG_IMMORTAL)) +#define PyStackRef_IsFalse(REF) ((REF).bits == (((uintptr_t)&_Py_FalseStruct) | Py_TAG_IMMORTAL)) +#define PyStackRef_IsNone(REF) ((REF).bits == (((uintptr_t)&_Py_NoneStruct) | Py_TAG_IMMORTAL)) #ifdef Py_DEBUG @@ -244,20 +244,9 @@ static inline void PyStackRef_CheckValid(_PyStackRef ref) { } } -static inline int -PyStackRef_IsNone(_PyStackRef ref) -{ - if ((ref.bits & (~Py_TAG_BITS)) == ((uintptr_t)&_Py_NoneStruct)) { - assert ((ref.bits & Py_TAG_BITS) == Py_TAG_IMMORTAL); - return 1; - } - return 0; -} - #else #define PyStackRef_CheckValid(REF) ((void)0) -#define PyStackRef_IsNone(REF) ((REF).bits == (((uintptr_t)&_Py_NoneStruct) | Py_TAG_IMMORTAL)) #endif From 
1069d98d1e82adbe814be7d472d04b6a4056ece7 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 10 Dec 2024 15:21:50 +0000 Subject: [PATCH 18/53] Revert not-quite-true assert --- Include/internal/pycore_stackref.h | 1 - 1 file changed, 1 deletion(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index eadf4a222a15fb..dd9aab4a97f93d 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -230,7 +230,6 @@ static inline void PyStackRef_CheckValid(_PyStackRef ref) { PyObject *obj = BITS_TO_PTR_MASKED(ref); switch (tag) { case 0: - assert(!_Py_IsImmortal(obj)); break; case Py_TAG_REFCNT: /* Can be immortal if object was made immortal after reference came into existence */ From d23ae47507a849026c67fd2497a163a93df4cb1b Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 12 Dec 2024 16:10:10 +0000 Subject: [PATCH 19/53] Fix up after merge --- Include/internal/pycore_opcode_metadata.h | 4 ++-- Include/internal/pycore_stackref.h | 4 ++-- Include/internal/pycore_uop_metadata.h | 2 +- Include/refcount.h | 2 +- Python/bytecodes.c | 7 +++---- Python/ceval.c | 4 +++- Python/executor_cases.c.h | 13 ++++++------ Python/generated_cases.c.h | 24 ++++++++--------------- 8 files changed, 27 insertions(+), 33 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 208ca8d13d823e..ed696785599da4 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1975,9 +1975,9 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [CALL_INTRINSIC_2] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_ISINSTANCE] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [CALL_KW] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [CALL_KW_BOUND_METHOD] = { 
true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [CALL_KW_BOUND_METHOD] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_KW_NON_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [CALL_KW_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [CALL_KW_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_LEN] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [CALL_LIST_APPEND] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG }, [CALL_METHOD_DESCRIPTOR_FAST] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 4339531ea2bd86..282fc635b7e957 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -226,11 +226,11 @@ static const _PyStackRef PyStackRef_NULL = { .bits = PyStackRef_NULL_BITS }; #ifdef Py_DEBUG static inline void PyStackRef_CheckValid(_PyStackRef ref) { + assert(ref.bits != 0); int tag = ref.bits & Py_TAG_BITS; PyObject *obj = BITS_TO_PTR_MASKED(ref); switch (tag) { case 0: - break; case Py_TAG_REFCNT: /* Can be immortal if object was made immortal after reference came into existence */ assert(obj != NULL && obj != Py_True && obj != Py_False && obj != Py_None); @@ -346,7 +346,7 @@ PyStackRef_DUP(_PyStackRef ref) { assert(!PyStackRef_IsNull(ref)); if (!PyStackRef_HasCount(ref)) { - Py_INCREF(BITS_TO_PTR(ref)); + Py_INCREF_MORTAL(BITS_TO_PTR(ref)); } return ref; } diff --git 
a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 0c00d51dca6e79..3becf27422f705 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -248,7 +248,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CALL_METHOD_DESCRIPTOR_NOARGS] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_METHOD_DESCRIPTOR_FAST] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_MAYBE_EXPAND_METHOD_KW] = HAS_ARG_FLAG, - [_PY_FRAME_KW] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, + [_PY_FRAME_KW] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CHECK_FUNCTION_VERSION_KW] = HAS_ARG_FLAG | HAS_EXIT_FLAG, [_CHECK_METHOD_VERSION_KW] = HAS_ARG_FLAG | HAS_EXIT_FLAG, [_EXPAND_METHOD_KW] = HAS_ARG_FLAG, diff --git a/Include/refcount.h b/Include/refcount.h index 26a8da661525d1..f78343f8f0a559 100644 --- a/Include/refcount.h +++ b/Include/refcount.h @@ -245,7 +245,7 @@ PyAPI_FUNC(void) _Py_DecRef(PyObject *); #ifndef Py_GIL_DISABLED static inline Py_ALWAYS_INLINE void Py_INCREF_MORTAL(PyObject *op) { - assert(!_Py_IsImmortal(op)); + assert(!_Py_IsStaticImmortal(op)); op->ob_refcnt++; _Py_INCREF_STAT_INC(); #if defined(Py_REF_DEBUG) && !defined(Py_LIMITED_API) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index f7ec71b7891b0f..3df5ae7324d7b9 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4312,7 +4312,7 @@ dummy_func( assert(Py_TYPE(callable_o) == &PyFunction_Type); int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(callable_o))->co_flags; PyObject *locals = code_flags & CO_OPTIMIZED ? 
NULL : Py_NewRef(PyFunction_GET_GLOBALS(callable_o)); - new_frame = _PyEvalFramePushAndInit( + _PyInterpreterFrame *temp = _PyEvalFramePushAndInit( tstate, callable[0], locals, args, positional_args, kwnames_o, frame ); @@ -4320,9 +4320,8 @@ dummy_func( // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. SYNC_SP(); - if (new_frame == NULL) { - ERROR_NO_POP(); - } + ERROR_IF(temp == NULL, error); + new_frame = temp; } op(_CHECK_FUNCTION_VERSION_KW, (func_version/2, callable[1], self_or_null[1], unused[oparg], kwnames -- callable[1], self_or_null[1], unused[oparg], kwnames)) { diff --git a/Python/ceval.c b/Python/ceval.c index 7531bbae293ef4..4e4fae2350ce64 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1500,7 +1500,9 @@ initialize_locals(PyThreadState *tstate, PyFunctionObject *func, { PyCodeObject *co = (PyCodeObject*)func->func_code; const Py_ssize_t total_args = co->co_argcount + co->co_kwonlyargcount; - + for (Py_ssize_t i = 0; i < argcount; i++) { + PyStackRef_CheckValid(args[i]); + } /* Create a dictionary for keyword parameters (**kwags) */ PyObject *kwdict; Py_ssize_t i; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 720cb0f4a18b21..e9f05a603c6559 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5263,7 +5263,7 @@ int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(callable_o))->co_flags; PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(callable_o)); _PyFrame_SetStackPointer(frame, stack_pointer); - new_frame = _PyEvalFramePushAndInit( + _PyInterpreterFrame *temp = _PyEvalFramePushAndInit( tstate, callable[0], locals, args, positional_args, kwnames_o, frame ); @@ -5271,12 +5271,13 @@ PyStackRef_CLOSE(kwnames); // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. 
- stack_pointer[-3 - oparg].bits = (uintptr_t)new_frame; - stack_pointer += -2 - oparg; + stack_pointer += -3 - oparg; + assert(WITHIN_STACK_BOUNDS()); + if (temp == NULL) JUMP_TO_ERROR(); + new_frame = temp; + stack_pointer[0].bits = (uintptr_t)new_frame; + stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); - if (new_frame == NULL) { - JUMP_TO_ERROR(); - } break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 4c4111b068124f..a59e3198d7efd8 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2096,7 +2096,7 @@ int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(callable_o))->co_flags; PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(callable_o)); _PyFrame_SetStackPointer(frame, stack_pointer); - new_frame = _PyEvalFramePushAndInit( + _PyInterpreterFrame *temp = _PyEvalFramePushAndInit( tstate, callable[0], locals, args, positional_args, kwnames_o, frame ); @@ -2104,12 +2104,10 @@ PyStackRef_CLOSE(kwnames); // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. - stack_pointer[-3 - oparg].bits = (uintptr_t)new_frame; - stack_pointer += -2 - oparg; + stack_pointer += -3 - oparg; assert(WITHIN_STACK_BOUNDS()); - if (new_frame == NULL) { - goto error; - } + if (temp == NULL) goto error; + new_frame = temp; } // _SAVE_RETURN_OFFSET { @@ -2126,8 +2124,6 @@ // Eventually this should be the only occurrence of this code. assert(tstate->interp->eval_frame == NULL); _PyInterpreterFrame *temp = new_frame; - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); assert(new_frame->previous == frame || new_frame->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); @@ -2274,7 +2270,7 @@ int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(callable_o))->co_flags; PyObject *locals = code_flags & CO_OPTIMIZED ? 
NULL : Py_NewRef(PyFunction_GET_GLOBALS(callable_o)); _PyFrame_SetStackPointer(frame, stack_pointer); - new_frame = _PyEvalFramePushAndInit( + _PyInterpreterFrame *temp = _PyEvalFramePushAndInit( tstate, callable[0], locals, args, positional_args, kwnames_o, frame ); @@ -2282,12 +2278,10 @@ PyStackRef_CLOSE(kwnames); // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. - stack_pointer[-3 - oparg].bits = (uintptr_t)new_frame; - stack_pointer += -2 - oparg; + stack_pointer += -3 - oparg; assert(WITHIN_STACK_BOUNDS()); - if (new_frame == NULL) { - goto error; - } + if (temp == NULL) goto error; + new_frame = temp; } // _SAVE_RETURN_OFFSET { @@ -2304,8 +2298,6 @@ // Eventually this should be the only occurrence of this code. assert(tstate->interp->eval_frame == NULL); _PyInterpreterFrame *temp = new_frame; - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); assert(new_frame->previous == frame || new_frame->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); From d230f6868d4b791fb8571ee852ce78699a7d37db Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 13 Dec 2024 11:27:44 +0000 Subject: [PATCH 20/53] Fix compiler errors and warnings --- Include/internal/pycore_frame.h | 1 - Python/ceval.c | 2 +- Python/optimizer_bytecodes.c | 3 +-- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 1d35075b7486e8..febd96e5ffa17b 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -158,7 +158,6 @@ static inline void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame * dest->stackpointer = dest->localsplus + stacktop; for (int i = 0; i < stacktop; i++) { dest->localsplus[i] = PyStackRef_MakeHeapSafe(src->localsplus[i]); - PyStackRef_CheckValid(dest->localsplus[i]); } #ifdef Py_GIL_DISABLED diff --git a/Python/ceval.c b/Python/ceval.c index 
5d565c42062f46..8dc4ab5f3d6919 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1898,9 +1898,9 @@ _PyEval_Vector(PyThreadState *tstate, PyFunctionObject *func, if (kwnames) { total_args += PyTuple_GET_SIZE(kwnames); } + _PyStackRef stack_array[8]; _PyStackRef *arguments; if (total_args <= 8) { - _PyStackRef stack_array[8]; arguments = stack_array; } else { diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 42bdbd9ca8d0cd..bff9121926b2c4 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -193,8 +193,7 @@ dummy_func(void) { { assert(PyLong_CheckExact(sym_get_const(left))); assert(PyLong_CheckExact(sym_get_const(right))); - PyObject *temp = _PyLong_Add((PyLongObject *)sym_get_const(left), - (PyLongObject *)sym_get_const(right)); + PyObject *temp = PyNumber_Add(sym_get_const(left), sym_get_const(right)); if (temp == NULL) { goto error; } From 69e993cdbe2bbbef2f67ba9f2fc9995d33c52efb Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 4 Feb 2025 11:17:43 +0000 Subject: [PATCH 21/53] Post merge fix up --- Lib/test/test_generated_cases.py | 3 ++- Python/bytecodes.c | 26 +++++++++++++------------- Python/executor_cases.c.h | 32 ++++++++++++++------------------ Python/generated_cases.c.h | 32 ++++++++++++++------------------ 4 files changed, 43 insertions(+), 50 deletions(-) diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index b21a28a15066f7..35600ce5486642 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -1442,7 +1442,8 @@ def test_push_then_error(self): b = 1; if (cond) { stack_pointer[0] = a; - stack_pointer += 1; + stack_pointer[1] = b; + stack_pointer += 2; assert(WITHIN_STACK_BOUNDS()); goto error; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 7ad985ad907984..d324b03027246f 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -720,6 +720,11 @@ dummy_func( // specializations, but there is no output. 
// At the end we just skip over the STORE_FAST. op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right --)) { + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyUnicode_CheckExact(left_o)); + assert(PyUnicode_CheckExact(right_o)); + int next_oparg; #if TIER_ONE assert(next_instr->op.code == STORE_FAST); @@ -728,7 +733,6 @@ dummy_func( next_oparg = CURRENT_OPERAND0(); #endif _PyStackRef *target_local = &GETLOCAL(next_oparg); - PyObject *left_o = PyStackRef_AsPyObjectSteal(left); assert(PyUnicode_CheckExact(left_o)); DEOPT_IF(PyStackRef_AsPyObjectBorrow(*target_local) != left_o); STAT_INC(BINARY_OP, hit); @@ -743,12 +747,12 @@ dummy_func( * only the locals reference, so PyUnicode_Append knows * that the string is safe to mutate. */ - PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); - assert(PyUnicode_CheckExact(right_o)); - PyStackRef_CLEAR(*target_local); - assert(Py_REFCNT(left_o) >= 1); - PyUnicode_Append(&left_o, right_o); - *target_local = PyStackRef_FromPyObjectSteal(left_o); + assert(Py_REFCNT(left_o) >= 2); + PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); + DEAD(left); + PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local); + PyUnicode_Append(&temp, right_o); + *target_local = PyStackRef_FromPyObjectSteal(temp); PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); DEAD(right); ERROR_IF(PyStackRef_IsNull(*target_local), error); @@ -886,19 +890,15 @@ dummy_func( #ifdef Py_GIL_DISABLED PyObject *res_o = _PyList_GetItemRef((PyListObject*)list, index); DEOPT_IF(res_o == NULL); - STAT_INC(BINARY_SUBSCR, hit); - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); - DEAD(sub_st); - PyStackRef_CLOSE(list_st); res = PyStackRef_FromPyObjectSteal(res_o); #else DEOPT_IF(index >= PyList_GET_SIZE(list)); - STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = PyList_GET_ITEM(list, index); assert(res_o != NULL); res = PyStackRef_FromPyObjectNew(res_o); - 
DECREF_INPUTS(); #endif + STAT_INC(BINARY_SUBSCR, hit); + DECREF_INPUTS(); } inst(BINARY_SUBSCR_STR_INT, (unused/1, str_st, sub_st -- res)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 1317fde477a58a..5e43dafa04b458 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -935,6 +935,10 @@ _PyStackRef left; right = stack_pointer[-1]; left = stack_pointer[-2]; + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyUnicode_CheckExact(left_o)); + assert(PyUnicode_CheckExact(right_o)); int next_oparg; #if TIER_ONE assert(next_instr->op.code == STORE_FAST); @@ -943,7 +947,6 @@ next_oparg = CURRENT_OPERAND0(); #endif _PyStackRef *target_local = &GETLOCAL(next_oparg); - PyObject *left_o = PyStackRef_AsPyObjectSteal(left); assert(PyUnicode_CheckExact(left_o)); if (PyStackRef_AsPyObjectBorrow(*target_local) != left_o) { UOP_STAT_INC(uopcode, miss); @@ -961,12 +964,11 @@ * only the locals reference, so PyUnicode_Append knows * that the string is safe to mutate. 
*/ - PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); - assert(PyUnicode_CheckExact(right_o)); - PyStackRef_CLEAR(*target_local); - assert(Py_REFCNT(left_o) >= 1); - PyUnicode_Append(&left_o, right_o); - *target_local = PyStackRef_FromPyObjectSteal(left_o); + assert(Py_REFCNT(left_o) >= 2); + PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); + PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local); + PyUnicode_Append(&temp, right_o); + *target_local = PyStackRef_FromPyObjectSteal(temp); PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); if (PyStackRef_IsNull(*target_local)) { stack_pointer += -2; @@ -1166,27 +1168,21 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - STAT_INC(BINARY_SUBSCR, hit); - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); - stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(list_st); - stack_pointer = _PyFrame_GetStackPointer(frame); res = PyStackRef_FromPyObjectSteal(res_o); #else if (index >= PyList_GET_SIZE(list)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = PyList_GET_ITEM(list, index); assert(res_o != NULL); res = PyStackRef_FromPyObjectNew(res_o); - #endif - stack_pointer[0] = res; - stack_pointer += 1; + STAT_INC(BINARY_SUBSCR, hit); + PyStackRef_CLOSE(list_st); + PyStackRef_CLOSE(sub_st); + stack_pointer[-2] = res; + stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 3c2601d9e4d7fd..c88b28414ff9c4 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -250,6 +250,10 @@ /* Skip 5 cache entries */ // _BINARY_OP_INPLACE_ADD_UNICODE { + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyUnicode_CheckExact(left_o)); + assert(PyUnicode_CheckExact(right_o)); int next_oparg; #if 
TIER_ONE assert(next_instr->op.code == STORE_FAST); @@ -258,7 +262,6 @@ next_oparg = CURRENT_OPERAND0(); #endif _PyStackRef *target_local = &GETLOCAL(next_oparg); - PyObject *left_o = PyStackRef_AsPyObjectSteal(left); assert(PyUnicode_CheckExact(left_o)); DEOPT_IF(PyStackRef_AsPyObjectBorrow(*target_local) != left_o, BINARY_OP); STAT_INC(BINARY_OP, hit); @@ -273,12 +276,11 @@ * only the locals reference, so PyUnicode_Append knows * that the string is safe to mutate. */ - PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); - assert(PyUnicode_CheckExact(right_o)); - PyStackRef_CLEAR(*target_local); - assert(Py_REFCNT(left_o) >= 1); - PyUnicode_Append(&left_o, right_o); - *target_local = PyStackRef_FromPyObjectSteal(left_o); + assert(Py_REFCNT(left_o) >= 2); + PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); + PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local); + PyUnicode_Append(&temp, right_o); + *target_local = PyStackRef_FromPyObjectSteal(temp); PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); if (PyStackRef_IsNull(*target_local)) { goto pop_2_error; @@ -675,24 +677,18 @@ PyObject *res_o = _PyList_GetItemRef((PyListObject*)list, index); stack_pointer = _PyFrame_GetStackPointer(frame); DEOPT_IF(res_o == NULL, BINARY_SUBSCR); - STAT_INC(BINARY_SUBSCR, hit); - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); - stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(list_st); - stack_pointer = _PyFrame_GetStackPointer(frame); res = PyStackRef_FromPyObjectSteal(res_o); #else DEOPT_IF(index >= PyList_GET_SIZE(list), BINARY_SUBSCR); - STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = PyList_GET_ITEM(list, index); assert(res_o != NULL); res = PyStackRef_FromPyObjectNew(res_o); - #endif - stack_pointer[0] = res; - stack_pointer += 1; + STAT_INC(BINARY_SUBSCR, hit); + PyStackRef_CLOSE(list_st); + PyStackRef_CLOSE(sub_st); + stack_pointer[-2] = res; + 
stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); DISPATCH(); } From 2efa0bdf6b0c8eaa6f029aee07d4a7d23ae5c2b6 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 17 Feb 2025 11:01:50 +0000 Subject: [PATCH 22/53] Post merge fixups --- Include/refcount.h | 18 +++++++++++++++++- Objects/floatobject.c | 8 ++++---- Python/ceval.c | 1 + 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/Include/refcount.h b/Include/refcount.h index 20b86a88ce7943..e800174c514368 100644 --- a/Include/refcount.h +++ b/Include/refcount.h @@ -398,7 +398,23 @@ static inline void Py_DECREF_MORTAL(const char *filename, int lineno, PyObject * } } #define Py_DECREF_MORTAL(op) Py_DECREF_MORTAL(__FILE__, __LINE__, _PyObject_CAST(op)) -#define Py_DECREF_MORTAL_SPECIALIZED(op, destruct) Py_DECREF_MORTAL(op) + + + +static inline void _Py_DECREF_MORTAL_SPECIALIZED(const char *filename, int lineno, PyObject *op, destructor destruct) +{ + if (op->ob_refcnt <= 0) { + _Py_NegativeRefcount(filename, lineno, op); + } + _Py_DECREF_STAT_INC(); + if (!_Py_IsImmortal(op)) { + _Py_DECREF_DecRefTotal(); + } + if (--op->ob_refcnt == 0) { + destruct(op); + } +} +#define Py_DECREF_MORTAL_SPECIALIZED(op, destruct) _Py_DECREF_MORTAL_SPECIALIZED(__FILE__, __LINE__, op, destruct) static inline void Py_DECREF(const char *filename, int lineno, PyObject *op) { diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 767c324f868df9..70bc8ce3053a68 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -151,18 +151,18 @@ _PyStackRef _PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef righ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); if (Py_REFCNT(left_o) == 1) { ((PyFloatObject *)left_o)->ob_fval = value; - PyStackRef_CLOSE(right); + PyStackRef_CLOSE_SPECIALIZED(right, _PyFloat_ExactDealloc); return left; } else if (Py_REFCNT(right_o) == 1) { ((PyFloatObject *)right_o)->ob_fval = value; - PyStackRef_CLOSE(left); + PyStackRef_CLOSE_SPECIALIZED(left, 
_PyFloat_ExactDealloc); return right; } else { PyObject *result = PyFloat_FromDouble(value); - PyStackRef_CLOSE(left); - PyStackRef_CLOSE(right); + PyStackRef_CLOSE_SPECIALIZED(left, _PyFloat_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyFloat_ExactDealloc); if (result == NULL) { return PyStackRef_NULL; } diff --git a/Python/ceval.c b/Python/ceval.c index cca6904271c9c2..052ff88e3d93bd 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -163,6 +163,7 @@ dump_item(_PyStackRef item) static void dump_stack(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer) { + _PyFrame_SetStackPointer(frame, stack_pointer); _PyStackRef *locals_base = _PyFrame_GetLocalsArray(frame); _PyStackRef *stack_base = _PyFrame_Stackbase(frame); PyObject *exc = PyErr_GetRaisedException(); From 036ffc50487bb1fbb4893351b11125ce0855d8eb Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 17 Feb 2025 11:38:10 +0000 Subject: [PATCH 23/53] Post merge fixup 2 --- Objects/floatobject.c | 4 ++-- Python/bytecodes.c | 1 + Python/executor_cases.c.h | 1 + Python/generated_cases.c.h | 1 + Tools/cases_generator/analyzer.py | 1 + Tools/cases_generator/generators_common.py | 2 ++ 6 files changed, 8 insertions(+), 2 deletions(-) diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 70bc8ce3053a68..309b8ddcd23839 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -138,8 +138,8 @@ PyFloat_FromDouble(double fval) _PyStackRef _PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef right, double value) { - PyStackRef_CLOSE(left); - PyStackRef_CLOSE(right); + PyStackRef_CLOSE_SPECIALIZED(left, _PyFloat_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyFloat_ExactDealloc); return PyStackRef_FromPyObjectSteal(PyFloat_FromDouble(value)); } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index d2ffcb605d680e..6ec2279652e916 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -869,6 +869,7 @@ dummy_func( PyObject *res_o = 
_PyList_GetItemRef((PyListObject*)list, index); DEOPT_IF(res_o == NULL); STAT_INC(BINARY_OP, hit); + res = PyStackRef_FromPyObjectSteal(res_o); #else DEOPT_IF(index >= PyList_GET_SIZE(list)); STAT_INC(BINARY_OP, hit); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index d7b26fcf0a26f3..4d0799bda8f976 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1242,6 +1242,7 @@ JUMP_TO_JUMP_TARGET(); } STAT_INC(BINARY_OP, hit); + res = PyStackRef_FromPyObjectSteal(res_o); #else if (index >= PyList_GET_SIZE(list)) { UOP_STAT_INC(uopcode, miss); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 9f386bd11480f8..e120d693301f66 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -689,6 +689,7 @@ JUMP_TO_PREDICTED(BINARY_OP); } STAT_INC(BINARY_OP, hit); + res = PyStackRef_FromPyObjectSteal(res_o); #else if (index >= PyList_GET_SIZE(list)) { UPDATE_MISS_STATS(BINARY_OP); diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index bd2f8161adbfa5..dee866aed3ff6f 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -591,6 +591,7 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PyStackRef_FromPyObjectImmortal", "PyStackRef_FromPyObjectNew", "PyStackRef_FromPyObjectSteal", + "PyStackRef_IsExactly", "PyStackRef_FromPyObjectStealMortal", "PyStackRef_HasCount", "PyStackRef_IsNone", diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index a099fb5426f174..db7793a3f0f776 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -330,6 +330,8 @@ def stackref_close_specialized( self.out.emit(comma) dealloc = next(tkn_iter) self.out.emit(dealloc) + if dealloc.kind != "IDENTIFIER": + raise analysis_error("Expected identifier", dealloc) if name.kind == "IDENTIFIER": escapes = dealloc.text not in NON_ESCAPING_DEALLOCS return 
self.stackref_kill(name, storage, escapes) From ea43b6db15479ea49f5686fe97ed0e017c67d376 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 17 Feb 2025 12:25:53 +0000 Subject: [PATCH 24/53] Revert changes to _PyLong_Add as not relevant to this PR --- Include/internal/pycore_long.h | 2 +- Include/internal/pycore_stackref.h | 6 ------ Objects/longobject.c | 29 ++--------------------------- Python/bytecodes.c | 5 +++-- Python/executor_cases.c.h | 8 ++++---- Python/generated_cases.c.h | 5 +++-- 6 files changed, 13 insertions(+), 42 deletions(-) diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index 2692ef3da0fc5d..df0656a7cb8f0c 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -113,7 +113,7 @@ PyAPI_DATA(PyObject*) _PyLong_Rshift(PyObject *, int64_t); // Export for 'math' shared extension PyAPI_DATA(PyObject*) _PyLong_Lshift(PyObject *, int64_t); -PyAPI_FUNC(_PyStackRef) _PyLong_Add(PyLongObject *left, PyLongObject *right); +PyAPI_FUNC(PyObject*) _PyLong_Add(PyLongObject *left, PyLongObject *right); PyAPI_FUNC(PyObject*) _PyLong_Multiply(PyLongObject *left, PyLongObject *right); PyAPI_FUNC(PyObject*) _PyLong_Subtract(PyLongObject *left, PyLongObject *right); diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index eb5585137414c2..5d35fd065a7da4 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -339,12 +339,6 @@ static const _PyStackRef PyStackRef_NULL = { .bits = PyStackRef_NULL_BITS }; #define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) | Py_TAG_IMMORTAL }) #define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) | Py_TAG_IMMORTAL }) -// #define PyStackRef_IsTrue(ref) ((ref).bits == (((uintptr_t)&_Py_TrueStruct) | Py_TAG_IMMORTAL)) -// #define PyStackRef_IsFalse(ref) ((ref).bits == (((uintptr_t)&_Py_FalseStruct) | Py_TAG_IMMORTAL)) -// #define - -/* We should be able to guarantee 
that the tag bits are set for immortal objects */ - #define PyStackRef_IsTrue(REF) ((REF).bits == (((uintptr_t)&_Py_TrueStruct) | Py_TAG_IMMORTAL)) #define PyStackRef_IsFalse(REF) ((REF).bits == (((uintptr_t)&_Py_FalseStruct) | Py_TAG_IMMORTAL)) #define PyStackRef_IsNone(REF) ((REF).bits == (((uintptr_t)&_Py_NoneStruct) | Py_TAG_IMMORTAL)) diff --git a/Objects/longobject.c b/Objects/longobject.c index 38e4ba8f8ead1a..370328dcfe8c9a 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -315,27 +315,6 @@ _PyLong_FromSTwoDigits(stwodigits x) return (PyLongObject*)_PyLong_FromLarge(x); } -/* Create a new int object from a C word-sized int, return a stackref */ -static inline _PyStackRef -_PyLongRef_FromSTwoDigitsRef(stwodigits x) -{ - if (IS_SMALL_INT(x)) { - return PyStackRef_FromPyObjectImmortal(get_small_int((sdigit)x)); - } - assert(x != 0); - PyObject *res; - if (is_medium_int(x)) { - res = _PyLong_FromMedium((sdigit)x); - } - else { - res = _PyLong_FromLarge(x); - } - if (res == NULL) { - return PyStackRef_NULL; - } - return PyStackRef_FromPyObjectStealMortal(res); -} - /* If a freshly-allocated int is already shared, it must be a small integer, so negating it must go to PyLong_FromLong */ Py_LOCAL_INLINE(void) @@ -3897,14 +3876,10 @@ long_add(PyLongObject *a, PyLongObject *b) return z; } -_PyStackRef +PyObject * _PyLong_Add(PyLongObject *a, PyLongObject *b) { - if (_PyLong_BothAreCompact(a, b)) { - stwodigits z = medium_value(a) + medium_value(b); - return _PyLongRef_FromSTwoDigitsRef(z); - } - return PyStackRef_FromPyObjectSteal((PyObject*)long_add(a, b)); + return (PyObject*)long_add(a, b); } static PyObject * diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6ec2279652e916..b43c9362dd14cc 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -601,11 +601,12 @@ dummy_func( assert(PyLong_CheckExact(right_o)); STAT_INC(BINARY_OP, hit); - res = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); + PyObject *res_o = 
_PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); INPUTS_DEAD(); - ERROR_IF(PyStackRef_IsNull(res), error); + ERROR_IF(res_o == NULL, error); + res = PyStackRef_FromPyObjectSteal(res_o); } pure op(_BINARY_OP_SUBTRACT_INT, (left, right -- res)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 4d0799bda8f976..fa923e03c101b5 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -801,16 +801,16 @@ assert(PyLong_CheckExact(right_o)); STAT_INC(BINARY_OP, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - res = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); + PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); - if (PyStackRef_IsNull(res)) { - stack_pointer[-2] = res; - stack_pointer += -1; + if (res_o == NULL) { + stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); JUMP_TO_ERROR(); } + res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index e120d693301f66..7eb3fb3c575623 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -174,13 +174,14 @@ assert(PyLong_CheckExact(right_o)); STAT_INC(BINARY_OP, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - res = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); + PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); - if (PyStackRef_IsNull(res)) { + if (res_o == NULL) { 
JUMP_TO_LABEL(pop_2_error); } + res = PyStackRef_FromPyObjectSteal(res_o); } stack_pointer[-2] = res; stack_pointer += -1; From db7e049bee2bfb31ea5f175277e61082f3cc38c8 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 17 Feb 2025 12:43:41 +0000 Subject: [PATCH 25/53] Fix up a couple of comments --- Python/bytecodes.c | 1 - Python/frame.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index b43c9362dd14cc..765099173a0991 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3538,7 +3538,6 @@ dummy_func( arguments, total_args, NULL, frame ); ERROR_IF(new_frame == NULL, error); - // Manipulate stack directly since we leave using DISPATCH_INLINED(). // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. SYNC_SP(); diff --git a/Python/frame.c b/Python/frame.c index 23af9d69c7f688..00cadeff53df57 100644 --- a/Python/frame.c +++ b/Python/frame.c @@ -111,7 +111,7 @@ _PyFrame_ClearExceptCode(_PyInterpreterFrame *frame) PyFrameObject *f = frame->frame_obj; frame->frame_obj = NULL; if (Py_REFCNT(f) > 1) { - // Take ownership takes the reference to the executable, + // take_ownership takes the reference to the executable, // so we need to incref it. 
PyStackRef_AsPyObjectNew(frame->f_executable); take_ownership(f, frame); From 26e7868adb72e8ea6ab8e93fec055906a0c08c72 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 17 Feb 2025 12:58:47 +0000 Subject: [PATCH 26/53] Add news --- .../2025-02-17-12-48-00.gh-issue-127705.Qad2hx.rst | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-02-17-12-48-00.gh-issue-127705.Qad2hx.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-02-17-12-48-00.gh-issue-127705.Qad2hx.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-17-12-48-00.gh-issue-127705.Qad2hx.rst new file mode 100644 index 00000000000000..1523a69a651f4e --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-17-12-48-00.gh-issue-127705.Qad2hx.rst @@ -0,0 +1,5 @@ +Use tagged references (``_PyStackRef``) for the default build as well as for +the free-threading build. This has a small negative performance impact +short-term but will enable larger speedups in the future and significantly +reduce maintenance costs by allowing a single implementation of tagged +references in the future.
From d3da41caa9849ff542ad8a59b39a5c8b412b7eee Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 17 Feb 2025 13:14:34 +0000 Subject: [PATCH 27/53] Clarifications --- Include/internal/pycore_stackref.h | 30 ++++++++++++------------------ Python/generated_cases.c.h | 2 -- Tools/cases_generator/analyzer.py | 1 - 3 files changed, 12 insertions(+), 21 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 5d35fd065a7da4..79e07311093c3f 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -371,14 +371,14 @@ static inline void PyStackRef_CheckValid(_PyStackRef ref) { /* Does this ref have an embedded refcount */ static inline int -PyStackRef_HasCount(_PyStackRef ref) +PyStackRef_IsUncountedMortal(_PyStackRef ref) { - return ref.bits & Py_TAG_REFCNT; + return (ref.bits & Py_TAG_BITS) == 0; } /* Does this ref have an embedded refcount and refer to a mortal object (NULL is not mortal) */ static inline bool -PyStackRef_HasCountAndMortal(_PyStackRef ref) +PyStackRef_IsCountedMortal(_PyStackRef ref) { return (ref.bits & Py_TAG_BITS) == Py_TAG_REFCNT; } @@ -399,11 +399,11 @@ PyStackRef_AsPyObjectBorrow(_PyStackRef ref) static inline PyObject * PyStackRef_AsPyObjectSteal(_PyStackRef ref) { - if (PyStackRef_HasCount(ref)) { - return Py_NewRef(BITS_TO_PTR_MASKED(ref)); + if (PyStackRef_IsUncountedMortal(ref)) { + return BITS_TO_PTR(ref); } else { - return BITS_TO_PTR(ref); + return Py_NewRef(BITS_TO_PTR_MASKED(ref)); } } @@ -455,17 +455,11 @@ PyStackRef_FromPyObjectImmortal(PyObject *obj) return (_PyStackRef){ .bits = (uintptr_t)obj | Py_TAG_IMMORTAL}; } -static inline _PyStackRef -PyStackRef_WithCount(_PyStackRef ref) -{ - return (_PyStackRef){ .bits = ref.bits | Py_TAG_REFCNT }; -} - static inline _PyStackRef PyStackRef_DUP(_PyStackRef ref) { assert(!PyStackRef_IsNull(ref)); - if (!PyStackRef_HasCount(ref)) { + if (PyStackRef_IsUncountedMortal(ref)) { Py_INCREF_MORTAL(BITS_TO_PTR(ref)); } 
return ref; @@ -474,13 +468,13 @@ PyStackRef_DUP(_PyStackRef ref) static inline bool PyStackRef_IsHeapSafe(_PyStackRef ref) { - return !PyStackRef_HasCountAndMortal(ref); + return !PyStackRef_IsCountedMortal(ref); } static inline _PyStackRef PyStackRef_MakeHeapSafe(_PyStackRef ref) { - if (!PyStackRef_HasCountAndMortal(ref)) { + if (!PyStackRef_IsCountedMortal(ref)) { return ref; } PyObject *obj = BITS_TO_PTR_MASKED(ref); @@ -494,7 +488,7 @@ static inline void PyStackRef_CLOSE(_PyStackRef ref) { assert(!PyStackRef_IsNull(ref)); - if (!PyStackRef_HasCount(ref)) { + if (PyStackRef_IsUncountedMortal(ref)) { Py_DECREF_MORTAL(BITS_TO_PTR(ref)); } } @@ -503,7 +497,7 @@ static inline void PyStackRef_CLOSE_SPECIALIZED(_PyStackRef ref, destructor destruct) { assert(!PyStackRef_IsNull(ref)); - if (!PyStackRef_HasCount(ref)) { + if (PyStackRef_IsUncountedMortal(ref)) { Py_DECREF_MORTAL_SPECIALIZED(BITS_TO_PTR(ref), destruct); } } @@ -512,7 +506,7 @@ static inline void PyStackRef_XCLOSE(_PyStackRef ref) { assert(ref.bits != 0); - if (!PyStackRef_HasCount(ref)) { + if (PyStackRef_IsUncountedMortal(ref)) { assert(!PyStackRef_IsNull(ref)); Py_DECREF_MORTAL(BITS_TO_PTR(ref)); } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 7eb3fb3c575623..050eb99baff6be 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1344,7 +1344,6 @@ assert(WITHIN_STACK_BOUNDS()); JUMP_TO_LABEL(error); } - // Manipulate stack directly since we leave using DISPATCH_INLINED(). // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. stack_pointer += -2 - oparg; @@ -6115,7 +6114,6 @@ assert(WITHIN_STACK_BOUNDS()); JUMP_TO_LABEL(error); } - // Manipulate stack directly since we leave using DISPATCH_INLINED(). // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. 
stack_pointer += -2 - oparg; diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index dee866aed3ff6f..e17f4ef524d773 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -593,7 +593,6 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PyStackRef_FromPyObjectSteal", "PyStackRef_IsExactly", "PyStackRef_FromPyObjectStealMortal", - "PyStackRef_HasCount", "PyStackRef_IsNone", "PyStackRef_Is", "PyStackRef_IsHeapSafe", From 9711f62b938d2675571dd38312815236f3b113b9 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 17 Feb 2025 15:57:35 +0000 Subject: [PATCH 28/53] Fixup comments --- Include/internal/pycore_stackref.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 79e07311093c3f..6010a669688f9a 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -369,14 +369,14 @@ static inline void PyStackRef_CheckValid(_PyStackRef ref) { #endif -/* Does this ref have an embedded refcount */ +/* Does this ref not have an embedded refcount and refer to a mortal object? */ static inline int PyStackRef_IsUncountedMortal(_PyStackRef ref) { return (ref.bits & Py_TAG_BITS) == 0; } -/* Does this ref have an embedded refcount and refer to a mortal object (NULL is not mortal) */ +/* Does this ref have an embedded refcount and refer to a mortal object (NULL is not mortal)? 
*/ static inline bool PyStackRef_IsCountedMortal(_PyStackRef ref) { From d61e0bf0c4da6554aa6664f03de1e2349b05df9b Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 17 Feb 2025 15:57:57 +0000 Subject: [PATCH 29/53] Tidy up frame ownership changes --- Python/frame.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Python/frame.c b/Python/frame.c index 00cadeff53df57..afb3e768491540 100644 --- a/Python/frame.c +++ b/Python/frame.c @@ -52,6 +52,9 @@ take_ownership(PyFrameObject *f, _PyInterpreterFrame *frame) assert(frame->owner != FRAME_OWNED_BY_FRAME_OBJECT); _PyInterpreterFrame *new_frame = (_PyInterpreterFrame *)f->_f_frame_data; _PyFrame_Copy(frame, new_frame); + // _PyFrame_Copy takes the reference to the executable, + // so we need to restore it. + frame->f_executable = PyStackRef_DUP(new_frame->f_executable); f->f_frame = new_frame; new_frame->owner = FRAME_OWNED_BY_FRAME_OBJECT; if (_PyFrame_IsIncomplete(new_frame)) { @@ -111,9 +114,6 @@ _PyFrame_ClearExceptCode(_PyInterpreterFrame *frame) PyFrameObject *f = frame->frame_obj; frame->frame_obj = NULL; if (Py_REFCNT(f) > 1) { - // take_ownership takes the reference to the executable, - // so we need to incref it. - PyStackRef_AsPyObjectNew(frame->f_executable); take_ownership(f, frame); Py_DECREF(f); return; From 2bc9ace4430287c62e0ac499c6e3dd263b71f429 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 18 Feb 2025 11:26:51 +0000 Subject: [PATCH 30/53] Get Py_STACKREF_DEBUG working again --- Include/internal/pycore_stackref.h | 69 ++++++++++++++++++++++++------ Python/ceval.c | 2 +- Python/stackrefs.c | 9 ++++ 3 files changed, 66 insertions(+), 14 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 6010a669688f9a..e8ec971b860410 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -5,7 +5,7 @@ extern "C" { #endif // Define this to get precise tracking of stackrefs. 
-// #define Py_STACKREF_DEBUG 1 +#define Py_STACKREF_DEBUG 1 #ifndef Py_BUILD_CORE # error "this header requires Py_BUILD_CORE define" @@ -144,6 +144,16 @@ PyStackRef_CLOSE(_PyStackRef ref) Py_DECREF(obj); } +static inline void +PyStackRef_XCLOSE(_PyStackRef ref) +{ + if (PyStackRef_IsNull(ref)) { + return; + } + PyObject *obj = _Py_stackref_close(ref); + Py_DECREF(obj); +} + static inline _PyStackRef _PyStackRef_DUP(_PyStackRef ref, const char *filename, int linenumber) { @@ -153,7 +163,47 @@ _PyStackRef_DUP(_PyStackRef ref, const char *filename, int linenumber) } #define PyStackRef_DUP(REF) _PyStackRef_DUP(REF, __FILE__, __LINE__) -#define PyStackRef_CLOSE_SPECIALIZED(stackref, dealloc) PyStackRef_CLOSE(stackref) +extern void PyStackRef_CLOSE_SPECIALIZED(_PyStackRef ref, destructor destruct); + +static inline _PyStackRef +PyStackRef_MakeHeapSafe(_PyStackRef ref) +{ + return ref; +} + +#define PyStackRef_CLEAR(REF) \ + do { \ + _PyStackRef *_tmp_op_ptr = &(REF); \ + _PyStackRef _tmp_old_op = (*_tmp_op_ptr); \ + *_tmp_op_ptr = PyStackRef_NULL; \ + PyStackRef_XCLOSE(_tmp_old_op); \ + } while (0) + + +static inline _PyStackRef +_PyStackRef_FromPyObjectStealMortal(PyObject *obj, const char *filename, int linenumber) +{ + assert(!_Py_IsImmortal(obj)); + return _Py_stackref_create(obj, filename, linenumber); +} +#define PyStackRef_FromPyObjectStealMortal(obj) _PyStackRef_FromPyObjectStealMortal(_PyObject_CAST(obj), __FILE__, __LINE__) + +static inline bool +PyStackRef_IsMortal(_PyStackRef ref) +{ + PyObject *obj = _Py_stackref_get_object(ref); + if (obj == NULL) { + return false; + } + return _Py_IsImmortal(obj); +} + +static inline int +PyStackRef_IsHeapSafe(_PyStackRef ref) +{ + return 1; +} + #else @@ -512,13 +562,6 @@ PyStackRef_XCLOSE(_PyStackRef ref) } } -#define PyStackRef_CLEAR(REF) \ - do { \ - _PyStackRef *_tmp_op_ptr = &(REF); \ - _PyStackRef _tmp_old_op = (*_tmp_op_ptr); \ - *_tmp_op_ptr = PyStackRef_NULL; \ - PyStackRef_XCLOSE(_tmp_old_op); \ - } while (0) 
#endif // Py_GIL_DISABLED @@ -526,13 +569,13 @@ PyStackRef_XCLOSE(_PyStackRef ref) #define PyStackRef_Is(a, b) (((a).bits & (~Py_TAG_BITS)) == ((b).bits & (~Py_TAG_BITS))) -// Converts a PyStackRef back to a PyObject *, converting the -// stackref to a new reference. -#define PyStackRef_AsPyObjectNew(stackref) Py_NewRef(PyStackRef_AsPyObjectBorrow(stackref)) +#endif // !defined(Py_GIL_DISABLED) && defined(Py_STACKREF_DEBUG) #define PyStackRef_TYPE(stackref) Py_TYPE(PyStackRef_AsPyObjectBorrow(stackref)) -#endif // !defined(Py_GIL_DISABLED) && defined(Py_STACKREF_DEBUG) +// Converts a PyStackRef back to a PyObject *, converting the +// stackref to a new reference. +#define PyStackRef_AsPyObjectNew(stackref) Py_NewRef(PyStackRef_AsPyObjectBorrow(stackref)) // StackRef type checks diff --git a/Python/ceval.c b/Python/ceval.c index 052ff88e3d93bd..f55165bcd29a8c 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -157,7 +157,7 @@ dump_item(_PyStackRef item) PyErr_Clear(); } // Don't call __repr__(), it might recurse into the interpreter. 
- printf("<%s at %p>", Py_TYPE(obj)->tp_name, (void *)(item.bits)); + printf("<%s at %p>", Py_TYPE(obj)->tp_name, (void *)obj); } static void diff --git a/Python/stackrefs.c b/Python/stackrefs.c index 9bb46897685570..fe62f921187b94 100644 --- a/Python/stackrefs.c +++ b/Python/stackrefs.c @@ -1,6 +1,7 @@ #include "Python.h" +#include "pycore_object.h" #include "pycore_stackref.h" #if !defined(Py_GIL_DISABLED) && defined(Py_STACKREF_DEBUG) @@ -149,8 +150,16 @@ _Py_stackref_report_leaks(PyInterpreterState *interp) int leak = 0; _Py_hashtable_foreach(interp->stackref_debug_table, report_leak, &leak); if (leak) { + fflush(stdout); Py_FatalError("Stackrefs leaked."); } } +void +PyStackRef_CLOSE_SPECIALIZED(_PyStackRef ref, destructor destruct) +{ + PyObject *obj = _Py_stackref_close(ref); + _Py_DECREF_SPECIALIZED(obj, destruct); +} + #endif From 12b961b6f6c9dfa05cc1bb70597b1e72c65e6e68 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 18 Feb 2025 11:40:52 +0000 Subject: [PATCH 31/53] Turn off Py_STACKREF_DEBUG --- Include/internal/pycore_stackref.h | 2 +- Python/optimizer_bytecodes.c | 3 ++- Tools/cases_generator/generators_common.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index e8ec971b860410..418b154b9d3a7d 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -5,7 +5,7 @@ extern "C" { #endif // Define this to get precise tracking of stackrefs. 
-#define Py_STACKREF_DEBUG 1 +// #define Py_STACKREF_DEBUG 1 #ifndef Py_BUILD_CORE # error "this header requires Py_BUILD_CORE define" diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index e0f2f9cdafd25b..41eb59c931aaa7 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -228,7 +228,8 @@ dummy_func(void) { { assert(PyLong_CheckExact(sym_get_const(left))); assert(PyLong_CheckExact(sym_get_const(right))); - PyObject *temp = PyNumber_Add(sym_get_const(left), sym_get_const(right)); + PyObject *temp = _PyLong_Add((PyLongObject *)sym_get_const(left), + (PyLongObject *)sym_get_const(right)); if (temp == NULL) { goto error; } diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index db7793a3f0f776..6b2ef51b29f4eb 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -329,9 +329,9 @@ def stackref_close_specialized( raise analysis_error("Expected comma", comma) self.out.emit(comma) dealloc = next(tkn_iter) - self.out.emit(dealloc) if dealloc.kind != "IDENTIFIER": raise analysis_error("Expected identifier", dealloc) + self.out.emit(dealloc) if name.kind == "IDENTIFIER": escapes = dealloc.text not in NON_ESCAPING_DEALLOCS return self.stackref_kill(name, storage, escapes) From 00cd0652c3f3494dc39522ac4db64c78542c7208 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 18 Feb 2025 11:54:42 +0000 Subject: [PATCH 32/53] Reduce diff --- Include/internal/pycore_stackref.h | 9 ++++++++- Python/bytecodes.c | 1 + Python/executor_cases.c.h | 10 ++++------ Python/generated_cases.c.h | 10 ++++------ Python/optimizer_cases.c.h | 3 ++- 5 files changed, 19 insertions(+), 14 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 418b154b9d3a7d..1bd96411dbb263 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -179,7 +179,6 @@ 
PyStackRef_MakeHeapSafe(_PyStackRef ref) PyStackRef_XCLOSE(_tmp_old_op); \ } while (0) - static inline _PyStackRef _PyStackRef_FromPyObjectStealMortal(PyObject *obj, const char *filename, int linenumber) { @@ -562,6 +561,14 @@ PyStackRef_XCLOSE(_PyStackRef ref) } } +#define PyStackRef_CLEAR(REF) \ + do { \ + _PyStackRef *_tmp_op_ptr = &(REF); \ + _PyStackRef _tmp_old_op = (*_tmp_op_ptr); \ + *_tmp_op_ptr = PyStackRef_NULL; \ + PyStackRef_XCLOSE(_tmp_old_op); \ + } while (0) + #endif // Py_GIL_DISABLED diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 765099173a0991..a6a861a1c9cedf 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -916,6 +916,7 @@ dummy_func( STAT_INC(BINARY_OP, hit); PyObject *res_o = PyTuple_GET_ITEM(tuple, index); assert(res_o != NULL); + PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); res = PyStackRef_FromPyObjectNew(res_o); DECREF_INPUTS(); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index fa923e03c101b5..29ca6f3bb764e1 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1344,18 +1344,16 @@ STAT_INC(BINARY_OP, hit); PyObject *res_o = PyTuple_GET_ITEM(tuple, index); assert(res_o != NULL); + PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); res = PyStackRef_FromPyObjectNew(res_o); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); _PyStackRef tmp = tuple_st; tuple_st = res; - stack_pointer[-2] = tuple_st; + stack_pointer[-1] = tuple_st; PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(sub_st); - stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer[-1] = res; break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 050eb99baff6be..9cb9767743d484 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ 
-824,18 +824,16 @@ STAT_INC(BINARY_OP, hit); PyObject *res_o = PyTuple_GET_ITEM(tuple, index); assert(res_o != NULL); + PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); res = PyStackRef_FromPyObjectNew(res_o); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); _PyStackRef tmp = tuple_st; tuple_st = res; - stack_pointer[-2] = tuple_st; + stack_pointer[-1] = tuple_st; PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(sub_st); - stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer[-1] = res; DISPATCH(); } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 6f74f1b3b665a7..51d0fa63e64bc5 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -310,7 +310,8 @@ assert(PyLong_CheckExact(sym_get_const(right))); stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); - PyObject *temp = PyNumber_Add(sym_get_const(left), sym_get_const(right)); + PyObject *temp = _PyLong_Add((PyLongObject *)sym_get_const(left), + (PyLongObject *)sym_get_const(right)); if (temp == NULL) { goto error; } From 0bea1f1819dfb4ee5ded3a7a4105fe3ae97622b3 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 18 Feb 2025 11:58:33 +0000 Subject: [PATCH 33/53] Reduce diff --- Include/internal/pycore_uop_metadata.h | 2 +- Python/bytecodes.c | 3 ++- Python/executor_cases.c.h | 8 ++++---- Python/generated_cases.c.h | 6 ++---- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 8c2dc369fd53d5..a3c84d9c4d95d6 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -232,7 +232,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CALL_STR_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, 
[_CALL_TUPLE_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CHECK_AND_ALLOCATE_OBJECT] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, - [_CREATE_INIT_FRAME] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_CREATE_INIT_FRAME] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_EXIT_INIT_CHECK] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_CALL_BUILTIN_CLASS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_BUILTIN_O] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index a6a861a1c9cedf..c6cd4a9632c1b6 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3903,9 +3903,10 @@ dummy_func( DEAD(self); _PyInterpreterFrame *temp = _PyEvalFramePushAndInit( tstate, init[0], NULL, args-1, oparg+1, NULL, shim); + SYNC_SP(); if (temp == NULL) { _PyEval_FrameClearAndPop(tstate, shim); - ERROR_IF(true, error); + ERROR_NO_POP(); } init_frame = temp; frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 29ca6f3bb764e1..5915ae9dffce25 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5323,12 +5323,12 @@ _PyInterpreterFrame *temp = _PyEvalFramePushAndInit( tstate, init[0], NULL, args-1, oparg+1, NULL, shim); stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -2 - oparg; + assert(WITHIN_STACK_BOUNDS()); if (temp == NULL) { _PyFrame_SetStackPointer(frame, stack_pointer); _PyEval_FrameClearAndPop(tstate, shim); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); JUMP_TO_ERROR(); } init_frame = temp; @@ -5337,8 +5337,8 @@ * We don't check recursion depth here, * as it will be checked after start_frame */ tstate->py_recursion_remaining--; - stack_pointer[-2 - oparg].bits = 
(uintptr_t)init_frame; - stack_pointer += -1 - oparg; + stack_pointer[0].bits = (uintptr_t)init_frame; + stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 9cb9767743d484..d5a6305c70081b 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1542,12 +1542,12 @@ _PyInterpreterFrame *temp = _PyEvalFramePushAndInit( tstate, init[0], NULL, args-1, oparg+1, NULL, shim); stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -2 - oparg; + assert(WITHIN_STACK_BOUNDS()); if (temp == NULL) { _PyFrame_SetStackPointer(frame, stack_pointer); _PyEval_FrameClearAndPop(tstate, shim); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); JUMP_TO_LABEL(error); } init_frame = temp; @@ -1564,8 +1564,6 @@ // Eventually this should be the only occurrence of this code. assert(tstate->interp->eval_frame == NULL); _PyInterpreterFrame *temp = new_frame; - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); assert(new_frame->previous == frame || new_frame->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); From 0bae25d5161e9436efef8347eeb3cb1149bea7f3 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 18 Feb 2025 12:05:34 +0000 Subject: [PATCH 34/53] Reduce diff --- Include/internal/pycore_opcode_metadata.h | 4 ++-- Python/bytecodes.c | 7 +++++-- Python/generated_cases.c.h | 22 ++++++++++------------ 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index b11f874a7ff357..ddac7cc431e7b4 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -2024,7 +2024,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [BUILD_STRING] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG }, 
[BUILD_TUPLE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, [CACHE] = { true, INSTR_FMT_IX, 0 }, - [CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [CALL_ALLOC_AND_ENTER_INIT] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [CALL_BOUND_METHOD_EXACT_ARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [CALL_BOUND_METHOD_GENERAL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, @@ -2094,7 +2094,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [GET_YIELD_FROM_ITER] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [IMPORT_FROM] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [IMPORT_NAME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [INSTRUMENTED_CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [INSTRUMENTED_CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_CALL_FUNCTION_EX] = { true, INSTR_FMT_IX, HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_CALL_KW] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_END_FOR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NO_SAVE_IP_FLAG }, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index c6cd4a9632c1b6..39986d01504e7f 100644 --- 
a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3538,10 +3538,13 @@ dummy_func( tstate, callable[0], locals, arguments, total_args, NULL, frame ); - ERROR_IF(new_frame == NULL, error); + // Manipulate stack directly since we leave using DISPATCH_INLINED(). + SYNC_SP(); // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. - SYNC_SP(); + if (new_frame == NULL) { + ERROR_NO_POP(); + } frame->return_offset = INSTRUCTION_SIZE; DISPATCH_INLINED(new_frame); } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index d5a6305c70081b..fed1f557fd3ed3 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1337,15 +1337,14 @@ arguments, total_args, NULL, frame ); stack_pointer = _PyFrame_GetStackPointer(frame); + // Manipulate stack directly since we leave using DISPATCH_INLINED(). + stack_pointer += -2 - oparg; + assert(WITHIN_STACK_BOUNDS()); + // The frame has stolen all the arguments from the stack, + // so there is no need to clean them up. if (new_frame == NULL) { - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); JUMP_TO_LABEL(error); } - // The frame has stolen all the arguments from the stack, - // so there is no need to clean them up. - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); frame->return_offset = 4 ; DISPATCH_INLINED(new_frame); } @@ -6105,15 +6104,14 @@ arguments, total_args, NULL, frame ); stack_pointer = _PyFrame_GetStackPointer(frame); + // Manipulate stack directly since we leave using DISPATCH_INLINED(). + stack_pointer += -2 - oparg; + assert(WITHIN_STACK_BOUNDS()); + // The frame has stolen all the arguments from the stack, + // so there is no need to clean them up. if (new_frame == NULL) { - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); JUMP_TO_LABEL(error); } - // The frame has stolen all the arguments from the stack, - // so there is no need to clean them up. 
- stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); frame->return_offset = 4 ; DISPATCH_INLINED(new_frame); } From a527aaf4e459146c1514940ccbeda9634a92fbf2 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 18 Feb 2025 15:17:53 +0000 Subject: [PATCH 35/53] Fix immortality test --- Lib/test/test_builtin.py | 9 +++------ Python/sysmodule.c | 3 --- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index 913d007a126d72..314480a10b4f1d 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -2694,18 +2694,15 @@ def __del__(self): class ImmortalTests(unittest.TestCase): if sys.maxsize < (1 << 32): - if support.Py_GIL_DISABLED: - IMMORTAL_REFCOUNT = 5 << 28 - else: - IMMORTAL_REFCOUNT = 7 << 28 + IMMORTAL_REFCOUNT_MINIMUM = 1 << 30 else: - IMMORTAL_REFCOUNT = 3 << 30 + IMMORTAL_REFCOUNT_MINIMUM = 1 << 31 IMMORTALS = (None, True, False, Ellipsis, NotImplemented, *range(-5, 257)) def assert_immortal(self, immortal): with self.subTest(immortal): - self.assertEqual(sys.getrefcount(immortal), self.IMMORTAL_REFCOUNT) + self.assertGreater(sys.getrefcount(immortal), self.IMMORTAL_REFCOUNT_MINIMUM) def test_immortals(self): for immortal in self.IMMORTALS: diff --git a/Python/sysmodule.c b/Python/sysmodule.c index a61c35d53d6736..d5cb448eb618e8 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -1954,9 +1954,6 @@ static Py_ssize_t sys_getrefcount_impl(PyObject *module, PyObject *object) /*[clinic end generated code: output=5fd477f2264b85b2 input=bf474efd50a21535]*/ { - if (_Py_IsImmortal(object)) { - return _Py_IMMORTAL_INITIAL_REFCNT; - } return Py_REFCNT(object); } From 56509f41f2ca256d7111d8d179be2c2c7a5cfa6d Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 19 Feb 2025 14:57:11 +0000 Subject: [PATCH 36/53] Add some asserts --- Include/refcount.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Include/refcount.h b/Include/refcount.h index 
e800174c514368..7e8a10941bb425 100644 --- a/Include/refcount.h +++ b/Include/refcount.h @@ -390,6 +390,7 @@ static inline void Py_DECREF_MORTAL(const char *filename, int lineno, PyObject * _Py_NegativeRefcount(filename, lineno, op); } _Py_DECREF_STAT_INC(); + assert(!_Py_IsStaticImmortal(op)); if (!_Py_IsImmortal(op)) { _Py_DECREF_DecRefTotal(); } @@ -407,6 +408,7 @@ static inline void _Py_DECREF_MORTAL_SPECIALIZED(const char *filename, int linen _Py_NegativeRefcount(filename, lineno, op); } _Py_DECREF_STAT_INC(); + assert(!_Py_IsStaticImmortal(op)); if (!_Py_IsImmortal(op)) { _Py_DECREF_DecRefTotal(); } @@ -442,6 +444,7 @@ static inline void Py_DECREF(const char *filename, int lineno, PyObject *op) #else static inline void Py_DECREF_MORTAL(PyObject *op) { + assert(!_Py_IsStaticImmortal(op)); _Py_DECREF_STAT_INC(); if (--op->ob_refcnt == 0) { _Py_Dealloc(op); @@ -451,6 +454,7 @@ static inline void Py_DECREF_MORTAL(PyObject *op) static inline void Py_DECREF_MORTAL_SPECIALIZED(PyObject *op, destructor destruct) { + assert(!_Py_IsStaticImmortal(op)); _Py_DECREF_STAT_INC(); if (--op->ob_refcnt == 0) { destruct(op); From d270553d1a760dad3bc2beba2da8bb8c320bcc1f Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 20 Feb 2025 11:10:02 +0000 Subject: [PATCH 37/53] Fix refcount accounting --- Include/internal/pycore_stackref.h | 2 +- Include/refcount.h | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 1bd96411dbb263..b3198cac816138 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -402,7 +402,7 @@ static inline void PyStackRef_CheckValid(_PyStackRef ref) { case 0: case Py_TAG_REFCNT: /* Can be immortal if object was made immortal after reference came into existence */ - assert(obj != NULL && obj != Py_True && obj != Py_False && obj != Py_None); + assert(!_Py_IsStaticImmortal(obj)); break; case Py_TAG_IMMORTAL: assert(obj == NULL 
|| _Py_IsImmortal(obj)); diff --git a/Include/refcount.h b/Include/refcount.h index 7e8a10941bb425..8cb92482f21950 100644 --- a/Include/refcount.h +++ b/Include/refcount.h @@ -391,9 +391,7 @@ static inline void Py_DECREF_MORTAL(const char *filename, int lineno, PyObject * } _Py_DECREF_STAT_INC(); assert(!_Py_IsStaticImmortal(op)); - if (!_Py_IsImmortal(op)) { - _Py_DECREF_DecRefTotal(); - } + _Py_DECREF_DecRefTotal(); if (--op->ob_refcnt == 0) { _Py_Dealloc(op); } @@ -409,9 +407,7 @@ static inline void _Py_DECREF_MORTAL_SPECIALIZED(const char *filename, int linen } _Py_DECREF_STAT_INC(); assert(!_Py_IsStaticImmortal(op)); - if (!_Py_IsImmortal(op)) { - _Py_DECREF_DecRefTotal(); - } + _Py_DECREF_DecRefTotal(); if (--op->ob_refcnt == 0) { destruct(op); } From 98e132b5287204b7c1cf3a3f33a3a24a8886a0c2 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 20 Feb 2025 16:22:10 +0000 Subject: [PATCH 38/53] Use Sam's suggestion to avoid branching --- Include/internal/pycore_stackref.h | 7 ++++++- Include/object.h | 10 ++++++---- Include/refcount.h | 7 ++++--- Objects/object.c | 3 +++ 4 files changed, 19 insertions(+), 8 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index b3198cac816138..b83951382024f0 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -375,7 +375,7 @@ PyStackRef_AsStrongReference(_PyStackRef stackref) // With GIL #define Py_TAG_BITS 3 -#define Py_TAG_IMMORTAL 3 +#define Py_TAG_IMMORTAL _Py_IMMORTAL_FLAGS #define Py_TAG_REFCNT 1 #define BITS_TO_PTR(REF) ((PyObject *)((REF).bits)) #define BITS_TO_PTR_MASKED(REF) ((PyObject *)(((REF).bits) & (~Py_TAG_BITS))) @@ -460,7 +460,12 @@ static inline _PyStackRef PyStackRef_FromPyObjectSteal(PyObject *obj) { assert(obj != NULL); + unsigned int tag; +#if SIZEOF_VOID_P > 4 + tag = obj->ob_flags & Py_TAG_BITS; +#else unsigned int tag = _Py_IsImmortal(obj) ? 
Py_TAG_IMMORTAL : 0; +#endif _PyStackRef ref = ((_PyStackRef){.bits = ((uintptr_t)(obj)) | tag}); PyStackRef_CheckValid(ref); return ref; diff --git a/Include/object.h b/Include/object.h index da7b3668c033f4..9172ba8adfdf29 100644 --- a/Include/object.h +++ b/Include/object.h @@ -124,11 +124,13 @@ struct _object { PY_INT64_T ob_refcnt_full; /* This field is needed for efficient initialization with Clang on ARM */ struct { # if PY_BIG_ENDIAN - PY_UINT32_T ob_flags; - PY_UINT32_T ob_refcnt; + uint16_t ob_flags; + uint16_t ob_overflow; + uint32_t ob_refcnt; # else - PY_UINT32_T ob_refcnt; - PY_UINT32_T ob_flags; + uint32_t ob_refcnt; + uint16_t ob_overflow; + uint16_t ob_flags; # endif }; #else diff --git a/Include/refcount.h b/Include/refcount.h index 8cb92482f21950..51aeec6a2488b3 100644 --- a/Include/refcount.h +++ b/Include/refcount.h @@ -20,7 +20,8 @@ cleanup during runtime finalization. */ /* Leave the low bits for refcount overflow for old stable ABI code */ -#define _Py_STATICALLY_ALLOCATED_FLAG (1 << 7) +#define _Py_STATICALLY_ALLOCATED_FLAG 4 +#define _Py_IMMORTAL_FLAGS 3 #if SIZEOF_VOID_P > 4 /* @@ -43,7 +44,8 @@ be done by checking the bit sign flag in the lower 32 bits. 
*/ #define _Py_IMMORTAL_INITIAL_REFCNT (3UL << 30) -#define _Py_STATIC_IMMORTAL_INITIAL_REFCNT ((Py_ssize_t)(_Py_IMMORTAL_INITIAL_REFCNT | (((Py_ssize_t)_Py_STATICALLY_ALLOCATED_FLAG) << 32))) +#define _Py_STATIC_FLAG_BITS ((Py_ssize_t)(_Py_STATICALLY_ALLOCATED_FLAG | _Py_IMMORTAL_FLAGS)) +#define _Py_STATIC_IMMORTAL_INITIAL_REFCNT (((Py_ssize_t)_Py_IMMORTAL_INITIAL_REFCNT) | (_Py_STATIC_FLAG_BITS << 48)) #else /* @@ -114,7 +116,6 @@ PyAPI_FUNC(Py_ssize_t) Py_REFCNT(PyObject *ob); #endif #endif - static inline Py_ALWAYS_INLINE int _Py_IsImmortal(PyObject *op) { #if defined(Py_GIL_DISABLED) diff --git a/Objects/object.c b/Objects/object.c index d342549b6ffecc..b3d9d00b3ac175 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2524,6 +2524,9 @@ _Py_SetImmortalUntracked(PyObject *op) op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL; op->ob_ref_shared = 0; _Py_atomic_or_uint8(&op->ob_gc_bits, _PyGC_BITS_DEFERRED); +#elif SIZEOF_VOID_P > 4 + op->ob_flags = _Py_IMMORTAL_FLAGS; + op->ob_refcnt = _Py_IMMORTAL_INITIAL_REFCNT; #else op->ob_refcnt = _Py_IMMORTAL_INITIAL_REFCNT; #endif From b1edb9658cb4e935d4389799f92b327a05d4501b Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 25 Feb 2025 16:40:02 +0000 Subject: [PATCH 39/53] Post merge fixup --- Include/internal/pycore_stackref.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index b83951382024f0..60ba593090d2d7 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -460,9 +460,8 @@ static inline _PyStackRef PyStackRef_FromPyObjectSteal(PyObject *obj) { assert(obj != NULL); - unsigned int tag; #if SIZEOF_VOID_P > 4 - tag = obj->ob_flags & Py_TAG_BITS; + unsigned int tag = obj->ob_flags & Py_TAG_BITS; #else unsigned int tag = _Py_IsImmortal(obj) ? 
Py_TAG_IMMORTAL : 0; #endif From 4b0ef082c3f711ae580dc7e639ecdbed70053ab4 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 25 Feb 2025 17:51:03 +0000 Subject: [PATCH 40/53] Revert changes to nogil flags --- Include/internal/pycore_object.h | 2 +- Include/object.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 0852146b430c5a..56712e33155397 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -73,7 +73,7 @@ PyAPI_FUNC(int) _PyObject_IsFreed(PyObject *); #define _PyObject_HEAD_INIT(type) \ { \ .ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL, \ - .ob_flags = _Py_STATIC_FLAG_BITS, \ + .ob_flags = _Py_STATICALLY_ALLOCATED_FLAG, \ .ob_gc_bits = _PyGC_BITS_DEFERRED, \ .ob_type = (type) \ } diff --git a/Include/object.h b/Include/object.h index ae2e59e9318065..a4c11264fc6727 100644 --- a/Include/object.h +++ b/Include/object.h @@ -71,7 +71,7 @@ whose size is determined when the object is allocated. #define PyObject_HEAD_INIT(type) \ { \ 0, \ - _Py_STATIC_FLAG_BITS, \ + _Py_STATIC_IMMORTAL_INITIAL_REFCNT, \ { 0 }, \ 0, \ _Py_IMMORTAL_REFCNT_LOCAL, \ From c501175f7f099aa23e9ffc856f1e1583846be1fb Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 27 Feb 2025 12:04:17 +0000 Subject: [PATCH 41/53] Revert erroneous change to FT refcount initialization --- Include/object.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/object.h b/Include/object.h index a4c11264fc6727..9172ba8adfdf29 100644 --- a/Include/object.h +++ b/Include/object.h @@ -71,7 +71,7 @@ whose size is determined when the object is allocated. 
#define PyObject_HEAD_INIT(type) \ { \ 0, \ - _Py_STATIC_IMMORTAL_INITIAL_REFCNT, \ + _Py_STATICALLY_ALLOCATED_FLAG, \ { 0 }, \ 0, \ _Py_IMMORTAL_REFCNT_LOCAL, \ From 4702d585eeb558d71d10fc8ddcfb0e0d02c0a72c Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 25 Feb 2025 10:10:57 +0000 Subject: [PATCH 42/53] Use macros on Windows --- Include/internal/pycore_stackref.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 60ba593090d2d7..1a8b2f327a714d 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -370,6 +370,7 @@ PyStackRef_AsStrongReference(_PyStackRef stackref) } \ } while (0) + #else // Py_GIL_DISABLED // With GIL @@ -418,6 +419,12 @@ static inline void PyStackRef_CheckValid(_PyStackRef ref) { #endif +#ifdef _WIN32 +#define PyStackRef_IsUncountedMortal(REF) (((REF).bits & Py_TAG_BITS) == 0) +#define PyStackRef_IsCountedMortal(REF) (((REF).bits & Py_TAG_BITS) == Py_TAG_REFCNT) +#define PyStackRef_IsMortal(REF) (((REF).bits & Py_TAG_BITS) != Py_TAG_IMMORTAL) +#define PyStackRef_AsPyObjectBorrow BITS_TO_PTR_MASKED +#else /* Does this ref not have an embedded refcount and refer to a mortal object? */ static inline int PyStackRef_IsUncountedMortal(_PyStackRef ref) @@ -444,6 +451,7 @@ PyStackRef_AsPyObjectBorrow(_PyStackRef ref) { return BITS_TO_PTR_MASKED(ref); } +#endif static inline PyObject * PyStackRef_AsPyObjectSteal(_PyStackRef ref) @@ -508,6 +516,10 @@ PyStackRef_FromPyObjectImmortal(PyObject *obj) return (_PyStackRef){ .bits = (uintptr_t)obj | Py_TAG_IMMORTAL}; } +#ifdef _WIN32 +#define PyStackRef_DUP(REF) \ + (PyStackRef_IsUncountedMortal(REF) ? 
Py_INCREF_MORTAL(BITS_TO_PTR(ref)) : (REF)) +#else static inline _PyStackRef PyStackRef_DUP(_PyStackRef ref) { @@ -517,6 +529,7 @@ PyStackRef_DUP(_PyStackRef ref) } return ref; } +#endif static inline bool PyStackRef_IsHeapSafe(_PyStackRef ref) @@ -537,6 +550,12 @@ PyStackRef_MakeHeapSafe(_PyStackRef ref) return ref; } +#ifdef _WIN32 +#define PyStackRef_CLOSE(REF) \ +do { \ + if (PyStackRef_IsUncountedMortal(REF)) Py_DECREF_MORTAL(BITS_TO_PTR(ref)); \ +} while (0) +#else static inline void PyStackRef_CLOSE(_PyStackRef ref) { @@ -545,6 +564,7 @@ PyStackRef_CLOSE(_PyStackRef ref) Py_DECREF_MORTAL(BITS_TO_PTR(ref)); } } +#endif static inline void PyStackRef_CLOSE_SPECIALIZED(_PyStackRef ref, destructor destruct) @@ -555,6 +575,9 @@ PyStackRef_CLOSE_SPECIALIZED(_PyStackRef ref, destructor destruct) } } +#ifdef _WIN32 +#define PyStackRef_XCLOSE PyStackRef_CLOSE +#else static inline void PyStackRef_XCLOSE(_PyStackRef ref) { @@ -564,6 +587,7 @@ PyStackRef_XCLOSE(_PyStackRef ref) Py_DECREF_MORTAL(BITS_TO_PTR(ref)); } } +#endif #define PyStackRef_CLEAR(REF) \ do { \ From 3e64d74c336cb4b8d95b02825f1db6d2e4157615 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 26 Feb 2025 09:15:27 +0000 Subject: [PATCH 43/53] Fix typo --- Include/internal/pycore_stackref.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 1a8b2f327a714d..a99e1205d3dfee 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -518,7 +518,7 @@ PyStackRef_FromPyObjectImmortal(PyObject *obj) #ifdef _WIN32 #define PyStackRef_DUP(REF) \ - (PyStackRef_IsUncountedMortal(REF) ? Py_INCREF_MORTAL(BITS_TO_PTR(ref)) : (REF)) + (PyStackRef_IsUncountedMortal(REF) ? 
Py_INCREF_MORTAL(BITS_TO_PTR(REF)) : (REF)) #else static inline _PyStackRef PyStackRef_DUP(_PyStackRef ref) From 0b7a5449c8acdc9a4d10670068d5c81d078c31fd Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 27 Feb 2025 12:20:16 +0000 Subject: [PATCH 44/53] Fix macro --- Include/internal/pycore_stackref.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index a99e1205d3dfee..627b24d8dd3d84 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -553,7 +553,7 @@ PyStackRef_MakeHeapSafe(_PyStackRef ref) #ifdef _WIN32 #define PyStackRef_CLOSE(REF) \ do { \ - if (PyStackRef_IsUncountedMortal(REF)) Py_DECREF_MORTAL(BITS_TO_PTR(ref)); \ + if (PyStackRef_IsUncountedMortal(REF)) Py_DECREF_MORTAL(BITS_TO_PTR(REF)); \ } while (0) #else static inline void From 17b97c699679da9d1db7cd8b80d5e3a65050496d Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 27 Feb 2025 12:31:30 +0000 Subject: [PATCH 45/53] Make sure PyStackRef_DUP returns a reference --- Include/internal/pycore_stackref.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 627b24d8dd3d84..bc06a085eb5434 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -518,7 +518,7 @@ PyStackRef_FromPyObjectImmortal(PyObject *obj) #ifdef _WIN32 #define PyStackRef_DUP(REF) \ - (PyStackRef_IsUncountedMortal(REF) ? Py_INCREF_MORTAL(BITS_TO_PTR(REF)) : (REF)) + (PyStackRef_IsUncountedMortal(REF) ? 
(Py_INCREF_MORTAL(BITS_TO_PTR(REF)), (REF)) : (REF)) #else static inline _PyStackRef PyStackRef_DUP(_PyStackRef ref) From bf863cd465c78d41cdd22438dc7ae12090266d87 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 27 Feb 2025 17:50:14 +0000 Subject: [PATCH 46/53] Avoid branch for known mortal objects --- Include/internal/pycore_stackref.h | 12 ++++++++++++ Python/bytecodes.c | 2 +- Python/executor_cases.c.h | 2 +- Python/generated_cases.c.h | 2 +- Tools/cases_generator/analyzer.py | 1 + 5 files changed, 16 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index bc06a085eb5434..03ac31bff3a572 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -370,6 +370,7 @@ PyStackRef_AsStrongReference(_PyStackRef stackref) } \ } while (0) +#define PyStackRef_FromPyObjectNewMortal PyStackRef_FromPyObjectNew #else // Py_GIL_DISABLED @@ -508,6 +509,17 @@ _PyStackRef_FromPyObjectNew(PyObject *obj) } #define PyStackRef_FromPyObjectNew(obj) _PyStackRef_FromPyObjectNew(_PyObject_CAST(obj)) +static inline _PyStackRef +_PyStackRef_FromPyObjectNewMortal(PyObject *obj) +{ + assert(obj != NULL); + Py_INCREF_MORTAL(obj); + _PyStackRef ref = (_PyStackRef){ .bits = (uintptr_t)obj }; + PyStackRef_CheckValid(ref); + return ref; +} +#define PyStackRef_FromPyObjectNewMortal(obj) _PyStackRef_FromPyObjectNewMortal(_PyObject_CAST(obj)) + /* Create a new reference from an object with an embedded reference count */ static inline _PyStackRef PyStackRef_FromPyObjectImmortal(PyObject *obj) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 108355732c5967..747623f317c5d4 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -311,7 +311,7 @@ dummy_func( inst(LOAD_CONST_MORTAL, (-- value)) { PyObject *obj = GETITEM(FRAME_CO_CONSTS, oparg); - value = PyStackRef_FromPyObjectNew(obj); + value = PyStackRef_FromPyObjectNewMortal(obj); } inst(LOAD_CONST_IMMORTAL, (-- value)) { diff --git 
a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 16c83bf83aa586..b67dad1ee5187d 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -218,7 +218,7 @@ _PyStackRef value; oparg = CURRENT_OPARG(); PyObject *obj = GETITEM(FRAME_CO_CONSTS, oparg); - value = PyStackRef_FromPyObjectNew(obj); + value = PyStackRef_FromPyObjectNewMortal(obj); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index daa65f3d1c63b1..7aa5b02441cfba 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -8703,7 +8703,7 @@ static_assert(0 == 0, "incorrect cache size"); _PyStackRef value; PyObject *obj = GETITEM(FRAME_CO_CONSTS, oparg); - value = PyStackRef_FromPyObjectNew(obj); + value = PyStackRef_FromPyObjectNewMortal(obj); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 234faf4f44ab4b..890c9a03153557 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -583,6 +583,7 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PySlice_New", "PyStackRef_AsPyObjectBorrow", "PyStackRef_AsPyObjectNew", + "PyStackRef_FromPyObjectNewMortal", "PyStackRef_AsPyObjectSteal", "PyStackRef_CLEAR", "PyStackRef_CLOSE_SPECIALIZED", From 1e66e5074753aa564684020540f34e9dd18dba49 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 28 Feb 2025 10:46:56 +0000 Subject: [PATCH 47/53] Don't evaluate macro argument twice --- Include/internal/pycore_stackref.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 03ac31bff3a572..0130e7c05e75eb 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -528,6 +528,7 @@ PyStackRef_FromPyObjectImmortal(PyObject *obj) return (_PyStackRef){ .bits = 
(uintptr_t)obj | Py_TAG_IMMORTAL}; } +/* WARNING: This macro evaluates its argument twice */ #ifdef _WIN32 #define PyStackRef_DUP(REF) \ (PyStackRef_IsUncountedMortal(REF) ? (Py_INCREF_MORTAL(BITS_TO_PTR(REF)), (REF)) : (REF)) @@ -565,7 +566,8 @@ PyStackRef_MakeHeapSafe(_PyStackRef ref) #ifdef _WIN32 #define PyStackRef_CLOSE(REF) \ do { \ - if (PyStackRef_IsUncountedMortal(REF)) Py_DECREF_MORTAL(BITS_TO_PTR(REF)); \ + _PyStackRef _temp = (REF); + if (PyStackRef_IsUncountedMortal(_temp)) Py_DECREF_MORTAL(BITS_TO_PTR(_temp)); \ } while (0) #else static inline void From 3e929d7a125ac5b16e36d14a5b0fa7f043d1853f Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 28 Feb 2025 11:11:51 +0000 Subject: [PATCH 48/53] Add missing backslash --- Include/internal/pycore_stackref.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 0130e7c05e75eb..f591a6c6b7c515 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -566,7 +566,7 @@ PyStackRef_MakeHeapSafe(_PyStackRef ref) #ifdef _WIN32 #define PyStackRef_CLOSE(REF) \ do { \ - _PyStackRef _temp = (REF); + _PyStackRef _temp = (REF); \ if (PyStackRef_IsUncountedMortal(_temp)) Py_DECREF_MORTAL(BITS_TO_PTR(_temp)); \ } while (0) #else From e2f1387bf2c22c632f757ec5254346a995ffef95 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 3 Mar 2025 15:16:22 +0000 Subject: [PATCH 49/53] Update Misc/NEWS.d/next/Core_and_Builtins/2025-02-17-12-48-00.gh-issue-127705.Qad2hx.rst Co-authored-by: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> --- .../2025-02-17-12-48-00.gh-issue-127705.Qad2hx.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-02-17-12-48-00.gh-issue-127705.Qad2hx.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-17-12-48-00.gh-issue-127705.Qad2hx.rst index 1523a69a651f4e..bf7e6eaba911f6 100644 --- 
a/Misc/NEWS.d/next/Core_and_Builtins/2025-02-17-12-48-00.gh-issue-127705.Qad2hx.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-17-12-48-00.gh-issue-127705.Qad2hx.rst @@ -1,5 +1,5 @@ Use tagged references (``_PyStackRef``) for the default build as well as for the free-threading build. This has a small negative performance impact short-term but will enable larger speedups in the future and signficantly -reduce maintenance costs by allowing a single implementations of tagged +reduce maintenance costs by allowing a single implementation of tagged references in the future. From 36f6034e39be7ccae4edc0a3d5f7e76fa93ae6eb Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 5 Mar 2025 11:26:07 +0000 Subject: [PATCH 50/53] Use consistent return types for PyStackRef_IsHeapSafe --- Include/internal/pycore_stackref.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index f591a6c6b7c515..4b73197d892cb6 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -197,10 +197,10 @@ PyStackRef_IsMortal(_PyStackRef ref) return _Py_IsImmortal(obj); } -static inline int +static inline bool PyStackRef_IsHeapSafe(_PyStackRef ref) { - return 1; + return true; } @@ -333,10 +333,10 @@ PyStackRef_DUP(_PyStackRef stackref) return stackref; } -static inline int +static inline bool PyStackRef_IsHeapSafe(_PyStackRef ref) { - return 1; + return true; } static inline _PyStackRef From 0f9c86595472e7285e504c0fe08a4564b5199680 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 7 Mar 2025 10:12:24 +0000 Subject: [PATCH 51/53] Drop 'immortal' bit. Only use 'counted' bit. 
--- Include/internal/pycore_stackref.h | 37 ++++++------------------------ Include/refcount.h | 3 +-- Python/gc.c | 4 ++-- 3 files changed, 10 insertions(+), 34 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 4b73197d892cb6..d6324c03dd45c6 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -187,16 +187,6 @@ _PyStackRef_FromPyObjectStealMortal(PyObject *obj, const char *filename, int lin } #define PyStackRef_FromPyObjectStealMortal(obj) _PyStackRef_FromPyObjectStealMortal(_PyObject_CAST(obj), __FILE__, __LINE__) -static inline bool -PyStackRef_IsMortal(_PyStackRef ref) -{ - PyObject *obj = _Py_stackref_get_object(ref); - if (obj == NULL) { - return false; - } - return _Py_IsImmortal(obj); -} - static inline bool PyStackRef_IsHeapSafe(_PyStackRef ref) { @@ -377,8 +367,12 @@ PyStackRef_AsStrongReference(_PyStackRef stackref) // With GIL #define Py_TAG_BITS 3 -#define Py_TAG_IMMORTAL _Py_IMMORTAL_FLAGS #define Py_TAG_REFCNT 1 +#if _Py_IMMORTAL_FLAGS != Py_TAG_REFCNT +# error "_Py_IMMORTAL_FLAGS != Py_TAG_REFCNT" +#endif +#define Py_TAG_IMMORTAL _Py_IMMORTAL_FLAGS + #define BITS_TO_PTR(REF) ((PyObject *)((REF).bits)) #define BITS_TO_PTR_MASKED(REF) ((PyObject *)(((REF).bits) & (~Py_TAG_BITS))) @@ -402,7 +396,6 @@ static inline void PyStackRef_CheckValid(_PyStackRef ref) { PyObject *obj = BITS_TO_PTR_MASKED(ref); switch (tag) { case 0: - case Py_TAG_REFCNT: /* Can be immortal if object was made immortal after reference came into existence */ assert(!_Py_IsStaticImmortal(obj)); break; @@ -422,8 +415,6 @@ static inline void PyStackRef_CheckValid(_PyStackRef ref) { #ifdef _WIN32 #define PyStackRef_IsUncountedMortal(REF) (((REF).bits & Py_TAG_BITS) == 0) -#define PyStackRef_IsCountedMortal(REF) (((REF).bits & Py_TAG_BITS) == Py_TAG_REFCNT) -#define PyStackRef_IsMortal(REF) (((REF).bits & Py_TAG_BITS) != Py_TAG_IMMORTAL) #define PyStackRef_AsPyObjectBorrow BITS_TO_PTR_MASKED #else 
/* Does this ref not have an embedded refcount and refer to a mortal object? */ @@ -433,20 +424,6 @@ PyStackRef_IsUncountedMortal(_PyStackRef ref) return (ref.bits & Py_TAG_BITS) == 0; } -/* Does this ref have an embedded refcount and refer to a mortal object (NULL is not mortal)? */ -static inline bool -PyStackRef_IsCountedMortal(_PyStackRef ref) -{ - return (ref.bits & Py_TAG_BITS) == Py_TAG_REFCNT; -} - -/* Does this ref refer to a mortal object (NULL is not mortal) */ -static inline bool -PyStackRef_IsMortal(_PyStackRef ref) -{ - return (ref.bits & Py_TAG_BITS) != Py_TAG_IMMORTAL; -} - static inline PyObject * PyStackRef_AsPyObjectBorrow(_PyStackRef ref) { @@ -547,13 +524,13 @@ PyStackRef_DUP(_PyStackRef ref) static inline bool PyStackRef_IsHeapSafe(_PyStackRef ref) { - return !PyStackRef_IsCountedMortal(ref); + return (ref.bits & Py_TAG_BITS) == 0 || ref.bits == PyStackRef_NULL_BITS || _Py_IsImmortal(BITS_TO_PTR_MASKED(ref)); } static inline _PyStackRef PyStackRef_MakeHeapSafe(_PyStackRef ref) { - if (!PyStackRef_IsCountedMortal(ref)) { + if (PyStackRef_IsHeapSafe(ref)) { return ref; } PyObject *obj = BITS_TO_PTR_MASKED(ref); diff --git a/Include/refcount.h b/Include/refcount.h index c0cb86dfcd59b2..88918409d934f6 100644 --- a/Include/refcount.h +++ b/Include/refcount.h @@ -19,9 +19,8 @@ immortal. The latter should be the only instances that require cleanup during runtime finalization. 
*/ -/* Leave the low bits for refcount overflow for old stable ABI code */ #define _Py_STATICALLY_ALLOCATED_FLAG 4 -#define _Py_IMMORTAL_FLAGS 3 +#define _Py_IMMORTAL_FLAGS 1 #if SIZEOF_VOID_P > 4 /* diff --git a/Python/gc.c b/Python/gc.c index fa113e90e7ebb7..f2a88657e8d8d6 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1488,10 +1488,10 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b objects_marked += move_to_reachable(func, &reachable, visited_space); while (sp > locals) { sp--; - if (!PyStackRef_IsMortal(*sp)) { + PyObject *op = PyStackRef_AsPyObjectBorrow(*sp); + if (op == NULL || _Py_IsImmortal(op)) { continue; } - PyObject *op = PyStackRef_AsPyObjectBorrow(*sp); if (_PyObject_IS_GC(op)) { PyGC_Head *gc = AS_GC(op); if (_PyObject_GC_IS_TRACKED(op) && From 37f1e184fc563e41718bd31313eb39713b763ea4 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 7 Mar 2025 14:57:41 +0000 Subject: [PATCH 52/53] Remove use of Py_TAG_IMMORTAL --- Include/internal/pycore_stackref.h | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index d6324c03dd45c6..aac900e2a2b50a 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -368,25 +368,24 @@ PyStackRef_AsStrongReference(_PyStackRef stackref) #define Py_TAG_BITS 3 #define Py_TAG_REFCNT 1 -#if _Py_IMMORTAL_FLAGS != Py_TAG_REFCNT +#if Py_TAG_REFCNT != Py_TAG_REFCNT # error "_Py_IMMORTAL_FLAGS != Py_TAG_REFCNT" #endif -#define Py_TAG_IMMORTAL _Py_IMMORTAL_FLAGS #define BITS_TO_PTR(REF) ((PyObject *)((REF).bits)) #define BITS_TO_PTR_MASKED(REF) ((PyObject *)(((REF).bits) & (~Py_TAG_BITS))) -#define PyStackRef_NULL_BITS Py_TAG_IMMORTAL +#define PyStackRef_NULL_BITS Py_TAG_REFCNT static const _PyStackRef PyStackRef_NULL = { .bits = PyStackRef_NULL_BITS }; #define PyStackRef_IsNull(ref) ((ref).bits == PyStackRef_NULL_BITS) -#define PyStackRef_True 
((_PyStackRef){.bits = ((uintptr_t)&_Py_TrueStruct) | Py_TAG_IMMORTAL }) -#define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) | Py_TAG_IMMORTAL }) -#define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) | Py_TAG_IMMORTAL }) +#define PyStackRef_True ((_PyStackRef){.bits = ((uintptr_t)&_Py_TrueStruct) | Py_TAG_REFCNT }) +#define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) | Py_TAG_REFCNT }) +#define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) | Py_TAG_REFCNT }) -#define PyStackRef_IsTrue(REF) ((REF).bits == (((uintptr_t)&_Py_TrueStruct) | Py_TAG_IMMORTAL)) -#define PyStackRef_IsFalse(REF) ((REF).bits == (((uintptr_t)&_Py_FalseStruct) | Py_TAG_IMMORTAL)) -#define PyStackRef_IsNone(REF) ((REF).bits == (((uintptr_t)&_Py_NoneStruct) | Py_TAG_IMMORTAL)) +#define PyStackRef_IsTrue(REF) ((REF).bits == (((uintptr_t)&_Py_TrueStruct) | Py_TAG_REFCNT)) +#define PyStackRef_IsFalse(REF) ((REF).bits == (((uintptr_t)&_Py_FalseStruct) | Py_TAG_REFCNT)) +#define PyStackRef_IsNone(REF) ((REF).bits == (((uintptr_t)&_Py_NoneStruct) | Py_TAG_REFCNT)) #ifdef Py_DEBUG @@ -399,7 +398,7 @@ static inline void PyStackRef_CheckValid(_PyStackRef ref) { /* Can be immortal if object was made immortal after reference came into existence */ assert(!_Py_IsStaticImmortal(obj)); break; - case Py_TAG_IMMORTAL: + case Py_TAG_REFCNT: assert(obj == NULL || _Py_IsImmortal(obj)); break; default: @@ -449,7 +448,7 @@ PyStackRef_FromPyObjectSteal(PyObject *obj) #if SIZEOF_VOID_P > 4 unsigned int tag = obj->ob_flags & Py_TAG_BITS; #else - unsigned int tag = _Py_IsImmortal(obj) ? Py_TAG_IMMORTAL : 0; + unsigned int tag = _Py_IsImmortal(obj) ? 
Py_TAG_REFCNT : 0; #endif _PyStackRef ref = ((_PyStackRef){.bits = ((uintptr_t)(obj)) | tag}); PyStackRef_CheckValid(ref); @@ -477,7 +476,7 @@ _PyStackRef_FromPyObjectNew(PyObject *obj) { assert(obj != NULL); if (_Py_IsImmortal(obj)) { - return (_PyStackRef){ .bits = ((uintptr_t)obj) | Py_TAG_IMMORTAL}; + return (_PyStackRef){ .bits = ((uintptr_t)obj) | Py_TAG_REFCNT}; } Py_INCREF_MORTAL(obj); _PyStackRef ref = (_PyStackRef){ .bits = (uintptr_t)obj }; @@ -502,7 +501,7 @@ static inline _PyStackRef PyStackRef_FromPyObjectImmortal(PyObject *obj) { assert(_Py_IsImmortal(obj)); - return (_PyStackRef){ .bits = (uintptr_t)obj | Py_TAG_IMMORTAL}; + return (_PyStackRef){ .bits = (uintptr_t)obj | Py_TAG_REFCNT}; } /* WARNING: This macro evaluates its argument twice */ From d74417988c47e7677fcb08a6c57bcf0445a2857e Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 10 Mar 2025 12:04:22 +0000 Subject: [PATCH 53/53] Rename functions and add comment --- Include/internal/pycore_stackref.h | 31 ++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index aac900e2a2b50a..5f00ad4d07f4f6 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -366,9 +366,12 @@ PyStackRef_AsStrongReference(_PyStackRef stackref) // With GIL -#define Py_TAG_BITS 3 +/* References to immortal objects always have their tag bit set to Py_TAG_REFCNT + * as they can (must) have their reclamation deferred */ + +#define Py_TAG_BITS 1 #define Py_TAG_REFCNT 1 -#if Py_TAG_REFCNT != Py_TAG_REFCNT +#if _Py_IMMORTAL_FLAGS != Py_TAG_REFCNT # error "_Py_IMMORTAL_FLAGS != Py_TAG_REFCNT" #endif @@ -413,12 +416,12 @@ static inline void PyStackRef_CheckValid(_PyStackRef ref) { #endif #ifdef _WIN32 -#define PyStackRef_IsUncountedMortal(REF) (((REF).bits & Py_TAG_BITS) == 0) +#define PyStackRef_RefcountOnObject(REF) (((REF).bits & Py_TAG_BITS) == 0) #define 
PyStackRef_AsPyObjectBorrow BITS_TO_PTR_MASKED #else -/* Does this ref not have an embedded refcount and refer to a mortal object? */ +/* Does this ref not have an embedded refcount and thus not refer to a declared immortal object? */ static inline int -PyStackRef_IsUncountedMortal(_PyStackRef ref) +PyStackRef_RefcountOnObject(_PyStackRef ref) { return (ref.bits & Py_TAG_BITS) == 0; } @@ -433,7 +436,7 @@ PyStackRef_AsPyObjectBorrow(_PyStackRef ref) static inline PyObject * PyStackRef_AsPyObjectSteal(_PyStackRef ref) { - if (PyStackRef_IsUncountedMortal(ref)) { + if (PyStackRef_RefcountOnObject(ref)) { return BITS_TO_PTR(ref); } else { @@ -504,16 +507,16 @@ PyStackRef_FromPyObjectImmortal(PyObject *obj) return (_PyStackRef){ .bits = (uintptr_t)obj | Py_TAG_REFCNT}; } -/* WARNING: This macro evaluates its argument twice */ +/* WARNING: This macro evaluates its argument more than once */ #ifdef _WIN32 #define PyStackRef_DUP(REF) \ - (PyStackRef_IsUncountedMortal(REF) ? (Py_INCREF_MORTAL(BITS_TO_PTR(REF)), (REF)) : (REF)) + (PyStackRef_RefcountOnObject(REF) ? 
(Py_INCREF_MORTAL(BITS_TO_PTR(REF)), (REF)) : (REF)) #else static inline _PyStackRef PyStackRef_DUP(_PyStackRef ref) { assert(!PyStackRef_IsNull(ref)); - if (PyStackRef_IsUncountedMortal(ref)) { + if (PyStackRef_RefcountOnObject(ref)) { Py_INCREF_MORTAL(BITS_TO_PTR(ref)); } return ref; @@ -543,14 +546,14 @@ PyStackRef_MakeHeapSafe(_PyStackRef ref) #define PyStackRef_CLOSE(REF) \ do { \ _PyStackRef _temp = (REF); \ - if (PyStackRef_IsUncountedMortal(_temp)) Py_DECREF_MORTAL(BITS_TO_PTR(_temp)); \ + if (PyStackRef_RefcountOnObject(_temp)) Py_DECREF_MORTAL(BITS_TO_PTR(_temp)); \ } while (0) #else static inline void PyStackRef_CLOSE(_PyStackRef ref) { assert(!PyStackRef_IsNull(ref)); - if (PyStackRef_IsUncountedMortal(ref)) { + if (PyStackRef_RefcountOnObject(ref)) { Py_DECREF_MORTAL(BITS_TO_PTR(ref)); } } @@ -560,7 +563,7 @@ static inline void PyStackRef_CLOSE_SPECIALIZED(_PyStackRef ref, destructor destruct) { assert(!PyStackRef_IsNull(ref)); - if (PyStackRef_IsUncountedMortal(ref)) { + if (PyStackRef_RefcountOnObject(ref)) { Py_DECREF_MORTAL_SPECIALIZED(BITS_TO_PTR(ref), destruct); } } @@ -572,7 +575,7 @@ static inline void PyStackRef_XCLOSE(_PyStackRef ref) { assert(ref.bits != 0); - if (PyStackRef_IsUncountedMortal(ref)) { + if (PyStackRef_RefcountOnObject(ref)) { assert(!PyStackRef_IsNull(ref)); Py_DECREF_MORTAL(BITS_TO_PTR(ref)); } @@ -592,7 +595,7 @@ PyStackRef_XCLOSE(_PyStackRef ref) // Note: this is a macro because MSVC (Windows) has trouble inlining it. -#define PyStackRef_Is(a, b) (((a).bits & (~Py_TAG_BITS)) == ((b).bits & (~Py_TAG_BITS))) +#define PyStackRef_Is(a, b) (((a).bits & (~Py_TAG_REFCNT)) == ((b).bits & (~Py_TAG_REFCNT))) #endif // !defined(Py_GIL_DISABLED) && defined(Py_STACKREF_DEBUG)