From 29e2a03ae362d9be59d8bca4c8d4621b69420746 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 11 Apr 2024 11:51:29 +0100 Subject: [PATCH 1/5] Streamline the trashcan mechanism --- Include/cpython/object.h | 39 +++++++------ Include/cpython/pystate.h | 7 +-- Objects/object.c | 112 +++++--------------------------------- Python/pystate.c | 2 + 4 files changed, 37 insertions(+), 123 deletions(-) diff --git a/Include/cpython/object.h b/Include/cpython/object.h index b64db1ba9a6dd2..19acf8b69511ce 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -461,30 +461,29 @@ passed as second argument to Py_TRASHCAN_BEGIN(). /* Python 3.9 private API, invoked by the macros below. */ PyAPI_FUNC(int) _PyTrash_begin(PyThreadState *tstate, PyObject *op); PyAPI_FUNC(void) _PyTrash_end(PyThreadState *tstate); + +PyAPI_FUNC(void) PyTrash_thread_deposit_object(PyThreadState *tstate, PyObject *op); +PyAPI_FUNC(void) PyTrash_thread_destroy_chain(PyThreadState *tstate); + + /* Python 3.10 private API, invoked by the Py_TRASHCAN_BEGIN(). */ -PyAPI_FUNC(int) _PyTrash_cond(PyObject *op, destructor dealloc); -#define Py_TRASHCAN_BEGIN_CONDITION(op, cond) \ - do { \ - PyThreadState *_tstate = NULL; \ - /* If "cond" is false, then _tstate remains NULL and the deallocator \ - * is run normally without involving the trashcan */ \ - if (cond) { \ - _tstate = PyThreadState_GetUnchecked(); \ - if (_PyTrash_begin(_tstate, _PyObject_CAST(op))) { \ - break; \ - } \ - } - /* The body of the deallocator is here. */ +#define Py_TRASHCAN_BEGIN(op, dealloc) \ +do { \ + PyThreadState *tstate = PyThreadState_Get(); \ + if (tstate->c_recursion_remaining <= 0 && Py_TYPE(op)->tp_dealloc == (destructor)dealloc) { \ + PyTrash_thread_deposit_object(tstate, (PyObject *)op); \ + break; \ + } \ + tstate->c_recursion_remaining--; + /* The body of the deallocator is here. */ #define Py_TRASHCAN_END \ - if (_tstate) { \ - _PyTrash_end(_tstate); \ - } \ - } while (0); + tstate->c_recursion_remaining++; \ + if (tstate->delete_later && tstate->c_recursion_remaining > 50) { \ + PyTrash_thread_destroy_chain(tstate); \ + } \ +} while (0); -#define Py_TRASHCAN_BEGIN(op, dealloc) \ - Py_TRASHCAN_BEGIN_CONDITION((op), \ - _PyTrash_cond(_PyObject_CAST(op), (destructor)(dealloc))) PyAPI_FUNC(void *) PyObject_GetItemData(PyObject *obj); diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 7fb6b176392173..55f39600c0eaa5 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -56,11 +56,6 @@ typedef struct _stack_chunk { PyObject * data[1]; /* Variable sized */ } _PyStackChunk; -struct _py_trashcan { - int delete_nesting; - PyObject *delete_later; -}; - struct _ts { /* See Python/ceval.c for comments explaining most fields */ @@ -152,7 +147,7 @@ struct _ts { */ unsigned long native_thread_id; - struct _py_trashcan trash; + PyObject *delete_later; /* Tagged pointer to top-most critical section, or zero if there is no * active critical section. Critical sections are only used in diff --git a/Objects/object.c b/Objects/object.c index c8e6f8fc1a2b40..e40258cb371dba 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2692,27 +2692,27 @@ Py_ReprLeave(PyObject *obj) * call-stack depth gets large. op must be a currently untracked gc'ed * object, with refcount 0. Py_DECREF must already have been called on it. */ -static void -_PyTrash_thread_deposit_object(struct _py_trashcan *trash, PyObject *op) +void +PyTrash_thread_deposit_object(PyThreadState *tstate, PyObject *op) { _PyObject_ASSERT(op, _PyObject_IS_GC(op)); _PyObject_ASSERT(op, !_PyObject_GC_IS_TRACKED(op)); _PyObject_ASSERT(op, Py_REFCNT(op) == 0); #ifdef Py_GIL_DISABLED _PyObject_ASSERT(op, op->ob_tid == 0); - op->ob_tid = (uintptr_t)trash->delete_later; + op->ob_tid = (uintptr_t)tstate->delete_later; #else - _PyGCHead_SET_PREV(_Py_AS_GC(op), (PyGC_Head*)trash->delete_later); + _PyGCHead_SET_PREV(_Py_AS_GC(op), (PyGC_Head*)tstate->delete_later); #endif - trash->delete_later = op; + tstate->delete_later = op; } /* Deallocate all the objects in the gcstate->trash_delete_later list. * Called when the call-stack unwinds again. */ -static void -_PyTrash_thread_destroy_chain(struct _py_trashcan *trash) +void +PyTrash_thread_destroy_chain(PyThreadState *tstate) { - /* We need to increase trash_delete_nesting here, otherwise, + /* We need to increase c_recursion_remaining here, otherwise, _PyTrash_thread_destroy_chain will be called recursively and then possibly crash. An example that may crash without increase: @@ -2723,17 +2723,17 @@ _PyTrash_thread_destroy_chain(struct _py_trashcan *trash) tups = [(tup,) for tup in tups] del tups */ - assert(trash->delete_nesting == 0); - ++trash->delete_nesting; - while (trash->delete_later) { - PyObject *op = trash->delete_later; + assert(tstate->c_recursion_remaining > 50); + tstate->c_recursion_remaining--; + while (tstate->delete_later) { + PyObject *op = tstate->delete_later; destructor dealloc = Py_TYPE(op)->tp_dealloc; #ifdef Py_GIL_DISABLED - trash->delete_later = (PyObject*) op->ob_tid; + tstate->delete_later = (PyObject*) op->ob_tid; op->ob_tid = 0; #else - trash->delete_later = (PyObject*) _PyGCHead_PREV(_Py_AS_GC(op)); + tstate->delete_later = (PyObject*) _PyGCHead_PREV(_Py_AS_GC(op)); #endif /* Call the deallocator directly. This used to try to @@ -2744,92 +2744,10 @@ _PyTrash_thread_destroy_chain(struct _py_trashcan *trash) */ _PyObject_ASSERT(op, Py_REFCNT(op) == 0); (*dealloc)(op); - assert(trash->delete_nesting == 1); - } - --trash->delete_nesting; -} - - -static struct _py_trashcan * -_PyTrash_get_state(PyThreadState *tstate) -{ - if (tstate != NULL) { - return &tstate->trash; - } - // The current thread must be finalizing. - // Fall back to using thread-local state. - // XXX Use thread-local variable syntax? - assert(PyThread_tss_is_created(&_PyRuntime.trashTSSkey)); - struct _py_trashcan *trash = - (struct _py_trashcan *)PyThread_tss_get(&_PyRuntime.trashTSSkey); - if (trash == NULL) { - trash = PyMem_RawMalloc(sizeof(struct _py_trashcan)); - if (trash == NULL) { - Py_FatalError("Out of memory"); - } - PyThread_tss_set(&_PyRuntime.trashTSSkey, (void *)trash); - } - return trash; -} - -static void -_PyTrash_clear_state(PyThreadState *tstate) -{ - if (tstate != NULL) { - assert(tstate->trash.delete_later == NULL); - return; - } - if (PyThread_tss_is_created(&_PyRuntime.trashTSSkey)) { - struct _py_trashcan *trash = - (struct _py_trashcan *)PyThread_tss_get(&_PyRuntime.trashTSSkey); - if (trash != NULL) { - PyThread_tss_set(&_PyRuntime.trashTSSkey, (void *)NULL); - PyMem_RawFree(trash); - } } + tstate->c_recursion_remaining++; } - -int -_PyTrash_begin(PyThreadState *tstate, PyObject *op) -{ - // XXX Make sure the GIL is held. - struct _py_trashcan *trash = _PyTrash_get_state(tstate); - if (trash->delete_nesting >= _PyTrash_UNWIND_LEVEL) { - /* Store the object (to be deallocated later) and jump past - * Py_TRASHCAN_END, skipping the body of the deallocator */ - _PyTrash_thread_deposit_object(trash, op); - return 1; - } - ++trash->delete_nesting; - return 0; -} - - -void -_PyTrash_end(PyThreadState *tstate) -{ - // XXX Make sure the GIL is held. - struct _py_trashcan *trash = _PyTrash_get_state(tstate); - --trash->delete_nesting; - if (trash->delete_nesting <= 0) { - if (trash->delete_later != NULL) { - _PyTrash_thread_destroy_chain(trash); - } - _PyTrash_clear_state(tstate); - } -} - - -/* bpo-40170: It's only be used in Py_TRASHCAN_BEGIN macro to hide - implementation details. */ -int -_PyTrash_cond(PyObject *op, destructor dealloc) -{ - return Py_TYPE(op)->tp_dealloc == dealloc; -} - - void _Py_NO_RETURN _PyObject_AssertFailed(PyObject *obj, const char *expr, const char *msg, const char *file, int line, const char *function) diff --git a/Python/pystate.c b/Python/pystate.c index b0fb210bb5f5f6..2e9f07409b14f2 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1478,6 +1478,8 @@ init_threadstate(_PyThreadStateImpl *_tstate, tstate->what_event = -1; tstate->previous_executor = NULL; + tstate->delete_later = NULL; + llist_init(&_tstate->mem_free_queue); if (interp->stoptheworld.requested || _PyRuntime.stoptheworld.requested) { From 78ceefb7d84609469d1198708218c2b770f269b3 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 12 Apr 2024 11:40:55 +0100 Subject: [PATCH 2/5] Set maxDiff to None to see failure reason --- Lib/test/test_ordered_dict.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Lib/test/test_ordered_dict.py b/Lib/test/test_ordered_dict.py index 4571b23dfe7c1a..999ab66f6ee721 100644 --- a/Lib/test/test_ordered_dict.py +++ b/Lib/test/test_ordered_dict.py @@ -31,6 +31,8 @@ def replaced_module(name, replacement): class OrderedDictTests: + maxDiff = None + def test_init(self): OrderedDict = self.OrderedDict with self.assertRaises(TypeError): From 9a61914e6c0693babc6be8afdf861c7121923652 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 12 Apr 2024 14:16:02 +0100 Subject: [PATCH 3/5] Invoke trashcan earlier to avoid hitting C recursion limit during object finalization. --- Include/cpython/object.h | 4 ++-- Lib/test/test_ordered_dict.py | 2 -- Objects/object.c | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/Include/cpython/object.h b/Include/cpython/object.h index 19acf8b69511ce..b2404544f1ba98 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -471,7 +471,7 @@ PyAPI_FUNC(void) PyTrash_thread_destroy_chain(PyThreadState *tstate); #define Py_TRASHCAN_BEGIN(op, dealloc) \ do { \ PyThreadState *tstate = PyThreadState_Get(); \ - if (tstate->c_recursion_remaining <= 0 && Py_TYPE(op)->tp_dealloc == (destructor)dealloc) { \ + if (tstate->c_recursion_remaining <= 50 && Py_TYPE(op)->tp_dealloc == (destructor)dealloc) { \ PyTrash_thread_deposit_object(tstate, (PyObject *)op); \ break; \ } \ @@ -479,7 +479,7 @@ do { \ /* The body of the deallocator is here. */ #define Py_TRASHCAN_END \ tstate->c_recursion_remaining++; \ - if (tstate->delete_later && tstate->c_recursion_remaining > 50) { \ + if (tstate->delete_later && tstate->c_recursion_remaining > 100) { \ PyTrash_thread_destroy_chain(tstate); \ } \ } while (0); diff --git a/Lib/test/test_ordered_dict.py b/Lib/test/test_ordered_dict.py index 999ab66f6ee721..4571b23dfe7c1a 100644 --- a/Lib/test/test_ordered_dict.py +++ b/Lib/test/test_ordered_dict.py @@ -31,8 +31,6 @@ def replaced_module(name, replacement): class OrderedDictTests: - maxDiff = None - def test_init(self): OrderedDict = self.OrderedDict with self.assertRaises(TypeError): diff --git a/Objects/object.c b/Objects/object.c index e40258cb371dba..a32ce31f7fc321 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2723,7 +2723,7 @@ PyTrash_thread_destroy_chain(PyThreadState *tstate) tups = [(tup,) for tup in tups] del tups */ - assert(tstate->c_recursion_remaining > 50); + assert(tstate->c_recursion_remaining > 100); tstate->c_recursion_remaining--; while (tstate->delete_later) { PyObject *op = tstate->delete_later; From 215df68cb514898e2bf7cb2946569574501886ee Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 16 Apr 2024 11:44:19 +0100 Subject: [PATCH 4/5] Address review comments --- Include/cpython/object.h | 18 +++++++++++------- Objects/object.c | 4 ++-- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/Include/cpython/object.h b/Include/cpython/object.h index b2404544f1ba98..20a8a5752db67e 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -462,30 +462,34 @@ passed as second argument to Py_TRASHCAN_BEGIN(). PyAPI_FUNC(int) _PyTrash_begin(PyThreadState *tstate, PyObject *op); PyAPI_FUNC(void) _PyTrash_end(PyThreadState *tstate); -PyAPI_FUNC(void) PyTrash_thread_deposit_object(PyThreadState *tstate, PyObject *op); -PyAPI_FUNC(void) PyTrash_thread_destroy_chain(PyThreadState *tstate); +PyAPI_FUNC(void) _PyTrash_thread_deposit_object(PyThreadState *tstate, PyObject *op); +PyAPI_FUNC(void) _PyTrash_thread_destroy_chain(PyThreadState *tstate); /* Python 3.10 private API, invoked by the Py_TRASHCAN_BEGIN(). */ +/* To avoid raising recursion errors during dealloc trigger trashcan before we reach + * recursion limit. To avoid trashing, we don't attempt to empty the trashcan until + * we have headroom above the trigger limit */ +#define Py_TRASHCAN_HEADROOM 50 + #define Py_TRASHCAN_BEGIN(op, dealloc) \ do { \ PyThreadState *tstate = PyThreadState_Get(); \ - if (tstate->c_recursion_remaining <= 50 && Py_TYPE(op)->tp_dealloc == (destructor)dealloc) { \ - PyTrash_thread_deposit_object(tstate, (PyObject *)op); \ + if (tstate->c_recursion_remaining <= Py_TRASHCAN_HEADROOM && Py_TYPE(op)->tp_dealloc == (destructor)dealloc) { \ + _PyTrash_thread_deposit_object(tstate, (PyObject *)op); \ break; \ } \ tstate->c_recursion_remaining--; /* The body of the deallocator is here. */ #define Py_TRASHCAN_END \ tstate->c_recursion_remaining++; \ - if (tstate->delete_later && tstate->c_recursion_remaining > 100) { \ - PyTrash_thread_destroy_chain(tstate); \ + if (tstate->delete_later && tstate->c_recursion_remaining > (Py_TRASHCAN_HEADROOM*2)) { \ + _PyTrash_thread_destroy_chain(tstate); \ } \ } while (0); - PyAPI_FUNC(void *) PyObject_GetItemData(PyObject *obj); PyAPI_FUNC(int) PyObject_VisitManagedDict(PyObject *obj, visitproc visit, void *arg); diff --git a/Objects/object.c b/Objects/object.c index a32ce31f7fc321..0cd6373c35d9a7 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2693,7 +2693,7 @@ Py_ReprLeave(PyObject *obj) * object, with refcount 0. Py_DECREF must already have been called on it. */ void -PyTrash_thread_deposit_object(PyThreadState *tstate, PyObject *op) +_PyTrash_thread_deposit_object(PyThreadState *tstate, PyObject *op) { _PyObject_ASSERT(op, _PyObject_IS_GC(op)); _PyObject_ASSERT(op, !_PyObject_GC_IS_TRACKED(op)); @@ -2710,7 +2710,7 @@ PyTrash_thread_deposit_object(PyThreadState *tstate, PyObject *op) /* Deallocate all the objects in the gcstate->trash_delete_later list. * Called when the call-stack unwinds again. */ void -PyTrash_thread_destroy_chain(PyThreadState *tstate) +_PyTrash_thread_destroy_chain(PyThreadState *tstate) { /* We need to increase c_recursion_remaining here, otherwise, _PyTrash_thread_destroy_chain will be called recursively From 43630d83adb7f1a54859a4be72d5c69f1d56855d Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 16 Apr 2024 12:39:44 +0100 Subject: [PATCH 5/5] Tidy up macros --- Include/cpython/object.h | 4 ++-- Include/internal/pycore_object.h | 1 - Objects/object.c | 4 +--- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/Include/cpython/object.h b/Include/cpython/object.h index 20a8a5752db67e..2797051281f3b4 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -448,8 +448,8 @@ without deallocating anything (and so unbounded call-stack depth is avoided). When the call stack finishes unwinding again, code generated by the END macro notices this, and calls another routine to deallocate all the objects that may have been added to the list of deferred deallocations. In effect, a -chain of N deallocations is broken into (N-1)/(_PyTrash_UNWIND_LEVEL-1) pieces, -with the call stack never exceeding a depth of _PyTrash_UNWIND_LEVEL. +chain of N deallocations is broken into (N-1)/(Py_TRASHCAN_HEADROOM-1) pieces, +with the call stack never exceeding a depth of Py_TRASHCAN_HEADROOM. Since the tp_dealloc of a subclass typically calls the tp_dealloc of the base class, we need to ensure that the trashcan is only triggered on the tp_dealloc diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 9aa2e5bf918a7b..4a40c649351fcd 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -600,7 +600,6 @@ _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(PyObject *op) return (PyWeakReference **)((char *)op + offset); } - // Fast inlined version of PyObject_IS_GC() static inline int _PyObject_IS_GC(PyObject *obj) diff --git a/Objects/object.c b/Objects/object.c index 0cd6373c35d9a7..e9a02498122908 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2686,8 +2686,6 @@ Py_ReprLeave(PyObject *obj) /* Trashcan support. */ -#define _PyTrash_UNWIND_LEVEL 50 - /* Add op to the gcstate->trash_delete_later list. Called when the current * call-stack depth gets large. op must be a currently untracked gc'ed * object, with refcount 0. Py_DECREF must already have been called on it. @@ -2723,7 +2721,7 @@ _PyTrash_thread_destroy_chain(PyThreadState *tstate) tups = [(tup,) for tup in tups] del tups */ - assert(tstate->c_recursion_remaining > 100); + assert(tstate->c_recursion_remaining > Py_TRASHCAN_HEADROOM); tstate->c_recursion_remaining--; while (tstate->delete_later) { PyObject *op = tstate->delete_later;