diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 1e1f65851fd811..76770a9d797036 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -920,6 +920,35 @@ always available. It is not guaranteed to exist in all implementations of Python. +.. function:: getobjects(limit[, type]) + + This function only exists if CPython was built using the + specialized configure option :option:`--with-trace-refs`. + It is intended only for debugging garbage-collection issues. + + Return a list of up to *limit* dynamically allocated Python objects. + If *type* is given, only objects of that exact type (not subtypes) + are included. + + Objects from the list are not safe to use. + Specifically, the result will include objects from all interpreters that + share their object allocator state (that is, ones created with + :c:member:`PyInterpreterConfig.use_main_obmalloc` set to 1 + or using :c:func:`Py_NewInterpreter`, and the + :ref:`main interpreter <sub-interpreter-support>`). + Mixing objects from different interpreters may lead to crashes + or other unexpected behavior. + + .. impl-detail:: + + This function should be used for specialized purposes only. + It is not guaranteed to exist in all implementations of Python. + + .. versionchanged:: next + + The result may include objects from other interpreters. + + .. function:: getprofile() .. index:: diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index 052289bd827366..713b2afc91d199 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -708,7 +708,7 @@ Debug options Effects: * Define the ``Py_TRACE_REFS`` macro. - * Add :func:`!sys.getobjects` function. + * Add :func:`sys.getobjects` function. * Add :envvar:`PYTHONDUMPREFS` environment variable. 
The :envvar:`PYTHONDUMPREFS` environment variable can be used to dump diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index a2897097aaba57..253d97c7e9577b 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -2706,3 +2706,14 @@ Regression Test Changes option. If used, it specifies a module that should be imported early in the lifecycle of the interpreter, before ``site.py`` is executed. (Contributed by Łukasz Langa in :gh:`110769`.) + + +Notable changes in 3.13.1 +========================= + +sys +--- + +* The previously undocumented special function :func:`sys.getobjects`, + which only exists in specialized builds of Python, may now return objects + from other interpreters than the one it's called in. diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst new file mode 100644 index 00000000000000..e3741321006548 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst @@ -0,0 +1,5 @@ +Fix a crash caused by immortal interned strings being shared between +sub-interpreters that use basic single-phase init. In that case, the string +can be used by an interpreter that outlives the interpreter that created and +interned it. For interpreters that share obmalloc state, also share the +interned dict with the main interpreter. 
diff --git a/Objects/object.c b/Objects/object.c index cbf576d5e5aee3..43d1761d5161cf 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -169,6 +169,48 @@ _PyDebug_PrintTotalRefs(void) { #define REFCHAIN(interp) interp->object_state.refchain #define REFCHAIN_VALUE ((void*)(uintptr_t)1) +static inline int +has_own_refchain(PyInterpreterState *interp) +{ + if (interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) { + return (_Py_IsMainInterpreter(interp) + || _PyInterpreterState_Main() == NULL); + } + return 1; +} + +static int +refchain_init(PyInterpreterState *interp) +{ + if (!has_own_refchain(interp)) { + // Legacy subinterpreters share a refchain with the main interpreter. + REFCHAIN(interp) = REFCHAIN(_PyInterpreterState_Main()); + return 0; + } + _Py_hashtable_allocator_t alloc = { + // Don't use default PyMem_Malloc() and PyMem_Free() which + // require the caller to hold the GIL. + .malloc = PyMem_RawMalloc, + .free = PyMem_RawFree, + }; + REFCHAIN(interp) = _Py_hashtable_new_full( + _Py_hashtable_hash_ptr, _Py_hashtable_compare_direct, + NULL, NULL, &alloc); + if (REFCHAIN(interp) == NULL) { + return -1; + } + return 0; +} + +static void +refchain_fini(PyInterpreterState *interp) +{ + if (has_own_refchain(interp) && REFCHAIN(interp) != NULL) { + _Py_hashtable_destroy(REFCHAIN(interp)); + } + REFCHAIN(interp) = NULL; +} + bool _PyRefchain_IsTraced(PyInterpreterState *interp, PyObject *obj) { @@ -2171,16 +2213,7 @@ PyStatus _PyObject_InitState(PyInterpreterState *interp) { #ifdef Py_TRACE_REFS - _Py_hashtable_allocator_t alloc = { - // Don't use default PyMem_Malloc() and PyMem_Free() which - // require the caller to hold the GIL. 
- .malloc = PyMem_RawMalloc, - .free = PyMem_RawFree, - }; - REFCHAIN(interp) = _Py_hashtable_new_full( - _Py_hashtable_hash_ptr, _Py_hashtable_compare_direct, - NULL, NULL, &alloc); - if (REFCHAIN(interp) == NULL) { + if (refchain_init(interp) < 0) { return _PyStatus_NO_MEMORY(); } #endif @@ -2191,8 +2224,7 @@ void _PyObject_FiniState(PyInterpreterState *interp) { #ifdef Py_TRACE_REFS - _Py_hashtable_destroy(REFCHAIN(interp)); - REFCHAIN(interp) = NULL; + refchain_fini(interp); #endif } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 5a6ae78fe23bae..c69a64de062baa 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -277,13 +277,37 @@ hashtable_unicode_compare(const void *key1, const void *key2) } } +/* Return true if this interpreter should share the main interpreter's + intern_dict. That's important for interpreters which load basic + single-phase init extension modules (m_size == -1). There could be interned + immortal strings that are shared between interpreters, due to the + PyDict_Update(mdict, m_copy) call in import_find_extension(). + + It's not safe to deallocate those strings until all interpreters that + potentially use them are freed. By storing them in the main interpreter, we + ensure they get freed after all other interpreters are freed. 
+*/ +static bool +has_shared_intern_dict(PyInterpreterState *interp) +{ + PyInterpreterState *main_interp = _PyInterpreterState_Main(); + return interp != main_interp && interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC; +} + static int init_interned_dict(PyInterpreterState *interp) { assert(get_interned_dict(interp) == NULL); - PyObject *interned = interned = PyDict_New(); - if (interned == NULL) { - return -1; + PyObject *interned; + if (has_shared_intern_dict(interp)) { + interned = get_interned_dict(_PyInterpreterState_Main()); + Py_INCREF(interned); + } + else { + interned = PyDict_New(); + if (interned == NULL) { + return -1; + } } _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = interned; return 0; @@ -294,7 +318,10 @@ clear_interned_dict(PyInterpreterState *interp) { PyObject *interned = get_interned_dict(interp); if (interned != NULL) { - PyDict_Clear(interned); + if (!has_shared_intern_dict(interp)) { + // only clear if the dict belongs to this interpreter + PyDict_Clear(interned); + } Py_DECREF(interned); _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL; } @@ -15306,6 +15333,13 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) } assert(PyDict_CheckExact(interned)); + if (has_shared_intern_dict(interp)) { + // the dict doesn't belong to this interpreter, skip the debug + // checks on it and just clear the pointer to it + clear_interned_dict(interp); + return; + } + #ifdef INTERNED_STATS fprintf(stderr, "releasing %zd interned strings\n", PyDict_GET_SIZE(interned)); @@ -15827,8 +15861,10 @@ _PyUnicode_Fini(PyInterpreterState *interp) { struct _Py_unicode_state *state = &interp->unicode; - // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini() - assert(get_interned_dict(interp) == NULL); + if (!has_shared_intern_dict(interp)) { + // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini() + assert(get_interned_dict(interp) == NULL); + } _PyUnicode_FiniEncodings(&state->fs_codec); diff --git 
a/Python/pylifecycle.c b/Python/pylifecycle.c index 1701a1cd217440..0cd4fb417636a2 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -670,6 +670,13 @@ pycore_create_interpreter(_PyRuntimeState *runtime, return status; } + // This could be done in init_interpreter() (in pystate.c) if it + // didn't depend on interp->feature_flags being set already. + status = _PyObject_InitState(interp); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + // initialize the interp->obmalloc state. This must be done after // the settings are loaded (so that feature_flags are set) but before // any calls are made to obmalloc functions. @@ -2290,6 +2297,13 @@ new_interpreter(PyThreadState **tstate_p, goto error; } + // This could be done in init_interpreter() (in pystate.c) if it + // didn't depend on interp->feature_flags being set already. + status = _PyObject_InitState(interp); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + // initialize the interp->obmalloc state. This must be done after // the settings are loaded (so that feature_flags are set) but before // any calls are made to obmalloc functions. diff --git a/Python/pystate.c b/Python/pystate.c index 66fd392c3e3ad0..ad3fdce69bf1d5 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -632,10 +632,8 @@ init_interpreter(PyInterpreterState *interp, assert(next != NULL || (interp == runtime->interpreters.main)); interp->next = next; - PyStatus status = _PyObject_InitState(interp); - if (_PyStatus_EXCEPTION(status)) { - return status; - } + // We would call _PyObject_InitState() at this point + // if interp->feature_flags were already set. _PyEval_InitState(interp); _PyGC_InitState(&interp->gc);