diff --git a/Include/internal/pycore_import.h b/Include/internal/pycore_import.h index c048ae88d9000c..eb2265d370ffbd 100644 --- a/Include/internal/pycore_import.h +++ b/Include/internal/pycore_import.h @@ -28,6 +28,13 @@ PyAPI_FUNC(PyObject *) _PyImport_GetModuleAttr(PyObject *, PyObject *); PyAPI_FUNC(PyObject *) _PyImport_GetModuleAttrString(const char *, const char *); +struct _cached_module_def { + char *filename; + char *name; + PyModuleDef *def; + struct _cached_module_def *next; +}; + struct _import_runtime_state { /* The builtin modules (defined in config.c). */ struct _inittab *inittab; @@ -37,19 +44,15 @@ struct _import_runtime_state { See PyInterpreterState.modules_by_index for more info. */ Py_ssize_t last_module_index; struct { - /* A thread state tied to the main interpreter, - used exclusively for when the extensions dict is access/modified - from an arbitrary thread. */ - PyThreadState main_tstate; - /* A lock to guard the dict. */ + /* A lock to guard the cache. */ PyThread_type_lock mutex; - /* A dict mapping (filename, name) to PyModuleDef for modules. + /* The actual cache of (filename, name, PyModuleDef) for modules. Only legacy (single-phase init) extension modules are added and only if they support multiple initialization (m_size >- 0) or are imported in the main interpreter. - This is initialized lazily in _PyImport_FixupExtensionObject(). + This is added to in _PyImport_FixupExtensionObject(). Modules are added there and looked up in _imp.find_extension(). */ - PyObject *dict; + struct _cached_module_def *head; } extensions; /* Package context -- the full module name for package imports */ const char * pkgcontext; diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 0659084194d293..2dee4dfec72325 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -125,11 +125,6 @@ PyAPI_FUNC(PyThreadState *) _PyThreadState_New(PyInterpreterState *interp); PyAPI_FUNC(void) _PyThreadState_Bind(PyThreadState *tstate); PyAPI_FUNC(void) _PyThreadState_DeleteExcept(PyThreadState *tstate); -extern void _PyThreadState_InitDetached(PyThreadState *, PyInterpreterState *); -extern void _PyThreadState_ClearDetached(PyThreadState *); -extern void _PyThreadState_BindDetached(PyThreadState *); -extern void _PyThreadState_UnbindDetached(PyThreadState *); - PyAPI_FUNC(PyObject*) _PyThreadState_GetDict(PyThreadState *tstate); /* The implementation of sys._current_frames() Returns a dict mapping diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index e72e7422c7207e..4ab358839dbc61 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -97,11 +97,6 @@ extern PyTypeObject _PyExc_MemoryError; in accordance with the specification. */ \ .autoTSSkey = Py_tss_NEEDS_INIT, \ .parser = _parser_runtime_state_INIT, \ - .imports = { \ - .extensions = { \ - .main_tstate = _PyThreadState_INIT, \ - }, \ - }, \ .ceval = { \ .perf = _PyEval_RUNTIME_PERF_INIT, \ }, \ diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-07-19-09-58-57.gh-issue-105699.tRrKUL.rst b/Misc/NEWS.d/next/Core and Builtins/2023-07-19-09-58-57.gh-issue-105699.tRrKUL.rst new file mode 100644 index 00000000000000..ed4638a13c7f61 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-07-19-09-58-57.gh-issue-105699.tRrKUL.rst @@ -0,0 +1,3 @@ +Python no longer crashes due to an infrequent race when simultaneously +initializing the sys/builtins modules in multiple subinterpreters that have +their own GILs. diff --git a/Python/import.c b/Python/import.c index 3e52a4e4eb1450..8cc05f0329fc14 100644 --- a/Python/import.c +++ b/Python/import.c @@ -919,167 +919,134 @@ extensions_lock_release(void) static void _extensions_cache_init(void) { - /* The runtime (i.e. main interpreter) must be initializing, - so we don't need to worry about the lock. */ - _PyThreadState_InitDetached(&EXTENSIONS.main_tstate, - _PyInterpreterState_Main()); + /* The lock is initialized directly with the general runtime state. */ + assert(EXTENSIONS.mutex != NULL); + //assert(EXTENSIONS.head == NULL); } -static PyModuleDef * -_extensions_cache_get(PyObject *filename, PyObject *name) +static struct _cached_module_def * +_extensions_cache_lookup(PyObject *filename, PyObject *name, + struct _cached_module_def **prev_p) { - PyModuleDef *def = NULL; - extensions_lock_acquire(); - - PyObject *key = PyTuple_Pack(2, filename, name); - if (key == NULL) { - goto finally; + struct _cached_module_def *prev = NULL; + struct _cached_module_def *cached = EXTENSIONS.head; + while (cached != NULL) { + if (PyUnicode_CompareWithASCIIString(filename, cached->filename) == 0) { + if (PyUnicode_CompareWithASCIIString(name, cached->name) == 0) { + break; + } + } + prev = cached; + cached = cached->next; } - - PyObject *extensions = EXTENSIONS.dict; - if (extensions == NULL) { - goto finally; + if (prev_p != NULL) { + *prev_p = prev; } - def = (PyModuleDef *)PyDict_GetItemWithError(extensions, key); + return cached; +} -finally: - Py_XDECREF(key); +static PyModuleDef * +_extensions_cache_get(PyObject *filename, PyObject *name) +{ + extensions_lock_acquire(); + struct _cached_module_def *found = + _extensions_cache_lookup(filename, name, NULL); extensions_lock_release(); - return def; + return (found != NULL) ? found->def : NULL; } static int _extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def) { int res = -1; - PyThreadState *oldts = NULL; extensions_lock_acquire(); - /* Swap to the main interpreter, if necessary. This matters if - the dict hasn't been created yet or if the item isn't in the - dict yet. In both cases we must ensure the relevant objects - are created using the main interpreter. */ - PyThreadState *main_tstate = &EXTENSIONS.main_tstate; - PyInterpreterState *interp = _PyInterpreterState_GET(); - if (!_Py_IsMainInterpreter(interp)) { - _PyThreadState_BindDetached(main_tstate); - oldts = _PyThreadState_Swap(interp->runtime, main_tstate); - assert(!_Py_IsMainInterpreter(oldts->interp)); - - /* Make sure the name and filename objects are owned - by the main interpreter. */ - name = PyUnicode_InternFromString(PyUnicode_AsUTF8(name)); - assert(name != NULL); - filename = PyUnicode_InternFromString(PyUnicode_AsUTF8(filename)); - assert(filename != NULL); - } - - PyObject *key = PyTuple_Pack(2, filename, name); - if (key == NULL) { + struct _cached_module_def *found = + _extensions_cache_lookup(filename, name, NULL); + if (found != NULL) { + assert(found->def == def); + res = 0; goto finally; } - PyObject *extensions = EXTENSIONS.dict; - if (extensions == NULL) { - extensions = PyDict_New(); - if (extensions == NULL) { - goto finally; - } - EXTENSIONS.dict = extensions; - } - - PyModuleDef *actual = (PyModuleDef *)PyDict_GetItemWithError(extensions, key); - if (PyErr_Occurred()) { + /* Create and populate the new entry. */ + struct _cached_module_def *cached = PyMem_RawMalloc( + sizeof(struct _cached_module_def)); + if (cached == NULL) { + PyErr_NoMemory(); goto finally; } - else if (actual != NULL) { - /* We expect it to be static, so it must be the same pointer. */ - assert(def == actual); - res = 0; + cached->filename = PyMem_RawMalloc(strlen(PyUnicode_AsUTF8(filename)) + 1); + if (cached->filename == NULL) { + PyMem_RawFree(cached); + PyErr_NoMemory(); goto finally; } - - /* This might trigger a resize, which is why we must switch - to the main interpreter. */ - res = PyDict_SetItem(extensions, key, (PyObject *)def); - if (res < 0) { - res = -1; + cached->name = PyMem_RawMalloc(strlen(PyUnicode_AsUTF8(name)) + 1); + if (cached->name == NULL) { + PyMem_RawFree(cached->filename); + PyMem_RawFree(cached); + PyErr_NoMemory(); goto finally; } + strcpy(cached->filename, PyUnicode_AsUTF8(filename)); + strcpy(cached->name, PyUnicode_AsUTF8(name)); + cached->def = (PyModuleDef *)Py_NewRef(def); + + /* Add it to the list. */ + cached->next = EXTENSIONS.head; + EXTENSIONS.head = cached; + res = 0; finally: - Py_XDECREF(key); - if (oldts != NULL) { - _PyThreadState_Swap(interp->runtime, oldts); - _PyThreadState_UnbindDetached(main_tstate); - Py_DECREF(name); - Py_DECREF(filename); - } extensions_lock_release(); return res; } -static int +static void _extensions_cache_delete(PyObject *filename, PyObject *name) { - int res = -1; - PyThreadState *oldts = NULL; extensions_lock_acquire(); - PyObject *key = PyTuple_Pack(2, filename, name); - if (key == NULL) { - goto finally; - } - - PyObject *extensions = EXTENSIONS.dict; - if (extensions == NULL) { - res = 0; - goto finally; - } - - PyModuleDef *actual = (PyModuleDef *)PyDict_GetItemWithError(extensions, key); - if (PyErr_Occurred()) { - goto finally; - } - else if (actual == NULL) { + struct _cached_module_def *prev = NULL; + struct _cached_module_def *found = + _extensions_cache_lookup(filename, name, &prev); + if (found == NULL) { /* It was already removed or never added. */ - res = 0; goto finally; } - /* Swap to the main interpreter, if necessary. */ - PyThreadState *main_tstate = &EXTENSIONS.main_tstate; - PyInterpreterState *interp = _PyInterpreterState_GET(); - if (!_Py_IsMainInterpreter(interp)) { - _PyThreadState_BindDetached(main_tstate); - oldts = _PyThreadState_Swap(interp->runtime, main_tstate); - assert(!_Py_IsMainInterpreter(oldts->interp)); + /* Drop it from the list. */ + if (prev == NULL) { + assert(EXTENSIONS.head == found); + EXTENSIONS.head = found->next; } - - if (PyDict_DelItem(extensions, key) < 0) { - goto finally; + else { + prev->next = found->next; } - res = 0; + + /* Destroy the cache data. */ + PyMem_RawFree(found->filename); + PyMem_RawFree(found->name); + PyMem_RawFree(found); finally: - if (oldts != NULL) { - _PyThreadState_Swap(interp->runtime, oldts); - _PyThreadState_UnbindDetached(main_tstate); - } - Py_XDECREF(key); extensions_lock_release(); - return res; } static void _extensions_cache_clear_all(void) { - /* The runtime (i.e. main interpreter) must be finalizing, - so we don't need to worry about the lock. */ - // XXX assert(_Py_IsMainInterpreter(_PyInterpreterState_GET())); - Py_CLEAR(EXTENSIONS.dict); - _PyThreadState_ClearDetached(&EXTENSIONS.main_tstate); + struct _cached_module_def *cached = EXTENSIONS.head; + while (cached != NULL) { + struct _cached_module_def *last = cached; + cached = cached->next; + PyMem_RawFree(last->filename); + PyMem_RawFree(last->name); + Py_DECREF(last->def); + PyMem_RawFree(last); + } } @@ -1233,6 +1200,8 @@ import_find_extension(PyThreadState *tstate, PyObject *name, PyObject *m_copy = def->m_base.m_copy; /* Module does not support repeated initialization */ if (m_copy == NULL) { + /* It might be a core module (e.g. sys & builtins), + for which we don't set m_copy. */ m_copy = get_core_module_dict(tstate->interp, name, filename); if (m_copy == NULL) { return NULL; @@ -1302,9 +1271,7 @@ clear_singlephase_extension(PyInterpreterState *interp, } /* Clear the cached module def. */ - if (_extensions_cache_delete(filename, name) < 0) { - return -1; - } + _extensions_cache_delete(filename, name); return 0; } @@ -3053,6 +3020,8 @@ void _PyImport_Fini(void) { /* Destroy the database used by _PyImport_{Fixup,Find}Extension */ + // XXX Should we actually leave them (mostly) intact, since we don't + // ever dlclose() the module files? _extensions_cache_clear_all(); /* Use the same memory allocator as _PyImport_Init(). */ diff --git a/Python/pystate.c b/Python/pystate.c index a9b404bd5c93e3..c48a2e99e4d878 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1640,75 +1640,6 @@ _PyThreadState_DeleteExcept(PyThreadState *tstate) } -//------------------------- -// "detached" thread states -//------------------------- - -void -_PyThreadState_InitDetached(PyThreadState *tstate, PyInterpreterState *interp) -{ - _PyRuntimeState *runtime = interp->runtime; - - HEAD_LOCK(runtime); - interp->threads.next_unique_id += 1; - uint64_t id = interp->threads.next_unique_id; - HEAD_UNLOCK(runtime); - - init_threadstate(tstate, interp, id); - // We do not call add_threadstate(). -} - -void -_PyThreadState_ClearDetached(PyThreadState *tstate) -{ - assert(!tstate->_status.bound); - assert(!tstate->_status.bound_gilstate); - assert(tstate->datastack_chunk == NULL); - assert(tstate->thread_id == 0); - assert(tstate->native_thread_id == 0); - assert(tstate->next == NULL); - assert(tstate->prev == NULL); - - PyThreadState_Clear(tstate); - clear_datastack(tstate); -} - -void -_PyThreadState_BindDetached(PyThreadState *tstate) -{ - assert(!_Py_IsMainInterpreter( - current_fast_get(tstate->interp->runtime)->interp)); - assert(_Py_IsMainInterpreter(tstate->interp)); - bind_tstate(tstate); - /* Unlike _PyThreadState_Bind(), we do not modify gilstate TSS. */ -} - -void -_PyThreadState_UnbindDetached(PyThreadState *tstate) -{ - assert(!_Py_IsMainInterpreter( - current_fast_get(tstate->interp->runtime)->interp)); - assert(_Py_IsMainInterpreter(tstate->interp)); - assert(tstate_is_alive(tstate)); - assert(!tstate->_status.active); - assert(gilstate_tss_get(tstate->interp->runtime) != tstate); - - unbind_tstate(tstate); - - /* This thread state may be bound/unbound repeatedly, - so we must erase evidence that it was ever bound (or unbound). */ - tstate->_status.bound = 0; - tstate->_status.unbound = 0; - - /* We must fully unlink the thread state from any OS thread, - to allow it to be bound more than once. */ - tstate->thread_id = 0; -#ifdef PY_HAVE_THREAD_NATIVE_ID - tstate->native_thread_id = 0; -#endif -} - - //---------- // accessors //----------