From 33977774c6f8229eb098417da1d0f21a61357924 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 19 Jul 2023 07:50:41 -0600 Subject: [PATCH 01/13] Add a comment. --- Python/import.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Python/import.c b/Python/import.c index cf993cbd62a2ef..6fc62741185985 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1233,6 +1233,8 @@ import_find_extension(PyThreadState *tstate, PyObject *name, PyObject *m_copy = def->m_base.m_copy; /* Module does not support repeated initialization */ if (m_copy == NULL) { + /* It might be a core module (e.g. sys & builtins), + for which we don't set m_copy. */ m_copy = get_core_module_dict(tstate->interp, name, filename); if (m_copy == NULL) { return NULL; From 670e632496b32dc638117e3a5d42df877aae0182 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 19 Jul 2023 09:36:10 -0600 Subject: [PATCH 02/13] Add a TODO. --- Python/import.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Python/import.c b/Python/import.c index 6fc62741185985..192b5a44680815 100644 --- a/Python/import.c +++ b/Python/import.c @@ -3055,6 +3055,8 @@ void _PyImport_Fini(void) { /* Destroy the database used by _PyImport_{Fixup,Find}Extension */ + // XXX Should we actually leave them (mostly) intact, since we don't + // ever dlclose() the module files? _extensions_cache_clear_all(); /* Use the same memory allocator as _PyImport_Init(). */ From 485d130637503a207b0440a4f600a42fffb32042 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 21 Jul 2023 14:43:40 -0600 Subject: [PATCH 03/13] Use a low-level hashtable instead of a dict. --- Include/internal/pycore_import.h | 11 +- Include/internal/pycore_runtime_init.h | 5 - Python/import.c | 211 +++++++++++++------------ 3 files changed, 112 insertions(+), 115 deletions(-) diff --git a/Include/internal/pycore_import.h b/Include/internal/pycore_import.h index c048ae88d9000c..eb063ebe54ad52 100644 --- a/Include/internal/pycore_import.h +++ b/Include/internal/pycore_import.h @@ -5,6 +5,7 @@ extern "C" { #endif +#include "pycore_hashtable.h" // _Py_hashtable_t #include "pycore_time.h" // _PyTime_t extern int _PyImport_IsInitialized(PyInterpreterState *); @@ -37,19 +38,15 @@ struct _import_runtime_state { See PyInterpreterState.modules_by_index for more info. */ Py_ssize_t last_module_index; struct { - /* A thread state tied to the main interpreter, - used exclusively for when the extensions dict is access/modified - from an arbitrary thread. */ - PyThreadState main_tstate; - /* A lock to guard the dict. */ + /* A lock to guard the cache. */ PyThread_type_lock mutex; - /* A dict mapping (filename, name) to PyModuleDef for modules. + /* The actual cache of (filename, name, PyModuleDef) for modules. Only legacy (single-phase init) extension modules are added and only if they support multiple initialization (m_size >- 0) or are imported in the main interpreter. This is initialized lazily in _PyImport_FixupExtensionObject(). Modules are added there and looked up in _imp.find_extension(). */ - PyObject *dict; + _Py_hashtable_t *hashtable; } extensions; /* Package context -- the full module name for package imports */ const char * pkgcontext; diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index e72e7422c7207e..4ab358839dbc61 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -97,11 +97,6 @@ extern PyTypeObject _PyExc_MemoryError; in accordance with the specification. */ \ .autoTSSkey = Py_tss_NEEDS_INIT, \ .parser = _parser_runtime_state_INIT, \ - .imports = { \ - .extensions = { \ - .main_tstate = _PyThreadState_INIT, \ - }, \ - }, \ .ceval = { \ .perf = _PyEval_RUNTIME_PERF_INIT, \ }, \ diff --git a/Python/import.c b/Python/import.c index 192b5a44680815..54945321241b9d 100644 --- a/Python/import.c +++ b/Python/import.c @@ -2,6 +2,7 @@ #include "Python.h" +#include "pycore_hashtable.h" // _Py_hashtable_new_full() #include "pycore_import.h" // _PyImport_BootstrapImp() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_interp.h" // struct _import_runtime_state @@ -916,13 +917,28 @@ extensions_lock_release(void) dictionary, to avoid loading shared libraries twice. */ +static Py_uhash_t +hashtable_hash_str(const void *key) +{ + return _Py_HashBytes(key, strlen((const char *)key)); +} + +static int +hashtable_compare_str(const void *key1, const void *key2) +{ + return strcmp((const char *)key1, (const char *)key2) == 0; +} + static void -_extensions_cache_init(void) +hashtable_destroy_str(void *ptr) { - /* The runtime (i.e. main interpreter) must be initializing, - so we don't need to worry about the lock. */ - _PyThreadState_InitDetached(&EXTENSIONS.main_tstate, - _PyInterpreterState_Main()); + PyMem_RawFree(ptr); +} + +static void +hashtable_destroy_str_table(void *ptr) +{ + _Py_hashtable_destroy((_Py_hashtable_t *)ptr); } static PyModuleDef * @@ -931,19 +947,24 @@ _extensions_cache_get(PyObject *filename, PyObject *name) PyModuleDef *def = NULL; extensions_lock_acquire(); - PyObject *key = PyTuple_Pack(2, filename, name); - if (key == NULL) { + if (EXTENSIONS.hashtable == NULL) { goto finally; } - PyObject *extensions = EXTENSIONS.dict; - if (extensions == NULL) { + _Py_hashtable_entry_t *entry; + entry = _Py_hashtable_get_entry(EXTENSIONS.hashtable, + PyUnicode_AsUTF8(filename)); + if (entry == NULL) { + goto finally; + } + entry = _Py_hashtable_get_entry((_Py_hashtable_t *)entry->value, + PyUnicode_AsUTF8(name)); + if (entry == NULL) { goto finally; } - def = (PyModuleDef *)PyDict_GetItemWithError(extensions, key); + def = (PyModuleDef *)entry->value; finally: - Py_XDECREF(key); extensions_lock_release(); return def; } @@ -952,124 +973,115 @@ static int _extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def) { int res = -1; - PyThreadState *oldts = NULL; extensions_lock_acquire(); - /* Swap to the main interpreter, if necessary. This matters if - the dict hasn't been created yet or if the item isn't in the - dict yet. In both cases we must ensure the relevant objects - are created using the main interpreter. */ - PyThreadState *main_tstate = &EXTENSIONS.main_tstate; - PyInterpreterState *interp = _PyInterpreterState_GET(); - if (!_Py_IsMainInterpreter(interp)) { - _PyThreadState_BindDetached(main_tstate); - oldts = _PyThreadState_Swap(interp->runtime, main_tstate); - assert(!_Py_IsMainInterpreter(oldts->interp)); - - /* Make sure the name and filename objects are owned - by the main interpreter. */ - name = PyUnicode_InternFromString(PyUnicode_AsUTF8(name)); - assert(name != NULL); - filename = PyUnicode_InternFromString(PyUnicode_AsUTF8(filename)); - assert(filename != NULL); + if (EXTENSIONS.hashtable == NULL) { + EXTENSIONS.hashtable = _Py_hashtable_new_full( + hashtable_hash_str, + hashtable_compare_str, + hashtable_destroy_str, + hashtable_destroy_str_table, + NULL + ); + if (EXTENSIONS.hashtable == NULL) { + goto finally; + } } - PyObject *key = PyTuple_Pack(2, filename, name); - if (key == NULL) { - goto finally; + _Py_hashtable_t *byname; + _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry( + EXTENSIONS.hashtable, PyUnicode_AsUTF8(filename)); + if (entry == NULL) { + byname = _Py_hashtable_new_full( + hashtable_hash_str, + hashtable_compare_str, + hashtable_destroy_str, + NULL, + NULL + ); + if (byname == NULL) { + goto finally; + } + char *filenamestr = PyMem_RawMalloc( + strlen(PyUnicode_AsUTF8(filename)) + 1); + if (filenamestr == NULL) { + _Py_hashtable_destroy(byname); + goto finally; + } + strncpy(filenamestr, PyUnicode_AsUTF8(filename), + strlen(PyUnicode_AsUTF8(filename)) + 1); + if (_Py_hashtable_set(EXTENSIONS.hashtable, filenamestr, byname) < 0) { + _Py_hashtable_destroy(byname); + PyMem_RawFree(filenamestr); + goto finally; + } } - - PyObject *extensions = EXTENSIONS.dict; - if (extensions == NULL) { - extensions = PyDict_New(); - if (extensions == NULL) { + else { + _Py_hashtable_entry_t *subentry = _Py_hashtable_get_entry( + (_Py_hashtable_t *)entry->value, PyUnicode_AsUTF8(name)); + if (subentry != NULL) { + if (subentry->value == NULL) { + subentry->value = Py_NewRef(def); + } + else { + /* We expect it to be static, so it must be the same pointer. */ + assert((PyModuleDef *)subentry->value == def); + } + res = 0; goto finally; } - EXTENSIONS.dict = extensions; } - PyModuleDef *actual = (PyModuleDef *)PyDict_GetItemWithError(extensions, key); - if (PyErr_Occurred()) { + char *namestr = PyMem_RawMalloc(strlen(PyUnicode_AsUTF8(name)) + 1); + if (namestr == NULL) { goto finally; } - else if (actual != NULL) { - /* We expect it to be static, so it must be the same pointer. */ - assert(def == actual); - res = 0; - goto finally; - } - - /* This might trigger a resize, which is why we must switch - to the main interpreter. */ - res = PyDict_SetItem(extensions, key, (PyObject *)def); - if (res < 0) { - res = -1; + strncpy(namestr, PyUnicode_AsUTF8(name), strlen(PyUnicode_AsUTF8(name)) + 1); + if (_Py_hashtable_set(byname, namestr, Py_NewRef(def)) < 0) { + PyMem_RawFree(namestr); goto finally; } res = 0; finally: - Py_XDECREF(key); - if (oldts != NULL) { - _PyThreadState_Swap(interp->runtime, oldts); - _PyThreadState_UnbindDetached(main_tstate); - Py_DECREF(name); - Py_DECREF(filename); - } extensions_lock_release(); return res; } -static int +static void _extensions_cache_delete(PyObject *filename, PyObject *name) { - int res = -1; - PyThreadState *oldts = NULL; extensions_lock_acquire(); - PyObject *key = PyTuple_Pack(2, filename, name); - if (key == NULL) { - goto finally; - } - - PyObject *extensions = EXTENSIONS.dict; - if (extensions == NULL) { - res = 0; + if (EXTENSIONS.hashtable == NULL) { + /* It was never added. */ goto finally; } - PyModuleDef *actual = (PyModuleDef *)PyDict_GetItemWithError(extensions, key); - if (PyErr_Occurred()) { + _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry( + EXTENSIONS.hashtable, PyUnicode_AsUTF8(filename)); + if (entry == NULL) { + /* It was never added. */ goto finally; } - else if (actual == NULL) { - /* It was already removed or never added. */ - res = 0; + _Py_hashtable_entry_t *subentry = _Py_hashtable_get_entry( + (_Py_hashtable_t *)entry->value, PyUnicode_AsUTF8(name)); + if (subentry == NULL) { + /* It never added. */ goto finally; } - - /* Swap to the main interpreter, if necessary. */ - PyThreadState *main_tstate = &EXTENSIONS.main_tstate; - PyInterpreterState *interp = _PyInterpreterState_GET(); - if (!_Py_IsMainInterpreter(interp)) { - _PyThreadState_BindDetached(main_tstate); - oldts = _PyThreadState_Swap(interp->runtime, main_tstate); - assert(!_Py_IsMainInterpreter(oldts->interp)); - } - - if (PyDict_DelItem(extensions, key) < 0) { + if (subentry->value == NULL) { + /* It was already removed. */ goto finally; } - res = 0; + /* This decref would be problematic if the module def were + dynamically allocated, it were the last ref, and this function + were called with an interpreter other than the def's owner. */ + Py_DECREF(subentry->value); + subentry->value = NULL; finally: - if (oldts != NULL) { - _PyThreadState_Swap(interp->runtime, oldts); - _PyThreadState_UnbindDetached(main_tstate); - } - Py_XDECREF(key); extensions_lock_release(); - return res; } static void @@ -1077,9 +1089,8 @@ _extensions_cache_clear_all(void) { /* The runtime (i.e. main interpreter) must be finalizing, so we don't need to worry about the lock. */ - // XXX assert(_Py_IsMainInterpreter(_PyInterpreterState_GET())); - Py_CLEAR(EXTENSIONS.dict); - _PyThreadState_ClearDetached(&EXTENSIONS.main_tstate); + _Py_hashtable_destroy(EXTENSIONS.hashtable); + EXTENSIONS.hashtable = NULL; } @@ -1304,9 +1315,7 @@ clear_singlephase_extension(PyInterpreterState *interp, } /* Clear the cached module def. */ - if (_extensions_cache_delete(filename, name) < 0) { - return -1; - } + _extensions_cache_delete(filename, name); return 0; } @@ -3094,10 +3103,6 @@ _PyImport_Fini2(void) PyStatus _PyImport_InitCore(PyThreadState *tstate, PyObject *sysmod, int importlib) { - if (_Py_IsMainInterpreter(tstate->interp)) { - _extensions_cache_init(); - } - // XXX Initialize here: interp->modules and interp->import_func. // XXX Initialize here: sys.modules and sys.meta_path. From 77d5e8f298ae4dcfd5aa5e8cf7f6cd3f0254e8fc Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 19 Jul 2023 09:34:44 -0600 Subject: [PATCH 04/13] Drop _PyThreadState_*Detached(). --- Include/internal/pycore_pystate.h | 5 --- Python/pystate.c | 69 ------------------------------- 2 files changed, 74 deletions(-) diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 0659084194d293..2dee4dfec72325 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -125,11 +125,6 @@ PyAPI_FUNC(PyThreadState *) _PyThreadState_New(PyInterpreterState *interp); PyAPI_FUNC(void) _PyThreadState_Bind(PyThreadState *tstate); PyAPI_FUNC(void) _PyThreadState_DeleteExcept(PyThreadState *tstate); -extern void _PyThreadState_InitDetached(PyThreadState *, PyInterpreterState *); -extern void _PyThreadState_ClearDetached(PyThreadState *); -extern void _PyThreadState_BindDetached(PyThreadState *); -extern void _PyThreadState_UnbindDetached(PyThreadState *); - PyAPI_FUNC(PyObject*) _PyThreadState_GetDict(PyThreadState *tstate); /* The implementation of sys._current_frames() Returns a dict mapping diff --git a/Python/pystate.c b/Python/pystate.c index a9b404bd5c93e3..c48a2e99e4d878 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1640,75 +1640,6 @@ _PyThreadState_DeleteExcept(PyThreadState *tstate) } -//------------------------- -// "detached" thread states -//------------------------- - -void -_PyThreadState_InitDetached(PyThreadState *tstate, PyInterpreterState *interp) -{ - _PyRuntimeState *runtime = interp->runtime; - - HEAD_LOCK(runtime); - interp->threads.next_unique_id += 1; - uint64_t id = interp->threads.next_unique_id; - HEAD_UNLOCK(runtime); - - init_threadstate(tstate, interp, id); - // We do not call add_threadstate(). -} - -void -_PyThreadState_ClearDetached(PyThreadState *tstate) -{ - assert(!tstate->_status.bound); - assert(!tstate->_status.bound_gilstate); - assert(tstate->datastack_chunk == NULL); - assert(tstate->thread_id == 0); - assert(tstate->native_thread_id == 0); - assert(tstate->next == NULL); - assert(tstate->prev == NULL); - - PyThreadState_Clear(tstate); - clear_datastack(tstate); -} - -void -_PyThreadState_BindDetached(PyThreadState *tstate) -{ - assert(!_Py_IsMainInterpreter( - current_fast_get(tstate->interp->runtime)->interp)); - assert(_Py_IsMainInterpreter(tstate->interp)); - bind_tstate(tstate); - /* Unlike _PyThreadState_Bind(), we do not modify gilstate TSS. */ -} - -void -_PyThreadState_UnbindDetached(PyThreadState *tstate) -{ - assert(!_Py_IsMainInterpreter( - current_fast_get(tstate->interp->runtime)->interp)); - assert(_Py_IsMainInterpreter(tstate->interp)); - assert(tstate_is_alive(tstate)); - assert(!tstate->_status.active); - assert(gilstate_tss_get(tstate->interp->runtime) != tstate); - - unbind_tstate(tstate); - - /* This thread state may be bound/unbound repeatedly, - so we must erase evidence that it was ever bound (or unbound). */ - tstate->_status.bound = 0; - tstate->_status.unbound = 0; - - /* We must fully unlink the thread state from any OS thread, - to allow it to be bound more than once. */ - tstate->thread_id = 0; -#ifdef PY_HAVE_THREAD_NATIVE_ID - tstate->native_thread_id = 0; -#endif -} - - //---------- // accessors //---------- From 04a19c456b6b63aab9a1272d0ea08c66b6e46875 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 24 Jul 2023 11:38:15 -0600 Subject: [PATCH 05/13] Set "byname" properly for the reload case. --- Python/import.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Python/import.c b/Python/import.c index 54945321241b9d..25d025845a2a97 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1017,8 +1017,9 @@ _extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def) } } else { + byname = (_Py_hashtable_t *)entry->value; _Py_hashtable_entry_t *subentry = _Py_hashtable_get_entry( - (_Py_hashtable_t *)entry->value, PyUnicode_AsUTF8(name)); + byname, PyUnicode_AsUTF8(name)); if (subentry != NULL) { if (subentry->value == NULL) { subentry->value = Py_NewRef(def); From 5fde127bbd6fe6d332f45f91fc655ebd6289cea4 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 27 Jul 2023 11:50:29 -0600 Subject: [PATCH 06/13] Use a flat hashtable instead of a nested one. --- Python/import.c | 127 ++++++++++++++++++++++-------------------------- 1 file changed, 57 insertions(+), 70 deletions(-) diff --git a/Python/import.c b/Python/import.c index 25d025845a2a97..4f466b01aa53b6 100644 --- a/Python/import.c +++ b/Python/import.c @@ -917,6 +917,25 @@ extensions_lock_release(void) dictionary, to avoid loading shared libraries twice. */ +static void * +hashtable_key_from_2_strings(PyObject *str1, PyObject *str2, const char sep) +{ + Py_ssize_t str1_len = strlen(PyUnicode_AsUTF8(str1)); + Py_ssize_t str2_len = strlen(PyUnicode_AsUTF8(str2)); + assert(SIZE_MAX - str1_len - 1 - str2_len > 0); + size_t size = str1_len + 1 + str2_len + 1; + + char *key = PyMem_RawMalloc(size); + if (key == NULL) { + return NULL; + } + + strncpy(key, PyUnicode_AsUTF8(str1), str1_len); + key[str1_len] = sep; + strncpy(key + str1_len + 1, PyUnicode_AsUTF8(str2), str2_len + 1); + return key; +} + static Py_uhash_t hashtable_hash_str(const void *key) { @@ -935,30 +954,23 @@ hashtable_destroy_str(void *ptr) PyMem_RawFree(ptr); } -static void -hashtable_destroy_str_table(void *ptr) -{ - _Py_hashtable_destroy((_Py_hashtable_t *)ptr); -} - static PyModuleDef * _extensions_cache_get(PyObject *filename, PyObject *name) { PyModuleDef *def = NULL; + void *key = NULL; extensions_lock_acquire(); if (EXTENSIONS.hashtable == NULL) { goto finally; } - _Py_hashtable_entry_t *entry; - entry = _Py_hashtable_get_entry(EXTENSIONS.hashtable, - PyUnicode_AsUTF8(filename)); - if (entry == NULL) { + key = hashtable_key_from_2_strings(filename, name, ':'); + if (key == NULL) { goto finally; } - entry = _Py_hashtable_get_entry((_Py_hashtable_t *)entry->value, - PyUnicode_AsUTF8(name)); + _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry( + EXTENSIONS.hashtable, key); if (entry == NULL) { goto finally; } @@ -966,6 +978,9 @@ _extensions_cache_get(PyObject *filename, PyObject *name) finally: extensions_lock_release(); + if (key != NULL) { + PyMem_RawFree(key); + } return def; } @@ -979,8 +994,8 @@ _extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def) EXTENSIONS.hashtable = _Py_hashtable_new_full( hashtable_hash_str, hashtable_compare_str, - hashtable_destroy_str, - hashtable_destroy_str_table, + hashtable_destroy_str, // key + NULL, // value NULL ); if (EXTENSIONS.hashtable == NULL) { @@ -988,59 +1003,28 @@ _extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def) } } - _Py_hashtable_t *byname; + void *key = hashtable_key_from_2_strings(filename, name, ':'); + if (key == NULL) { + goto finally; + } + _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry( - EXTENSIONS.hashtable, PyUnicode_AsUTF8(filename)); + EXTENSIONS.hashtable, key); if (entry == NULL) { - byname = _Py_hashtable_new_full( - hashtable_hash_str, - hashtable_compare_str, - hashtable_destroy_str, - NULL, - NULL - ); - if (byname == NULL) { - goto finally; - } - char *filenamestr = PyMem_RawMalloc( - strlen(PyUnicode_AsUTF8(filename)) + 1); - if (filenamestr == NULL) { - _Py_hashtable_destroy(byname); - goto finally; - } - strncpy(filenamestr, PyUnicode_AsUTF8(filename), - strlen(PyUnicode_AsUTF8(filename)) + 1); - if (_Py_hashtable_set(EXTENSIONS.hashtable, filenamestr, byname) < 0) { - _Py_hashtable_destroy(byname); - PyMem_RawFree(filenamestr); + if (_Py_hashtable_set(EXTENSIONS.hashtable, key, Py_NewRef(def)) < 0) { + PyMem_RawFree(key); goto finally; } } else { - byname = (_Py_hashtable_t *)entry->value; - _Py_hashtable_entry_t *subentry = _Py_hashtable_get_entry( - byname, PyUnicode_AsUTF8(name)); - if (subentry != NULL) { - if (subentry->value == NULL) { - subentry->value = Py_NewRef(def); - } - else { - /* We expect it to be static, so it must be the same pointer. */ - assert((PyModuleDef *)subentry->value == def); - } - res = 0; - goto finally; + if (entry->value == NULL) { + entry->value = Py_NewRef(def); } - } - - char *namestr = PyMem_RawMalloc(strlen(PyUnicode_AsUTF8(name)) + 1); - if (namestr == NULL) { - goto finally; - } - strncpy(namestr, PyUnicode_AsUTF8(name), strlen(PyUnicode_AsUTF8(name)) + 1); - if (_Py_hashtable_set(byname, namestr, Py_NewRef(def)) < 0) { - PyMem_RawFree(namestr); - goto finally; + else { + /* We expect it to be static, so it must be the same pointer. */ + assert((PyModuleDef *)entry->value == def); + } + PyMem_RawFree(key); } res = 0; @@ -1052,6 +1036,7 @@ _extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def) static void _extensions_cache_delete(PyObject *filename, PyObject *name) { + void *key = NULL; extensions_lock_acquire(); if (EXTENSIONS.hashtable == NULL) { @@ -1059,30 +1044,32 @@ _extensions_cache_delete(PyObject *filename, PyObject *name) goto finally; } + key = hashtable_key_from_2_strings(filename, name, ':'); + if (key == NULL) { + goto finally; + } + _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry( - EXTENSIONS.hashtable, PyUnicode_AsUTF8(filename)); + EXTENSIONS.hashtable, key); if (entry == NULL) { /* It was never added. */ goto finally; } - _Py_hashtable_entry_t *subentry = _Py_hashtable_get_entry( - (_Py_hashtable_t *)entry->value, PyUnicode_AsUTF8(name)); - if (subentry == NULL) { - /* It never added. */ - goto finally; - } - if (subentry->value == NULL) { + if (entry->value == NULL) { /* It was already removed. */ goto finally; } /* This decref would be problematic if the module def were dynamically allocated, it were the last ref, and this function were called with an interpreter other than the def's owner. */ - Py_DECREF(subentry->value); - subentry->value = NULL; + Py_DECREF(entry->value); + entry->value = NULL; finally: extensions_lock_release(); + if (key != NULL) { + PyMem_RawFree(key); + } } static void From e83b0cccc1429b92091f6576349b33e13b6ec5bd Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 27 Jul 2023 12:30:59 -0600 Subject: [PATCH 07/13] Use the right allocator. --- Python/import.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Python/import.c b/Python/import.c index 87de1c2349c29b..e06557d0e1d839 100644 --- a/Python/import.c +++ b/Python/import.c @@ -980,12 +980,13 @@ _extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def) extensions_lock_acquire(); if (EXTENSIONS.hashtable == NULL) { + _Py_hashtable_allocator_t alloc = {PyMem_RawMalloc, PyMem_RawFree}; EXTENSIONS.hashtable = _Py_hashtable_new_full( hashtable_hash_str, hashtable_compare_str, hashtable_destroy_str, // key NULL, // value - NULL + &alloc ); if (EXTENSIONS.hashtable == NULL) { goto finally; From 9e81cd5bcf45c147c05de6503d4339c1dde2692e Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 28 Jul 2023 10:01:08 -0600 Subject: [PATCH 08/13] Use PyUnicode_AsUTF8AndSize() (and fix the assert). Co-authored-by: T. Wouters --- Python/import.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/Python/import.c b/Python/import.c index e06557d0e1d839..44ced28ace65c7 100644 --- a/Python/import.c +++ b/Python/import.c @@ -909,9 +909,13 @@ extensions_lock_release(void) static void * hashtable_key_from_2_strings(PyObject *str1, PyObject *str2, const char sep) { - Py_ssize_t str1_len = strlen(PyUnicode_AsUTF8(str1)); - Py_ssize_t str2_len = strlen(PyUnicode_AsUTF8(str2)); - assert(SIZE_MAX - str1_len - 1 - str2_len > 0); + Py_ssize_t str1_len, str2_len; + const char *str1_data = PyUnicode_AsUTF8AndSize(str1, &str1_len); + const char *str2_data = PyUnicode_AsUTF8AndSize(str2, &str2_len); + if (str1_data == NULL || str2_data == NULL) { + return NULL; + } + assert(SIZE_MAX - str1_len - str2_len > 2); size_t size = str1_len + 1 + str2_len + 1; char *key = PyMem_RawMalloc(size); @@ -919,9 +923,10 @@ hashtable_key_from_2_strings(PyObject *str1, PyObject *str2, const char sep) return NULL; } - strncpy(key, PyUnicode_AsUTF8(str1), str1_len); + strncpy(key, str1_data, str1_len); key[str1_len] = sep; - strncpy(key + str1_len + 1, PyUnicode_AsUTF8(str2), str2_len + 1); + strncpy(key + str1_len + 1, str2_data, str2_len + 1); + assert(strlen(key) == size - 1); return key; } From 430716b1b27f5ead37fc6d765e68292d0f401cba Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 28 Jul 2023 10:04:07 -0600 Subject: [PATCH 09/13] Raise MemoryError where appropriate. --- Python/import.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Python/import.c b/Python/import.c index 44ced28ace65c7..acd5659b3dd559 100644 --- a/Python/import.c +++ b/Python/import.c @@ -920,6 +920,7 @@ hashtable_key_from_2_strings(PyObject *str1, PyObject *str2, const char sep) char *key = PyMem_RawMalloc(size); if (key == NULL) { + PyErr_NoMemory(); return NULL; } @@ -994,6 +995,7 @@ _extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def) &alloc ); if (EXTENSIONS.hashtable == NULL) { + PyErr_NoMemory(); goto finally; } } @@ -1008,6 +1010,7 @@ _extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def) if (entry == NULL) { if (_Py_hashtable_set(EXTENSIONS.hashtable, key, Py_NewRef(def)) < 0) { PyMem_RawFree(key); + PyErr_NoMemory(); goto finally; } } From 647fbc2c81dbd09b00aa39d489ead78580fdcf00 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 28 Jul 2023 10:09:08 -0600 Subject: [PATCH 10/13] Add a comment. --- Python/import.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Python/import.c b/Python/import.c index acd5659b3dd559..a85e62b7e43bc0 100644 --- a/Python/import.c +++ b/Python/import.c @@ -915,6 +915,7 @@ hashtable_key_from_2_strings(PyObject *str1, PyObject *str2, const char sep) if (str1_data == NULL || str2_data == NULL) { return NULL; } + /* Make sure sep and the NULL byte won't cause an overflow. */ assert(SIZE_MAX - str1_len - str2_len > 2); size_t size = str1_len + 1 + str2_len + 1; From bbe5eda9b7f833ac487f05591798295601bcd966 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 28 Jul 2023 10:10:24 -0600 Subject: [PATCH 11/13] Use a constant for the cache separator character. --- Python/import.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Python/import.c b/Python/import.c index a85e62b7e43bc0..b943f3514d7aff 100644 --- a/Python/import.c +++ b/Python/import.c @@ -950,6 +950,8 @@ hashtable_destroy_str(void *ptr) PyMem_RawFree(ptr); } +#define HTSEP ':' + static PyModuleDef * _extensions_cache_get(PyObject *filename, PyObject *name) { @@ -961,7 +963,7 @@ _extensions_cache_get(PyObject *filename, PyObject *name) goto finally; } - key = hashtable_key_from_2_strings(filename, name, ':'); + key = hashtable_key_from_2_strings(filename, name, HTSEP); if (key == NULL) { goto finally; } @@ -1001,7 +1003,7 @@ _extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def) } } - void *key = hashtable_key_from_2_strings(filename, name, ':'); + void *key = hashtable_key_from_2_strings(filename, name, HTSEP); if (key == NULL) { goto finally; } @@ -1043,7 +1045,7 @@ _extensions_cache_delete(PyObject *filename, PyObject *name) goto finally; } - key = hashtable_key_from_2_strings(filename, name, ':'); + key = hashtable_key_from_2_strings(filename, name, HTSEP); if (key == NULL) { goto finally; } @@ -1080,6 +1082,8 @@ _extensions_cache_clear_all(void) EXTENSIONS.hashtable = NULL; } +#undef HTSEP + static bool check_multi_interp_extensions(PyInterpreterState *interp) From 1d7cbd00cc85c154567f4718fd965b981944a5b3 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 28 Jul 2023 10:15:16 -0600 Subject: [PATCH 12/13] Decref when clearing the hashtable. --- Python/import.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Python/import.c b/Python/import.c index b943f3514d7aff..5413d1db5cf7fa 100644 --- a/Python/import.c +++ b/Python/import.c @@ -950,6 +950,12 @@ hashtable_destroy_str(void *ptr) PyMem_RawFree(ptr); } +static void +hashtable_destroy_module_def(void *ptr) +{ + Py_XDECREF((PyObject *)ptr); +} + #define HTSEP ':' static PyModuleDef * @@ -994,7 +1000,7 @@ _extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def) hashtable_hash_str, hashtable_compare_str, hashtable_destroy_str, // key - NULL, // value + hashtable_destroy_module_def, // value &alloc ); if (EXTENSIONS.hashtable == NULL) { From 20c8b9d9415c81a65bd292c2474b050e531e1dc9 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 28 Jul 2023 10:37:14 -0600 Subject: [PATCH 13/13] Make the cached module defs immortal. --- Python/import.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/Python/import.c b/Python/import.c index 5413d1db5cf7fa..4a5bd163fcf595 100644 --- a/Python/import.c +++ b/Python/import.c @@ -7,6 +7,7 @@ #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_interp.h" // struct _import_runtime_state #include "pycore_namespace.h" // _PyNamespace_Type +#include "pycore_object.h" // _Py_SetImmortal() #include "pycore_pyerrors.h" // _PyErr_SetString() #include "pycore_pyhash.h" // _Py_KeyedHash() #include "pycore_pylifecycle.h" @@ -950,12 +951,6 @@ hashtable_destroy_str(void *ptr) PyMem_RawFree(ptr); } -static void -hashtable_destroy_module_def(void *ptr) -{ - Py_XDECREF((PyObject *)ptr); -} - #define HTSEP ':' static PyModuleDef * @@ -1000,7 +995,8 @@ _extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def) hashtable_hash_str, hashtable_compare_str, hashtable_destroy_str, // key - hashtable_destroy_module_def, // value + /* There's no need to decref the def since it's immortal. */ + NULL, // value &alloc ); if (EXTENSIONS.hashtable == NULL) { @@ -1014,10 +1010,11 @@ _extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def) goto finally; } + int already_set = 0; _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry( EXTENSIONS.hashtable, key); if (entry == NULL) { - if (_Py_hashtable_set(EXTENSIONS.hashtable, key, Py_NewRef(def)) < 0) { + if (_Py_hashtable_set(EXTENSIONS.hashtable, key, def) < 0) { PyMem_RawFree(key); PyErr_NoMemory(); goto finally; @@ -1025,14 +1022,20 @@ _extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def) } else { if (entry->value == NULL) { - entry->value = Py_NewRef(def); + entry->value = def; } else { /* We expect it to be static, so it must be the same pointer. */ assert((PyModuleDef *)entry->value == def); + already_set = 1; } PyMem_RawFree(key); } + if (!already_set) { + /* We assume that all module defs are statically allocated + and will never be freed. Otherwise, we would incref here. */ + _Py_SetImmortal(def); + } res = 0; finally: @@ -1066,10 +1069,10 @@ _extensions_cache_delete(PyObject *filename, PyObject *name) /* It was already removed. */ goto finally; } - /* This decref would be problematic if the module def were + /* If we hadn't made the stored defs immortal, we would decref here. + However, this decref would be problematic if the module def were dynamically allocated, it were the last ref, and this function were called with an interpreter other than the def's owner. */ - Py_DECREF(entry->value); entry->value = NULL; finally: