-
-
Notifications
You must be signed in to change notification settings - Fork 32.2k
gh-105699: Use a Linked List for the PyModuleDef Cache #106899
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
31414a3
a6f6c0c
4c99023
d5b3aa4
fb5a9d3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Python no longer crashes due to an infrequent race when simultaneously | ||
initializing the sys/builtins modules in multiple subinterpreters that have | ||
their own GILs. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -919,167 +919,134 @@ extensions_lock_release(void) | |
static void | ||
_extensions_cache_init(void) | ||
{ | ||
/* The runtime (i.e. main interpreter) must be initializing, | ||
so we don't need to worry about the lock. */ | ||
_PyThreadState_InitDetached(&EXTENSIONS.main_tstate, | ||
_PyInterpreterState_Main()); | ||
/* The lock is initialized directly with the general runtime state. */ | ||
assert(EXTENSIONS.mutex != NULL); | ||
//assert(EXTENSIONS.head == NULL); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Leftover line I presume. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sort of. I had expected the assert to work (and it would in a simpler world). I was going to circle back to it but, having slept on it, don't see the need. It can be dropped. |
||
} | ||
|
||
static PyModuleDef * | ||
_extensions_cache_get(PyObject *filename, PyObject *name) | ||
static struct _cached_module_def * | ||
_extensions_cache_lookup(PyObject *filename, PyObject *name, | ||
struct _cached_module_def **prev_p) | ||
{ | ||
PyModuleDef *def = NULL; | ||
extensions_lock_acquire(); | ||
|
||
PyObject *key = PyTuple_Pack(2, filename, name); | ||
if (key == NULL) { | ||
goto finally; | ||
struct _cached_module_def *prev = NULL; | ||
struct _cached_module_def *cached = EXTENSIONS.head; | ||
while (cached != NULL) { | ||
if (PyUnicode_CompareWithASCIIString(filename, cached->filename) == 0) { | ||
if (PyUnicode_CompareWithASCIIString(name, cached->name) == 0) { | ||
break; | ||
} | ||
} | ||
prev = cached; | ||
cached = cached->next; | ||
} | ||
|
||
PyObject *extensions = EXTENSIONS.dict; | ||
if (extensions == NULL) { | ||
goto finally; | ||
if (prev_p != NULL) { | ||
*prev_p = prev; | ||
} | ||
def = (PyModuleDef *)PyDict_GetItemWithError(extensions, key); | ||
return cached; | ||
} | ||
|
||
finally: | ||
Py_XDECREF(key); | ||
static PyModuleDef * | ||
_extensions_cache_get(PyObject *filename, PyObject *name) | ||
{ | ||
extensions_lock_acquire(); | ||
struct _cached_module_def *found = | ||
_extensions_cache_lookup(filename, name, NULL); | ||
extensions_lock_release(); | ||
return def; | ||
return (found != NULL) ? found->def : NULL; | ||
} | ||
|
||
static int | ||
_extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def) | ||
{ | ||
int res = -1; | ||
PyThreadState *oldts = NULL; | ||
extensions_lock_acquire(); | ||
|
||
/* Swap to the main interpreter, if necessary. This matters if | ||
the dict hasn't been created yet or if the item isn't in the | ||
dict yet. In both cases we must ensure the relevant objects | ||
are created using the main interpreter. */ | ||
PyThreadState *main_tstate = &EXTENSIONS.main_tstate; | ||
PyInterpreterState *interp = _PyInterpreterState_GET(); | ||
if (!_Py_IsMainInterpreter(interp)) { | ||
_PyThreadState_BindDetached(main_tstate); | ||
oldts = _PyThreadState_Swap(interp->runtime, main_tstate); | ||
assert(!_Py_IsMainInterpreter(oldts->interp)); | ||
|
||
/* Make sure the name and filename objects are owned | ||
by the main interpreter. */ | ||
name = PyUnicode_InternFromString(PyUnicode_AsUTF8(name)); | ||
assert(name != NULL); | ||
filename = PyUnicode_InternFromString(PyUnicode_AsUTF8(filename)); | ||
assert(filename != NULL); | ||
} | ||
|
||
PyObject *key = PyTuple_Pack(2, filename, name); | ||
if (key == NULL) { | ||
struct _cached_module_def *found = | ||
_extensions_cache_lookup(filename, name, NULL); | ||
if (found != NULL) { | ||
assert(found->def == def); | ||
res = 0; | ||
goto finally; | ||
} | ||
|
||
PyObject *extensions = EXTENSIONS.dict; | ||
if (extensions == NULL) { | ||
extensions = PyDict_New(); | ||
if (extensions == NULL) { | ||
goto finally; | ||
} | ||
EXTENSIONS.dict = extensions; | ||
} | ||
|
||
PyModuleDef *actual = (PyModuleDef *)PyDict_GetItemWithError(extensions, key); | ||
if (PyErr_Occurred()) { | ||
/* Create and populate the new entry. */ | ||
struct _cached_module_def *cached = PyMem_RawMalloc( | ||
sizeof(struct _cached_module_def)); | ||
if (cached == NULL) { | ||
PyErr_NoMemory(); | ||
goto finally; | ||
} | ||
else if (actual != NULL) { | ||
/* We expect it to be static, so it must be the same pointer. */ | ||
assert(def == actual); | ||
res = 0; | ||
cached->filename = PyMem_RawMalloc(strlen(PyUnicode_AsUTF8(filename)) + 1); | ||
if (cached->filename == NULL) { | ||
PyMem_RawFree(cached); | ||
PyErr_NoMemory(); | ||
goto finally; | ||
} | ||
|
||
/* This might trigger a resize, which is why we must switch | ||
to the main interpreter. */ | ||
res = PyDict_SetItem(extensions, key, (PyObject *)def); | ||
if (res < 0) { | ||
res = -1; | ||
cached->name = PyMem_RawMalloc(strlen(PyUnicode_AsUTF8(name)) + 1); | ||
if (cached->name == NULL) { | ||
PyMem_RawFree(cached->filename); | ||
PyMem_RawFree(cached); | ||
PyErr_NoMemory(); | ||
goto finally; | ||
} | ||
strcpy(cached->filename, PyUnicode_AsUTF8(filename)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use strncpy to avoid the deprecation in some compilers (macOS IIRC?) |
||
strcpy(cached->name, PyUnicode_AsUTF8(name)); | ||
cached->def = (PyModuleDef *)Py_NewRef(def); | ||
|
||
/* Add it to the list. */ | ||
cached->next = EXTENSIONS.head; | ||
EXTENSIONS.head = cached; | ||
|
||
res = 0; | ||
|
||
finally: | ||
Py_XDECREF(key); | ||
if (oldts != NULL) { | ||
_PyThreadState_Swap(interp->runtime, oldts); | ||
_PyThreadState_UnbindDetached(main_tstate); | ||
Py_DECREF(name); | ||
Py_DECREF(filename); | ||
} | ||
extensions_lock_release(); | ||
return res; | ||
} | ||
|
||
static int | ||
static void | ||
_extensions_cache_delete(PyObject *filename, PyObject *name) | ||
{ | ||
int res = -1; | ||
PyThreadState *oldts = NULL; | ||
extensions_lock_acquire(); | ||
|
||
PyObject *key = PyTuple_Pack(2, filename, name); | ||
if (key == NULL) { | ||
goto finally; | ||
} | ||
|
||
PyObject *extensions = EXTENSIONS.dict; | ||
if (extensions == NULL) { | ||
res = 0; | ||
goto finally; | ||
} | ||
|
||
PyModuleDef *actual = (PyModuleDef *)PyDict_GetItemWithError(extensions, key); | ||
if (PyErr_Occurred()) { | ||
goto finally; | ||
} | ||
else if (actual == NULL) { | ||
struct _cached_module_def *prev = NULL; | ||
struct _cached_module_def *found = | ||
_extensions_cache_lookup(filename, name, &prev); | ||
if (found == NULL) { | ||
/* It was already removed or never added. */ | ||
res = 0; | ||
goto finally; | ||
} | ||
|
||
/* Swap to the main interpreter, if necessary. */ | ||
PyThreadState *main_tstate = &EXTENSIONS.main_tstate; | ||
PyInterpreterState *interp = _PyInterpreterState_GET(); | ||
if (!_Py_IsMainInterpreter(interp)) { | ||
_PyThreadState_BindDetached(main_tstate); | ||
oldts = _PyThreadState_Swap(interp->runtime, main_tstate); | ||
assert(!_Py_IsMainInterpreter(oldts->interp)); | ||
/* Drop it from the list. */ | ||
if (prev == NULL) { | ||
assert(EXTENSIONS.head == found); | ||
EXTENSIONS.head = found->next; | ||
} | ||
|
||
if (PyDict_DelItem(extensions, key) < 0) { | ||
goto finally; | ||
else { | ||
prev->next = found->next; | ||
} | ||
res = 0; | ||
|
||
/* Destroy the cache data. */ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't this DECREF There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good catch |
||
PyMem_RawFree(found->filename); | ||
PyMem_RawFree(found->name); | ||
PyMem_RawFree(found); | ||
|
||
finally: | ||
if (oldts != NULL) { | ||
_PyThreadState_Swap(interp->runtime, oldts); | ||
_PyThreadState_UnbindDetached(main_tstate); | ||
} | ||
Py_XDECREF(key); | ||
extensions_lock_release(); | ||
return res; | ||
} | ||
|
||
static void | ||
_extensions_cache_clear_all(void) | ||
{ | ||
/* The runtime (i.e. main interpreter) must be finalizing, | ||
so we don't need to worry about the lock. */ | ||
// XXX assert(_Py_IsMainInterpreter(_PyInterpreterState_GET())); | ||
Py_CLEAR(EXTENSIONS.dict); | ||
_PyThreadState_ClearDetached(&EXTENSIONS.main_tstate); | ||
struct _cached_module_def *cached = EXTENSIONS.head; | ||
while (cached != NULL) { | ||
struct _cached_module_def *last = cached; | ||
cached = cached->next; | ||
PyMem_RawFree(last->filename); | ||
PyMem_RawFree(last->name); | ||
Py_DECREF(last->def); | ||
PyMem_RawFree(last); | ||
} | ||
} | ||
|
||
|
||
|
@@ -1233,6 +1200,8 @@ import_find_extension(PyThreadState *tstate, PyObject *name, | |
PyObject *m_copy = def->m_base.m_copy; | ||
/* Module does not support repeated initialization */ | ||
if (m_copy == NULL) { | ||
/* It might be a core module (e.g. sys & builtins), | ||
for which we don't set m_copy. */ | ||
m_copy = get_core_module_dict(tstate->interp, name, filename); | ||
if (m_copy == NULL) { | ||
return NULL; | ||
|
@@ -1302,9 +1271,7 @@ clear_singlephase_extension(PyInterpreterState *interp, | |
} | ||
|
||
/* Clear the cached module def. */ | ||
if (_extensions_cache_delete(filename, name) < 0) { | ||
return -1; | ||
} | ||
_extensions_cache_delete(filename, name); | ||
|
||
return 0; | ||
} | ||
|
@@ -3053,6 +3020,8 @@ void | |
_PyImport_Fini(void) | ||
{ | ||
/* Destroy the database used by _PyImport_{Fixup,Find}Extension */ | ||
// XXX Should we actually leave them (mostly) intact, since we don't | ||
// ever dlclose() the module files? | ||
_extensions_cache_clear_all(); | ||
|
||
/* Use the same memory allocator as _PyImport_Init(). */ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why have this function at all, now that it's just a single assert?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Honestly, I figured I'd end up finding a better solution than a linked list that would revive the need for this function, so I kept it around. The point is a bit moot now though. 😄