Skip to content

gh-105699: Use a Linked List for the PyModuleDef Cache #106899

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions Include/internal/pycore_import.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ PyAPI_FUNC(PyObject *) _PyImport_GetModuleAttr(PyObject *, PyObject *);
PyAPI_FUNC(PyObject *) _PyImport_GetModuleAttrString(const char *, const char *);


struct _cached_module_def {
char *filename;
char *name;
PyModuleDef *def;
struct _cached_module_def *next;
};

struct _import_runtime_state {
/* The builtin modules (defined in config.c). */
struct _inittab *inittab;
Expand All @@ -37,19 +44,15 @@ struct _import_runtime_state {
See PyInterpreterState.modules_by_index for more info. */
Py_ssize_t last_module_index;
struct {
/* A thread state tied to the main interpreter,
used exclusively for when the extensions dict is access/modified
from an arbitrary thread. */
PyThreadState main_tstate;
/* A lock to guard the dict. */
/* A lock to guard the cache. */
PyThread_type_lock mutex;
/* A dict mapping (filename, name) to PyModuleDef for modules.
/* The actual cache of (filename, name, PyModuleDef) for modules.
Only legacy (single-phase init) extension modules are added
and only if they support multiple initialization (m_size >- 0)
or are imported in the main interpreter.
This is initialized lazily in _PyImport_FixupExtensionObject().
This is added to in _PyImport_FixupExtensionObject().
Modules are added there and looked up in _imp.find_extension(). */
PyObject *dict;
struct _cached_module_def *head;
} extensions;
/* Package context -- the full module name for package imports */
const char * pkgcontext;
Expand Down
5 changes: 0 additions & 5 deletions Include/internal/pycore_pystate.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,6 @@ PyAPI_FUNC(PyThreadState *) _PyThreadState_New(PyInterpreterState *interp);
PyAPI_FUNC(void) _PyThreadState_Bind(PyThreadState *tstate);
PyAPI_FUNC(void) _PyThreadState_DeleteExcept(PyThreadState *tstate);

extern void _PyThreadState_InitDetached(PyThreadState *, PyInterpreterState *);
extern void _PyThreadState_ClearDetached(PyThreadState *);
extern void _PyThreadState_BindDetached(PyThreadState *);
extern void _PyThreadState_UnbindDetached(PyThreadState *);

PyAPI_FUNC(PyObject*) _PyThreadState_GetDict(PyThreadState *tstate);

/* The implementation of sys._current_frames() Returns a dict mapping
Expand Down
5 changes: 0 additions & 5 deletions Include/internal/pycore_runtime_init.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,6 @@ extern PyTypeObject _PyExc_MemoryError;
in accordance with the specification. */ \
.autoTSSkey = Py_tss_NEEDS_INIT, \
.parser = _parser_runtime_state_INIT, \
.imports = { \
.extensions = { \
.main_tstate = _PyThreadState_INIT, \
}, \
}, \
.ceval = { \
.perf = _PyEval_RUNTIME_PERF_INIT, \
}, \
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Python no longer crashes due to an infrequent race when simultaneously
initializing the sys/builtins modules in multiple subinterpreters that have
their own GILs.
199 changes: 84 additions & 115 deletions Python/import.c
Original file line number Diff line number Diff line change
Expand Up @@ -919,167 +919,134 @@ extensions_lock_release(void)
static void
_extensions_cache_init(void)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why have this function at all, now that it's just a single assert?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Honestly, I figured I'd end up finding a better solution than a linked list that would revive the need for this function, so I kept it around. The point is a bit moot now though. 😄

{
/* The runtime (i.e. main interpreter) must be initializing,
so we don't need to worry about the lock. */
_PyThreadState_InitDetached(&EXTENSIONS.main_tstate,
_PyInterpreterState_Main());
/* The lock is initialized directly with the general runtime state. */
assert(EXTENSIONS.mutex != NULL);
//assert(EXTENSIONS.head == NULL);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Leftover line I presume.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sort of. I had expected the assert to work (and it would in a simpler world). I was going to circle back to it but, having slept on it, don't see the need. It can be dropped.

}

static PyModuleDef *
_extensions_cache_get(PyObject *filename, PyObject *name)
static struct _cached_module_def *
_extensions_cache_lookup(PyObject *filename, PyObject *name,
struct _cached_module_def **prev_p)
{
PyModuleDef *def = NULL;
extensions_lock_acquire();

PyObject *key = PyTuple_Pack(2, filename, name);
if (key == NULL) {
goto finally;
struct _cached_module_def *prev = NULL;
struct _cached_module_def *cached = EXTENSIONS.head;
while (cached != NULL) {
if (PyUnicode_CompareWithASCIIString(filename, cached->filename) == 0) {
if (PyUnicode_CompareWithASCIIString(name, cached->name) == 0) {
break;
}
}
prev = cached;
cached = cached->next;
}

PyObject *extensions = EXTENSIONS.dict;
if (extensions == NULL) {
goto finally;
if (prev_p != NULL) {
*prev_p = prev;
}
def = (PyModuleDef *)PyDict_GetItemWithError(extensions, key);
return cached;
}

finally:
Py_XDECREF(key);
static PyModuleDef *
_extensions_cache_get(PyObject *filename, PyObject *name)
{
extensions_lock_acquire();
struct _cached_module_def *found =
_extensions_cache_lookup(filename, name, NULL);
extensions_lock_release();
return def;
return (found != NULL) ? found->def : NULL;
}

static int
_extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def)
{
int res = -1;
PyThreadState *oldts = NULL;
extensions_lock_acquire();

/* Swap to the main interpreter, if necessary. This matters if
the dict hasn't been created yet or if the item isn't in the
dict yet. In both cases we must ensure the relevant objects
are created using the main interpreter. */
PyThreadState *main_tstate = &EXTENSIONS.main_tstate;
PyInterpreterState *interp = _PyInterpreterState_GET();
if (!_Py_IsMainInterpreter(interp)) {
_PyThreadState_BindDetached(main_tstate);
oldts = _PyThreadState_Swap(interp->runtime, main_tstate);
assert(!_Py_IsMainInterpreter(oldts->interp));

/* Make sure the name and filename objects are owned
by the main interpreter. */
name = PyUnicode_InternFromString(PyUnicode_AsUTF8(name));
assert(name != NULL);
filename = PyUnicode_InternFromString(PyUnicode_AsUTF8(filename));
assert(filename != NULL);
}

PyObject *key = PyTuple_Pack(2, filename, name);
if (key == NULL) {
struct _cached_module_def *found =
_extensions_cache_lookup(filename, name, NULL);
if (found != NULL) {
assert(found->def == def);
res = 0;
goto finally;
}

PyObject *extensions = EXTENSIONS.dict;
if (extensions == NULL) {
extensions = PyDict_New();
if (extensions == NULL) {
goto finally;
}
EXTENSIONS.dict = extensions;
}

PyModuleDef *actual = (PyModuleDef *)PyDict_GetItemWithError(extensions, key);
if (PyErr_Occurred()) {
/* Create and populate the new entry. */
struct _cached_module_def *cached = PyMem_RawMalloc(
sizeof(struct _cached_module_def));
if (cached == NULL) {
PyErr_NoMemory();
goto finally;
}
else if (actual != NULL) {
/* We expect it to be static, so it must be the same pointer. */
assert(def == actual);
res = 0;
cached->filename = PyMem_RawMalloc(strlen(PyUnicode_AsUTF8(filename)) + 1);
if (cached->filename == NULL) {
PyMem_RawFree(cached);
PyErr_NoMemory();
goto finally;
}

/* This might trigger a resize, which is why we must switch
to the main interpreter. */
res = PyDict_SetItem(extensions, key, (PyObject *)def);
if (res < 0) {
res = -1;
cached->name = PyMem_RawMalloc(strlen(PyUnicode_AsUTF8(name)) + 1);
if (cached->name == NULL) {
PyMem_RawFree(cached->filename);
PyMem_RawFree(cached);
PyErr_NoMemory();
goto finally;
}
strcpy(cached->filename, PyUnicode_AsUTF8(filename));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use strncpy to avoid the deprecation in some compilers (macOS IIRC?)

strcpy(cached->name, PyUnicode_AsUTF8(name));
cached->def = (PyModuleDef *)Py_NewRef(def);

/* Add it to the list. */
cached->next = EXTENSIONS.head;
EXTENSIONS.head = cached;

res = 0;

finally:
Py_XDECREF(key);
if (oldts != NULL) {
_PyThreadState_Swap(interp->runtime, oldts);
_PyThreadState_UnbindDetached(main_tstate);
Py_DECREF(name);
Py_DECREF(filename);
}
extensions_lock_release();
return res;
}

static int
static void
_extensions_cache_delete(PyObject *filename, PyObject *name)
{
int res = -1;
PyThreadState *oldts = NULL;
extensions_lock_acquire();

PyObject *key = PyTuple_Pack(2, filename, name);
if (key == NULL) {
goto finally;
}

PyObject *extensions = EXTENSIONS.dict;
if (extensions == NULL) {
res = 0;
goto finally;
}

PyModuleDef *actual = (PyModuleDef *)PyDict_GetItemWithError(extensions, key);
if (PyErr_Occurred()) {
goto finally;
}
else if (actual == NULL) {
struct _cached_module_def *prev = NULL;
struct _cached_module_def *found =
_extensions_cache_lookup(filename, name, &prev);
if (found == NULL) {
/* It was already removed or never added. */
res = 0;
goto finally;
}

/* Swap to the main interpreter, if necessary. */
PyThreadState *main_tstate = &EXTENSIONS.main_tstate;
PyInterpreterState *interp = _PyInterpreterState_GET();
if (!_Py_IsMainInterpreter(interp)) {
_PyThreadState_BindDetached(main_tstate);
oldts = _PyThreadState_Swap(interp->runtime, main_tstate);
assert(!_Py_IsMainInterpreter(oldts->interp));
/* Drop it from the list. */
if (prev == NULL) {
assert(EXTENSIONS.head == found);
EXTENSIONS.head = found->next;
}

if (PyDict_DelItem(extensions, key) < 0) {
goto finally;
else {
prev->next = found->next;
}
res = 0;

/* Destroy the cache data. */
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this DECREF found->def ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good catch

PyMem_RawFree(found->filename);
PyMem_RawFree(found->name);
PyMem_RawFree(found);

finally:
if (oldts != NULL) {
_PyThreadState_Swap(interp->runtime, oldts);
_PyThreadState_UnbindDetached(main_tstate);
}
Py_XDECREF(key);
extensions_lock_release();
return res;
}

static void
_extensions_cache_clear_all(void)
{
/* The runtime (i.e. main interpreter) must be finalizing,
so we don't need to worry about the lock. */
// XXX assert(_Py_IsMainInterpreter(_PyInterpreterState_GET()));
Py_CLEAR(EXTENSIONS.dict);
_PyThreadState_ClearDetached(&EXTENSIONS.main_tstate);
struct _cached_module_def *cached = EXTENSIONS.head;
while (cached != NULL) {
struct _cached_module_def *last = cached;
cached = cached->next;
PyMem_RawFree(last->filename);
PyMem_RawFree(last->name);
Py_DECREF(last->def);
PyMem_RawFree(last);
}
}


Expand Down Expand Up @@ -1233,6 +1200,8 @@ import_find_extension(PyThreadState *tstate, PyObject *name,
PyObject *m_copy = def->m_base.m_copy;
/* Module does not support repeated initialization */
if (m_copy == NULL) {
/* It might be a core module (e.g. sys & builtins),
for which we don't set m_copy. */
m_copy = get_core_module_dict(tstate->interp, name, filename);
if (m_copy == NULL) {
return NULL;
Expand Down Expand Up @@ -1302,9 +1271,7 @@ clear_singlephase_extension(PyInterpreterState *interp,
}

/* Clear the cached module def. */
if (_extensions_cache_delete(filename, name) < 0) {
return -1;
}
_extensions_cache_delete(filename, name);

return 0;
}
Expand Down Expand Up @@ -3053,6 +3020,8 @@ void
_PyImport_Fini(void)
{
/* Destroy the database used by _PyImport_{Fixup,Find}Extension */
// XXX Should we actually leave them (mostly) intact, since we don't
// ever dlclose() the module files?
_extensions_cache_clear_all();

/* Use the same memory allocator as _PyImport_Init(). */
Expand Down
Loading