From 158c10212298ba96c1990b09394649d73f0e7ca8 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 13 Apr 2021 21:48:39 +0200 Subject: [PATCH] bpo-40137: Micro-optimize _PyType_GetModuleByDef() Make _PyType_GetModuleByDef() 2.3 ns faster. Benchmark: 43.2 ns +- 0.7 ns -> 40.9 ns +- 1.0 ns: 1.05x faster ./python -m pyperf timeit \ --duplicate=4096 -s "from functools import lru_cache; f = lru_cache(lambda: 42)" \ "f()" Changes: * _PyType_GetModuleByDef(): add fast-path for tp_mro[0] and use _PyType_HasFeature(). * Add a new pycore_moduleobject.h internal C API header file. * Add _PyModule_GetDef() and _PyModule_GetState() which can be inlined without LTO and don't check for invalid arguments at runtime. * Replace PyModule_GetState() with _PyModule_GetState() in _abc, _array, _operator, _pickle, _queue, _random, _struct and os extension modules. The _array, _queue and _struct extensions are now built with the Py_BUILD_CORE_MODULE macro defined. --- Include/internal/pycore_moduleobject.h | 34 +++++++++++++++++++ Makefile.pre.in | 1 + .../2021-04-13-22-48-56.bpo-40137.Z77R2P.rst | 2 ++ Modules/_abc.c | 3 +- Modules/_functoolsmodule.c | 6 ++-- Modules/_operator.c | 4 +-- Modules/_pickle.c | 3 +- Modules/_queuemodule.c | 3 +- Modules/_randommodule.c | 5 +-- Modules/_struct.c | 3 +- Modules/arraymodule.c | 3 +- Modules/posixmodule.c | 3 +- Objects/moduleobject.c | 14 ++------ Objects/typeobject.c | 29 ++++++++++++---- PCbuild/pythoncore.vcxproj | 1 + PCbuild/pythoncore.vcxproj.filters | 3 ++ setup.py | 9 +++-- 17 files changed, 93 insertions(+), 33 deletions(-) create mode 100644 Include/internal/pycore_moduleobject.h create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-04-13-22-48-56.bpo-40137.Z77R2P.rst diff --git a/Include/internal/pycore_moduleobject.h b/Include/internal/pycore_moduleobject.h new file mode 100644 index 00000000000000..ce00e2d7519a54 --- /dev/null +++ b/Include/internal/pycore_moduleobject.h @@ -0,0 +1,34 @@ +#ifndef Py_INTERNAL_MODULEOBJECT_H +#define 
Py_INTERNAL_MODULEOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +typedef struct { + PyObject_HEAD + PyObject *md_dict; + struct PyModuleDef *md_def; + void *md_state; + PyObject *md_weaklist; + PyObject *md_name; /* for logging purposes after md_dict is cleared */ +} PyModuleObject; + +static inline PyModuleDef* _PyModule_GetDef(PyObject *m) { + assert(PyModule_Check(m)); + return ((PyModuleObject *)m)->md_def; +} + +static inline void* _PyModule_GetState(PyObject* m) { + assert(PyModule_Check(m)); + return ((PyModuleObject *)m)->md_state; +} + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_MODULEOBJECT_H */ diff --git a/Makefile.pre.in b/Makefile.pre.in index 365449d644583f..f49bc51c63dbe9 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1160,6 +1160,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_interp.h \ $(srcdir)/Include/internal/pycore_list.h \ $(srcdir)/Include/internal/pycore_long.h \ + $(srcdir)/Include/internal/pycore_moduleobject.h \ $(srcdir)/Include/internal/pycore_object.h \ $(srcdir)/Include/internal/pycore_pathconfig.h \ $(srcdir)/Include/internal/pycore_pyarena.h \ diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-04-13-22-48-56.bpo-40137.Z77R2P.rst b/Misc/NEWS.d/next/Core and Builtins/2021-04-13-22-48-56.bpo-40137.Z77R2P.rst new file mode 100644 index 00000000000000..1506c3fd9c6b76 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-04-13-22-48-56.bpo-40137.Z77R2P.rst @@ -0,0 +1,2 @@ +Micro-optimize _PyType_GetModuleByDef() to make it 2.3 ns faster (43.2 ns +- +0.7 ns -> 40.9 ns +- 1.0 ns). Patch by Victor Stinner. 
diff --git a/Modules/_abc.c b/Modules/_abc.c index 7afaa759b2bfec..0ddc2abeee1e0e 100644 --- a/Modules/_abc.c +++ b/Modules/_abc.c @@ -1,6 +1,7 @@ /* ABCMeta implementation */ #include "Python.h" +#include "pycore_moduleobject.h" // _PyModule_GetState() #include "clinic/_abc.c.h" /*[clinic input] @@ -27,7 +28,7 @@ typedef struct { static inline _abcmodule_state* get_abc_state(PyObject *module) { - void *state = PyModule_GetState(module); + void *state = _PyModule_GetState(module); assert(state != NULL); return (_abcmodule_state *)state; } diff --git a/Modules/_functoolsmodule.c b/Modules/_functoolsmodule.c index 1fcaf299e67bc6..512b245e4ee05e 100644 --- a/Modules/_functoolsmodule.c +++ b/Modules/_functoolsmodule.c @@ -1,5 +1,6 @@ #include "Python.h" #include "pycore_long.h" // _PyLong_GetZero() +#include "pycore_moduleobject.h" // _PyModule_GetState() #include "pycore_object.h" // _PyObject_GC_TRACK #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_tuple.h" // _PyTuple_ITEMS() @@ -35,7 +36,7 @@ typedef struct _functools_state { static inline _functools_state * get_functools_state(PyObject *module) { - void *state = PyModule_GetState(module); + void *state = _PyModule_GetState(module); assert(state != NULL); return (_functools_state *)state; } @@ -52,8 +53,7 @@ get_functools_state_by_type(PyTypeObject *type) if (module == NULL) { return NULL; } - _functools_state *state = get_functools_state(module); - return state; + return get_functools_state(module); } static PyObject * diff --git a/Modules/_operator.c b/Modules/_operator.c index da1e43158ee0ac..e7e0ad8ff91427 100644 --- a/Modules/_operator.c +++ b/Modules/_operator.c @@ -1,6 +1,6 @@ #include "Python.h" - +#include "pycore_moduleobject.h" // _PyModule_GetState() #include "clinic/_operator.c.h" typedef struct { @@ -12,7 +12,7 @@ typedef struct { static inline _operator_state* get_operator_state(PyObject *module) { - void *state = PyModule_GetState(module); + void *state = 
_PyModule_GetState(module); assert(state != NULL); return (_operator_state *)state; } diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 5a8aad9de7679d..691d4a293e8498 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -9,6 +9,7 @@ #endif #include "Python.h" +#include "pycore_moduleobject.h" // _PyModule_GetState() #include "structmember.h" // PyMemberDef PyDoc_STRVAR(pickle_module_doc, @@ -182,7 +183,7 @@ static struct PyModuleDef _picklemodule; static PickleState * _Pickle_GetState(PyObject *module) { - return (PickleState *)PyModule_GetState(module); + return (PickleState *)_PyModule_GetState(module); } /* Find the module instance imported in the currently running sub-interpreter diff --git a/Modules/_queuemodule.c b/Modules/_queuemodule.c index 7a52617ade5b09..c27fb1a001d21e 100644 --- a/Modules/_queuemodule.c +++ b/Modules/_queuemodule.c @@ -1,4 +1,5 @@ #include "Python.h" +#include "pycore_moduleobject.h" // _PyModule_GetState() #include "structmember.h" // PyMemberDef #include // offsetof() @@ -10,7 +11,7 @@ typedef struct { static simplequeue_state * simplequeue_get_state(PyObject *module) { - simplequeue_state *state = PyModule_GetState(module); + simplequeue_state *state = _PyModule_GetState(module); assert(state); return state; } diff --git a/Modules/_randommodule.c b/Modules/_randommodule.c index 99be69c06556e6..cae49a009cbcc8 100644 --- a/Modules/_randommodule.c +++ b/Modules/_randommodule.c @@ -67,6 +67,7 @@ /* ---------------------------------------------------------------*/ #include "Python.h" +#include "pycore_moduleobject.h" // _PyModule_GetState() #ifdef HAVE_PROCESS_H # include // getpid() #endif @@ -86,7 +87,7 @@ typedef struct { static inline _randomstate* get_random_state(PyObject *module) { - void *state = PyModule_GetState(module); + void *state = _PyModule_GetState(module); assert(state != NULL); return (_randomstate *)state; } @@ -538,7 +539,7 @@ random_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if 
(PyTuple_GET_SIZE(args) == 1) arg = PyTuple_GET_ITEM(args, 0); - + tmp = random_seed(self, arg); if (tmp == NULL) { Py_DECREF(self); diff --git a/Modules/_struct.c b/Modules/_struct.c index 1a5e0ae28e876b..30ad9f2b79d8f3 100644 --- a/Modules/_struct.c +++ b/Modules/_struct.c @@ -6,6 +6,7 @@ #define PY_SSIZE_T_CLEAN #include "Python.h" +#include "pycore_moduleobject.h" // _PyModule_GetState() #include "structmember.h" // PyMemberDef #include @@ -24,7 +25,7 @@ typedef struct { static inline _structmodulestate* get_struct_state(PyObject *module) { - void *state = PyModule_GetState(module); + void *state = _PyModule_GetState(module); assert(state != NULL); return (_structmodulestate *)state; } diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index fb9ebbe9f4870f..f5326789521d30 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -5,6 +5,7 @@ #define PY_SSIZE_T_CLEAN #include "Python.h" +#include "pycore_moduleobject.h" // _PyModule_GetState() #include "structmember.h" // PyMemberDef #include // offsetof() @@ -63,7 +64,7 @@ typedef struct { static array_state * get_array_state(PyObject *module) { - return (array_state *)PyModule_GetState(module); + return (array_state *)_PyModule_GetState(module); } #define find_array_state_by_type(tp) \ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 65e8d5e7bd984d..8ce62c88216e3d 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -11,6 +11,7 @@ #include "Python.h" #include "pycore_fileutils.h" +#include "pycore_moduleobject.h" // _PyModule_GetState() #ifdef MS_WINDOWS /* include early to avoid conflict with pycore_condvar.h: @@ -994,7 +995,7 @@ typedef struct { static inline _posixstate* get_posix_state(PyObject *module) { - void *state = PyModule_GetState(module); + void *state = _PyModule_GetState(module); assert(state != NULL); return (_posixstate *)state; } diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c index e57ea86e7694ce..4e20d409f42994 100644 --- 
a/Objects/moduleobject.c +++ b/Objects/moduleobject.c @@ -4,6 +4,7 @@ #include "Python.h" #include "pycore_interp.h" // PyInterpreterState.importlib #include "pycore_pystate.h" // _PyInterpreterState_GET() +#include "pycore_moduleobject.h" // _PyModule_GetDef() #include "structmember.h" // PyMemberDef static Py_ssize_t max_module_number; @@ -12,15 +13,6 @@ _Py_IDENTIFIER(__doc__); _Py_IDENTIFIER(__name__); _Py_IDENTIFIER(__spec__); -typedef struct { - PyObject_HEAD - PyObject *md_dict; - struct PyModuleDef *md_def; - void *md_state; - PyObject *md_weaklist; - PyObject *md_name; /* for logging purposes after md_dict is cleared */ -} PyModuleObject; - static PyMemberDef module_members[] = { {"__dict__", T_OBJECT, offsetof(PyModuleObject, md_dict), READONLY}, {0} @@ -556,7 +548,7 @@ PyModule_GetDef(PyObject* m) PyErr_BadArgument(); return NULL; } - return ((PyModuleObject *)m)->md_def; + return _PyModule_GetDef(m); } void* @@ -566,7 +558,7 @@ PyModule_GetState(PyObject* m) PyErr_BadArgument(); return NULL; } - return ((PyModuleObject *)m)->md_state; + return _PyModule_GetState(m); } void diff --git a/Objects/typeobject.c b/Objects/typeobject.c index a957c832b90bd6..8958f72b8c6c5e 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -4,6 +4,7 @@ #include "pycore_call.h" #include "pycore_compile.h" // _Py_Mangle() #include "pycore_initconfig.h" +#include "pycore_moduleobject.h" // _PyModule_GetDef() #include "pycore_object.h" #include "pycore_pyerrors.h" #include "pycore_pystate.h" // _PyThreadState_GET() @@ -3591,10 +3592,25 @@ _PyType_GetModuleByDef(PyTypeObject *type, struct PyModuleDef *def) { assert(PyType_Check(type)); assert(type->tp_mro); - int i; - for (i = 0; i < PyTuple_GET_SIZE(type->tp_mro); i++) { - PyObject *super = PyTuple_GET_ITEM(type->tp_mro, i); - if (!PyType_HasFeature((PyTypeObject *)super, Py_TPFLAGS_HEAPTYPE)) { + + // Fast path for type->tp_mro[0]=type. 
Calling _PyType_GetModuleByDef() on + // the defining type is the most common case. + assert(PyTuple_GET_ITEM(type->tp_mro, 0) == type); + // A static type cannot inherit from a heap type, since heap types are + // created at runtime. _PyType_GetModuleByDef() is used on heap types + // created by PyType_FromModuleAndSpec(), and on their subclasses. + assert(_PyType_HasFeature((PyTypeObject *)type, Py_TPFLAGS_HEAPTYPE)); + PyHeapTypeObject *ht = (PyHeapTypeObject*)type; + if (ht->ht_module && _PyModule_GetDef(ht->ht_module) == def) { + return ht->ht_module; + } + + // Slow path + PyObject *mro = type->tp_mro; + Py_ssize_t len = PyTuple_GET_SIZE(mro); + for (Py_ssize_t i = 1; i < len; i++) { + PyObject *super = PyTuple_GET_ITEM(mro, i); + if (!_PyType_HasFeature((PyTypeObject *)super, Py_TPFLAGS_HEAPTYPE)) { /* Currently, there's no way for static types to inherit * from heap types, but to allow that possibility, * we `continue` rather than `break`. @@ -3603,11 +3619,12 @@ _PyType_GetModuleByDef(PyTypeObject *type, struct PyModuleDef *def) */ continue; } - PyHeapTypeObject *ht = (PyHeapTypeObject*)super; - if (ht->ht_module && PyModule_GetDef(ht->ht_module) == def) { + ht = (PyHeapTypeObject*)super; + if (ht->ht_module && _PyModule_GetDef(ht->ht_module) == def) { return ht->ht_module; } } + PyErr_Format( PyExc_TypeError, "_PyType_GetModuleByDef: No superclass of '%s' has the given module", diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 2c1cc0d4cc80f7..29387bd57b4e49 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -199,6 +199,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 8c104bf7b31034..55b57ef29dc0c9 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -558,6 +558,9 @@ Include\internal + + Include\internal + Include\internal diff --git a/setup.py b/setup.py index 253053da7fb47a..ea672ae63847ff 100644 --- a/setup.py +++ 
b/setup.py @@ -866,7 +866,8 @@ def detect_simple_extensions(self): # # array objects - self.add(Extension('array', ['arraymodule.c'])) + self.add(Extension('array', ['arraymodule.c'], + extra_compile_args=['-DPy_BUILD_CORE_MODULE'])) # Context Variables self.add(Extension('_contextvars', ['_contextvarsmodule.c'])) @@ -933,7 +934,8 @@ def detect_simple_extensions(self): # _abc speedups self.add(Extension("_abc", ["_abc.c"])) # _queue module - self.add(Extension("_queue", ["_queuemodule.c"])) + self.add(Extension("_queue", ["_queuemodule.c"], + extra_compile_args=['-DPy_BUILD_CORE_MODULE'])) # _statistics module self.add(Extension("_statistics", ["_statisticsmodule.c"])) @@ -2711,7 +2713,8 @@ class DummyProcess: 'install_lib': PyBuildInstallLib}, # The struct module is defined here, because build_ext won't be # called unless there's at least one extension module defined. - ext_modules=[Extension('_struct', ['_struct.c'])], + ext_modules=[Extension('_struct', ['_struct.c'], + extra_compile_args=['-DPy_BUILD_CORE_MODULE'])], # If you change the scripts installed here, you also need to # check the PyBuildScripts command above, and change the links