From 3f211b6a8205db838f71f533f59e2c5789a8a64b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 3 Sep 2021 16:46:32 +0200 Subject: [PATCH 1/2] bpo-45094: Add Py_ALWAYS_INLINE macro Add Py_ALWAYS_INLINE macro to ask the compiler to always inline a static inline function. Use Py_ALWAYS_INLINE on static inline functions: * Py_DECREF(), Py_XDECREF() * Py_INCREF(), Py_XINCREF() * Py_IS_TYPE() * Py_NewRef(), Py_XNewRef() * Py_REFCNT() * Py_SET_REFCNT(), Py_SET_SIZE(), Py_SET_TYPE() --- Doc/c-api/intro.rst | 20 +++++++++++++++++ Include/object.h | 22 +++++++++---------- Include/pyport.h | 22 +++++++++++++++++++ .../2021-09-03-16-52-23.bpo-45094.IrTcnE.rst | 2 ++ 4 files changed, 55 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2021-09-03-16-52-23.bpo-45094.IrTcnE.rst diff --git a/Doc/c-api/intro.rst b/Doc/c-api/intro.rst index 83824bb474fbd8..c8f0ec51fb546c 100644 --- a/Doc/c-api/intro.rst +++ b/Doc/c-api/intro.rst @@ -111,6 +111,26 @@ complete listing. .. versionadded:: 3.3 +.. c:macro:: Py_ALWAYS_INLINE + + Ask the compiler to always inline a static inline function. The compiler is + free is ignored this "hint". + + This attribute can be used to avoid increasing the stack memory usage when + building Python in debug mode with function inlining disabled. For example, + MSC disables function inlining when building in debug mode. It should be + used on the most commonly used static inline functions. + + Marking blindly a static inline function with Py_ALWAYS_INLINE can result in + worse performances (due to increased code size for example). The compiler is + usually smarter than the developer for the cost/benefit analysis. + + Usage:: + + static inline int Py_ALWAYS_INLINE random(void) { return 4; } + + .. versionadded:: 3.11 + .. c:macro:: Py_CHARMASK(c) Argument must be a character or an integer in the range [-128, 127] or [0, diff --git a/Include/object.h b/Include/object.h index fb8a63fc7dbe84..f3d15a02cc2ff7 100644 --- a/Include/object.h +++ b/Include/object.h @@ -127,7 +127,7 @@ PyAPI_FUNC(int) Py_Is(PyObject *x, PyObject *y); #define Py_Is(x, y) ((x) == (y)) -static inline Py_ssize_t _Py_REFCNT(const PyObject *ob) { +static inline Py_ssize_t Py_ALWAYS_INLINE _Py_REFCNT(const PyObject *ob) { return ob->ob_refcnt; } #define Py_REFCNT(ob) _Py_REFCNT(_PyObject_CAST_CONST(ob)) @@ -140,7 +140,7 @@ static inline Py_ssize_t _Py_REFCNT(const PyObject *ob) { #define Py_SIZE(ob) (_PyVarObject_CAST(ob)->ob_size) -static inline int _Py_IS_TYPE(const PyObject *ob, const PyTypeObject *type) { +static inline int Py_ALWAYS_INLINE _Py_IS_TYPE(const PyObject *ob, const PyTypeObject *type) { // bpo-44378: Don't use Py_TYPE() since Py_TYPE() requires a non-const // object. return ob->ob_type == type; @@ -148,19 +148,19 @@ static inline int _Py_IS_TYPE(const PyObject *ob, const PyTypeObject *type) { #define Py_IS_TYPE(ob, type) _Py_IS_TYPE(_PyObject_CAST_CONST(ob), type) -static inline void _Py_SET_REFCNT(PyObject *ob, Py_ssize_t refcnt) { +static inline void Py_ALWAYS_INLINE _Py_SET_REFCNT(PyObject *ob, Py_ssize_t refcnt) { ob->ob_refcnt = refcnt; } #define Py_SET_REFCNT(ob, refcnt) _Py_SET_REFCNT(_PyObject_CAST(ob), refcnt) -static inline void _Py_SET_TYPE(PyObject *ob, PyTypeObject *type) { +static inline void Py_ALWAYS_INLINE _Py_SET_TYPE(PyObject *ob, PyTypeObject *type) { ob->ob_type = type; } #define Py_SET_TYPE(ob, type) _Py_SET_TYPE(_PyObject_CAST(ob), type) -static inline void _Py_SET_SIZE(PyVarObject *ob, Py_ssize_t size) { +static inline void Py_ALWAYS_INLINE _Py_SET_SIZE(PyVarObject *ob, Py_ssize_t size) { ob->ob_size = size; } #define Py_SET_SIZE(ob, size) _Py_SET_SIZE(_PyVarObject_CAST(ob), size) @@ -462,7 +462,7 @@ PyAPI_FUNC(void) Py_DecRef(PyObject *); PyAPI_FUNC(void) _Py_IncRef(PyObject *); PyAPI_FUNC(void) _Py_DecRef(PyObject *); -static inline void _Py_INCREF(PyObject *op) +static inline void Py_ALWAYS_INLINE Py_ALWAYS_INLINE _Py_INCREF(PyObject *op) { #if defined(Py_REF_DEBUG) && defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030A0000 // Stable ABI for Python 3.10 built in debug mode. @@ -478,7 +478,7 @@ static inline void _Py_INCREF(PyObject *op) } #define Py_INCREF(op) _Py_INCREF(_PyObject_CAST(op)) -static inline void _Py_DECREF( +static inline void Py_ALWAYS_INLINE _Py_DECREF( #if defined(Py_REF_DEBUG) && !(defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030A0000) const char *filename, int lineno, #endif @@ -556,7 +556,7 @@ static inline void _Py_DECREF( } while (0) /* Function to use in case the object pointer can be NULL: */ -static inline void _Py_XINCREF(PyObject *op) +static inline void Py_ALWAYS_INLINE _Py_XINCREF(PyObject *op) { if (op != NULL) { Py_INCREF(op); @@ -565,7 +565,7 @@ static inline void _Py_XINCREF(PyObject *op) #define Py_XINCREF(op) _Py_XINCREF(_PyObject_CAST(op)) -static inline void _Py_XDECREF(PyObject *op) +static inline void Py_ALWAYS_INLINE _Py_XDECREF(PyObject *op) { if (op != NULL) { Py_DECREF(op); @@ -581,13 +581,13 @@ PyAPI_FUNC(PyObject*) Py_NewRef(PyObject *obj); // Similar to Py_NewRef(), but the object can be NULL. PyAPI_FUNC(PyObject*) Py_XNewRef(PyObject *obj); -static inline PyObject* _Py_NewRef(PyObject *obj) +static inline PyObject* Py_ALWAYS_INLINE _Py_NewRef(PyObject *obj) { Py_INCREF(obj); return obj; } -static inline PyObject* _Py_XNewRef(PyObject *obj) +static inline PyObject* Py_ALWAYS_INLINE _Py_XNewRef(PyObject *obj) { Py_XINCREF(obj); return obj; diff --git a/Include/pyport.h b/Include/pyport.h index 0aaa4eedd31a1c..24ff432e60af97 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -557,6 +557,28 @@ extern "C" { #define _Py_HOT_FUNCTION #endif +// Ask the compiler to always inline a static inline function. The compiler is +// free is ignored this "hint". This attribute can be used to avoid increasing +// the stack memory usage when building Python in debug mode with function +// inlining disabled. For example, MSC disables function inlining when building +// in debug mode. It should be used on the most commonly used static inline +// functions. +// +// Marking blindly a static inline function with Py_ALWAYS_INLINE can result in +// worse performances (due to increased code size for example). The compiler is +// usually smarter than the developer for the cost/benefit analysis. +// +// Usage: +// +// static inline int Py_ALWAYS_INLINE random(void) { return 4; } +#if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER) +# define Py_ALWAYS_INLINE __attribute__((always_inline)) +#elif defined(_MSC_VER) +# define Py_ALWAYS_INLINE __forceinline +#else +# define Py_ALWAYS_INLINE +#endif + // Py_NO_INLINE // Disable inlining on a function. For example, it reduces the C stack // consumption: useful on LTO+PGO builds which heavily inline code (see diff --git a/Misc/NEWS.d/next/C API/2021-09-03-16-52-23.bpo-45094.IrTcnE.rst b/Misc/NEWS.d/next/C API/2021-09-03-16-52-23.bpo-45094.IrTcnE.rst new file mode 100644 index 00000000000000..62ed05727b711c --- /dev/null +++ b/Misc/NEWS.d/next/C API/2021-09-03-16-52-23.bpo-45094.IrTcnE.rst @@ -0,0 +1,2 @@ +Add :c:macro:`Py_ALWAYS_INLINE` macro to ask the compiler to always inline a +static inline function. Patch by Victor Stinner. From c438765b8d11fee474a9cf6c38b6a71e76150482 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 4 Sep 2021 01:21:06 +0200 Subject: [PATCH 2/2] Address review --- Doc/c-api/intro.rst | 6 ++++-- Include/object.h | 22 +++++++++++----------- Include/pyport.h | 6 ++++-- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/Doc/c-api/intro.rst b/Doc/c-api/intro.rst index c8f0ec51fb546c..d1db9ba0a98e11 100644 --- a/Doc/c-api/intro.rst +++ b/Doc/c-api/intro.rst @@ -114,7 +114,7 @@ complete listing. .. c:macro:: Py_ALWAYS_INLINE Ask the compiler to always inline a static inline function. The compiler is - free is ignored this "hint". + free to ignore this "hint". This attribute can be used to avoid increasing the stack memory usage when building Python in debug mode with function inlining disabled. For example, @@ -125,9 +125,11 @@ complete listing. worse performances (due to increased code size for example). The compiler is usually smarter than the developer for the cost/benefit analysis. + It must be specified before the function return type. + Usage:: - static inline int Py_ALWAYS_INLINE random(void) { return 4; } + static inline Py_ALWAYS_INLINE int random(void) { return 4; } .. versionadded:: 3.11 diff --git a/Include/object.h b/Include/object.h index f3d15a02cc2ff7..056e78a5b230eb 100644 --- a/Include/object.h +++ b/Include/object.h @@ -127,7 +127,7 @@ PyAPI_FUNC(int) Py_Is(PyObject *x, PyObject *y); #define Py_Is(x, y) ((x) == (y)) -static inline Py_ssize_t Py_ALWAYS_INLINE _Py_REFCNT(const PyObject *ob) { +static inline Py_ALWAYS_INLINE Py_ssize_t _Py_REFCNT(const PyObject *ob) { return ob->ob_refcnt; } #define Py_REFCNT(ob) _Py_REFCNT(_PyObject_CAST_CONST(ob)) @@ -140,7 +140,7 @@ static inline Py_ssize_t Py_ALWAYS_INLINE _Py_REFCNT(const PyObject *ob) { #define Py_SIZE(ob) (_PyVarObject_CAST(ob)->ob_size) -static inline int Py_ALWAYS_INLINE _Py_IS_TYPE(const PyObject *ob, const PyTypeObject *type) { +static inline Py_ALWAYS_INLINE int _Py_IS_TYPE(const PyObject *ob, const PyTypeObject *type) { // bpo-44378: Don't use Py_TYPE() since Py_TYPE() requires a non-const // object. return ob->ob_type == type; @@ -148,19 +148,19 @@ static inline int Py_ALWAYS_INLINE _Py_IS_TYPE(const PyObject *ob, const PyTypeO #define Py_IS_TYPE(ob, type) _Py_IS_TYPE(_PyObject_CAST_CONST(ob), type) -static inline void Py_ALWAYS_INLINE _Py_SET_REFCNT(PyObject *ob, Py_ssize_t refcnt) { +static inline Py_ALWAYS_INLINE void _Py_SET_REFCNT(PyObject *ob, Py_ssize_t refcnt) { ob->ob_refcnt = refcnt; } #define Py_SET_REFCNT(ob, refcnt) _Py_SET_REFCNT(_PyObject_CAST(ob), refcnt) -static inline void Py_ALWAYS_INLINE _Py_SET_TYPE(PyObject *ob, PyTypeObject *type) { +static inline Py_ALWAYS_INLINE void _Py_SET_TYPE(PyObject *ob, PyTypeObject *type) { ob->ob_type = type; } #define Py_SET_TYPE(ob, type) _Py_SET_TYPE(_PyObject_CAST(ob), type) -static inline void Py_ALWAYS_INLINE _Py_SET_SIZE(PyVarObject *ob, Py_ssize_t size) { +static inline Py_ALWAYS_INLINE void _Py_SET_SIZE(PyVarObject *ob, Py_ssize_t size) { ob->ob_size = size; } #define Py_SET_SIZE(ob, size) _Py_SET_SIZE(_PyVarObject_CAST(ob), size) @@ -462,7 +462,7 @@ PyAPI_FUNC(void) Py_DecRef(PyObject *); PyAPI_FUNC(void) _Py_IncRef(PyObject *); PyAPI_FUNC(void) _Py_DecRef(PyObject *); -static inline void Py_ALWAYS_INLINE Py_ALWAYS_INLINE _Py_INCREF(PyObject *op) +static inline Py_ALWAYS_INLINE void _Py_INCREF(PyObject *op) { #if defined(Py_REF_DEBUG) && defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030A0000 // Stable ABI for Python 3.10 built in debug mode. @@ -478,7 +478,7 @@ static inline void Py_ALWAYS_INLINE Py_ALWAYS_INLINE _Py_INCREF(PyObject *op) } #define Py_INCREF(op) _Py_INCREF(_PyObject_CAST(op)) -static inline void Py_ALWAYS_INLINE _Py_DECREF( +static inline Py_ALWAYS_INLINE void _Py_DECREF( #if defined(Py_REF_DEBUG) && !(defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030A0000) const char *filename, int lineno, #endif @@ -556,7 +556,7 @@ static inline void Py_ALWAYS_INLINE _Py_DECREF( } while (0) /* Function to use in case the object pointer can be NULL: */ -static inline void Py_ALWAYS_INLINE _Py_XINCREF(PyObject *op) +static inline Py_ALWAYS_INLINE void _Py_XINCREF(PyObject *op) { if (op != NULL) { Py_INCREF(op); @@ -565,7 +565,7 @@ static inline void Py_ALWAYS_INLINE _Py_XINCREF(PyObject *op) #define Py_XINCREF(op) _Py_XINCREF(_PyObject_CAST(op)) -static inline void Py_ALWAYS_INLINE _Py_XDECREF(PyObject *op) +static inline Py_ALWAYS_INLINE void _Py_XDECREF(PyObject *op) { if (op != NULL) { Py_DECREF(op); @@ -581,13 +581,13 @@ PyAPI_FUNC(PyObject*) Py_NewRef(PyObject *obj); // Similar to Py_NewRef(), but the object can be NULL. PyAPI_FUNC(PyObject*) Py_XNewRef(PyObject *obj); -static inline PyObject* Py_ALWAYS_INLINE _Py_NewRef(PyObject *obj) +static inline Py_ALWAYS_INLINE PyObject* _Py_NewRef(PyObject *obj) { Py_INCREF(obj); return obj; } -static inline PyObject* Py_ALWAYS_INLINE _Py_XNewRef(PyObject *obj) +static inline Py_ALWAYS_INLINE PyObject* _Py_XNewRef(PyObject *obj) { Py_XINCREF(obj); return obj; diff --git a/Include/pyport.h b/Include/pyport.h index 24ff432e60af97..835110a653c4ba 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -558,7 +558,7 @@ extern "C" { #endif // Ask the compiler to always inline a static inline function. The compiler is -// free is ignored this "hint". This attribute can be used to avoid increasing +// free to ignore this "hint". This attribute can be used to avoid increasing // the stack memory usage when building Python in debug mode with function // inlining disabled. For example, MSC disables function inlining when building // in debug mode. It should be used on the most commonly used static inline @@ -568,9 +568,11 @@ extern "C" { // worse performances (due to increased code size for example). The compiler is // usually smarter than the developer for the cost/benefit analysis. // +// It must be specified before the function return type. +// // Usage: // -// static inline int Py_ALWAYS_INLINE random(void) { return 4; } +// static inline Py_ALWAYS_INLINE int random(void) { return 4; } #if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER) # define Py_ALWAYS_INLINE __attribute__((always_inline)) #elif defined(_MSC_VER)