From e953c5735758fc5283730702667b8eb2ec6043f3 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Mon, 30 Jan 2023 23:28:32 +0000 Subject: [PATCH 01/18] freelist for ints --- Include/internal/pycore_interp.h | 30 +++++++++++++++ Include/internal/pycore_pymem.h | 62 +++++++++++++++++++++++++++++++ Objects/longobject.c | 56 +++++++++++++++++++++------- Objects/obmalloc.c | 64 ++++++++++++++++++++++++++++++++ Python/pystate.c | 12 ++++++ 5 files changed, 211 insertions(+), 13 deletions(-) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 0e3d46852f2e6d..1ab341d5e2d3a2 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -18,6 +18,7 @@ extern "C" { #include "pycore_dict_state.h" // struct _Py_dict_state #include "pycore_exceptions.h" // struct _Py_exc_state #include "pycore_floatobject.h" // struct _Py_float_state +#include "pycore_pymem.h" // free lists #include "pycore_function.h" // FUNC_MAX_WATCHERS #include "pycore_genobject.h" // struct _Py_async_gen_state #include "pycore_gc.h" // struct _gc_runtime_state @@ -49,6 +50,10 @@ struct _Py_long_state { /* interpreter state */ +#define WITH_FREELISTS 1 + +#define SMALL_OBJECT_FREELIST_SIZE 1024 +#define INTERP_NUM_FREELISTS 30 /* PyInterpreterState holds the global state for one of the runtime's interpreters. Typically the initial (main) interpreter is the only one. 
@@ -178,6 +183,9 @@ struct _is { struct _Py_context_state context; struct _Py_exc_state exc_state; +#if WITH_FREELISTS + _PyFreeList freelists[INTERP_NUM_FREELISTS]; +#endif struct ast_state ast; struct types_state types; struct callable_cache callable_cache; @@ -230,6 +238,28 @@ PyAPI_FUNC(int) _PyInterpreterState_IDInitref(PyInterpreterState *); PyAPI_FUNC(int) _PyInterpreterState_IDIncref(PyInterpreterState *); PyAPI_FUNC(void) _PyInterpreterState_IDDecref(PyInterpreterState *); + +#if WITH_FREELISTS +static inline PyObject* +_PyInterpreterState_FreelistAlloc(PyInterpreterState *interp, int size) { + assert(size >= 4); + assert((size & 0x1) == 0); + int index = (size-4)/2; + return _PyFreeList_Alloc(&interp->freelists[index]); +} + +static inline void +_PyInterpreterState_FreelistFree(PyInterpreterState * interp, PyObject *op, int size) { + /* todo: assert the size is correct? */ + assert(size >= 4); + assert((size & 0x1) == 0); + int index = (size-4)/2; + _PyFreeList_Alloc(&interp->freelists[index]); +} + +#endif /* WITH_FREELISTS */ + + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h index 4cc953d8d779c9..e21f8ec54b7887 100644 --- a/Include/internal/pycore_pymem.h +++ b/Include/internal/pycore_pymem.h @@ -90,6 +90,68 @@ PyAPI_FUNC(int) _PyMem_GetAllocatorName( PYMEM_ALLOCATOR_NOT_SET does nothing. */ PyAPI_FUNC(int) _PyMem_SetupAllocators(PyMemAllocatorName allocator); +#define WITH_FREELISTS 1 + +#if WITH_FREELISTS +/* Free lists. + * + * Free lists have a pointer to their first entry and + * the amount of space available allowing fast checks + * for emptiness and fullness. + * When empty they are half filled and when full they are + * completely emptied. This helps the underlying allocator + * avoid fragmentation and helps performance. 
+ */ + +typedef struct _freelist { + void *ptr; + uint32_t space; + uint16_t size; + uint16_t capacity; +} _PyFreeList; + +extern void *_PyFreeList_HalfFillAndAllocate(_PyFreeList *list); +extern void _PyFreeList_FreeToFull(_PyFreeList *list, void *ptr); + + +static inline void * +_PyFreeList_Alloc(_PyFreeList *list) { + if (list->ptr != NULL) { +#ifdef Py_STATS + if (_py_stats) _py_stats->object_stats.from_freelist++; +#endif + void *result = list->ptr; + list->ptr = *((void **)result); + list->space++; + return result; + } + return _PyFreeList_HalfFillAndAllocate(list); +} + +static inline void +_PyFreeList_Free(_PyFreeList *list, void *ptr) { + if (list->space) { +#ifdef Py_STATS + if (_py_stats) _py_stats->object_stats.to_freelist++; +#endif + *((void **)ptr) = list->ptr; + list->ptr = ptr; + list->space--; + return; + } + _PyFreeList_FreeToFull(list, ptr); +} + +static inline void +_PyFreeList_Init(_PyFreeList *list, int size, int capacity) +{ + list->ptr = NULL; + list->space = list->capacity = capacity; + list->size = size; +} + +extern void _PyFreeList_Clear(_PyFreeList *list); +#endif /* WITH_FREELISTS */ #ifdef __cplusplus } diff --git a/Objects/longobject.c b/Objects/longobject.c index 65bf15648b07fb..335781b397b646 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -6,6 +6,7 @@ #include "pycore_bitutils.h" // _Py_popcount32() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_long.h" // _Py_SmallInts +#include "pycore_pymem.h" // Free lists #include "pycore_object.h" // _PyObject_InitVar() #include "pycore_pystate.h" // _Py_IsMainInterpreter() #include "pycore_runtime.h" // _PY_NSMALLPOSINTS @@ -152,16 +153,26 @@ _PyLong_New(Py_ssize_t size) "too many digits in integer"); return NULL; } - /* Fast operations for single digit integers (including zero) - * assume that there is always at least one digit present. */ - Py_ssize_t ndigits = size ? 
size : 1; - /* Number of bytes needed is: offsetof(PyLongObject, ob_digit) + - sizeof(digit)*size. Previous incarnations of this code used - sizeof(PyVarObject) instead of the offsetof, but this risks being - incorrect in the presence of padding between the PyVarObject header - and the digits. */ - result = PyObject_Malloc(offsetof(PyLongObject, long_value.ob_digit) + - ndigits*sizeof(digit)); + assert(size >= 0); +#if WITH_FREELISTS + if (size <= 1) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + result = (PyLongObject *)_PyInterpreterState_FreelistAlloc(interp, sizeof(PyLongObject)); + } +#else + if (size == 0) { + result = PyObject_Malloc(sizeof(PyLongObject)); + } +#endif + else { + /* Number of bytes needed is: offsetof(PyLongObject, ob_digit) + + sizeof(digit)*size. Previous incarnations of this code used + sizeof(PyVarObject) instead of the offsetof, but this risks being + incorrect in the presence of padding between the PyVarObject header + and the digits. */ + result = PyObject_Malloc(offsetof(PyLongObject, long_value.ob_digit) + + size*sizeof(digit)); + } if (!result) { PyErr_NoMemory(); return NULL; @@ -202,10 +213,14 @@ _PyLong_FromMedium(sdigit x) assert(!IS_SMALL_INT(x)); assert(is_medium_int(x)); /* We could use a freelist here */ +#if WITH_FREELISTS + PyInterpreterState *interp = _PyInterpreterState_GET(); + PyLongObject *v = (PyLongObject *)_PyInterpreterState_FreelistAlloc(interp, sizeof(PyLongObject)); +#else PyLongObject *v = PyObject_Malloc(sizeof(PyLongObject)); +#endif if (v == NULL) { - PyErr_NoMemory(); - return NULL; + return PyErr_NoMemory(); } Py_ssize_t sign = x < 0 ? -1: 1; digit abs_x = x < 0 ? 
-x : x; @@ -267,6 +282,21 @@ _PyLong_FromSTwoDigits(stwodigits x) return _PyLong_FromLarge(x); } +static void +int_dealloc(PyLongObject *op) +{ +#if WITH_FREELISTS + if (PyLong_CheckExact(op) && IS_MEDIUM_VALUE(op)) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyInterpreterState_FreelistFree(interp, (PyObject*)op, sizeof(PyLongObject)); + } + else +#endif + { + Py_TYPE(op)->tp_free((PyObject *)op); + } +} + int _PyLong_AssignValue(PyObject **target, Py_ssize_t value) { @@ -6289,7 +6319,7 @@ PyTypeObject PyLong_Type = { "int", /* tp_name */ offsetof(PyLongObject, long_value.ob_digit), /* tp_basicsize */ sizeof(digit), /* tp_itemsize */ - 0, /* tp_dealloc */ + (destructor)int_dealloc, /* tp_dealloc */ 0, /* tp_vectorcall_offset */ 0, /* tp_getattr */ 0, /* tp_setattr */ diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 276c5a276c06e6..9730386e83c583 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -717,6 +717,70 @@ PyObject_Free(void *ptr) # define LIKELY(value) (value) #endif + +#if WITH_FREELISTS +void * +_PyFreeList_HalfFillAndAllocate(_PyFreeList *list) +{ + assert(list->ptr == NULL); + if (list->capacity < 4) { + return PyObject_Malloc(list->size); + } + uint32_t i = 0; + for (; i < list->space>>1; i++) { + void* ptr = PyObject_Malloc(list->size); + if (ptr == NULL) { + break; + } + *((void**)ptr) = list->ptr; + list->ptr = ptr; + } + if (i == 0) { + return NULL; + } + list->space -= (i-1); + void *result = list->ptr; + list->ptr = *((void **)result); + return result; +} + +void +_PyFreeList_Clear(_PyFreeList *list) +{ + int space = 0; + void *head = list->ptr; + while (head) { + void *next = *((void**)head); + PyObject_Free(head); + head = next; + space++; + } + list->ptr = NULL; + list->space += space; +} + +void +_PyFreeList_FreeToFull(_PyFreeList *list, void *ptr) +{ + assert(list->space == 0); + if (list->ptr == NULL) { + PyObject_Free(ptr); + return; + } + int space = 0; + void *head = list->ptr; + while (head) { + 
void *next = *((void**)head); + PyObject_Free(head); + head = next; + space++; + } + list->ptr = ptr; + *((void **)ptr) = NULL; + list->space = space-1; +} +#endif /* WITH_FREELISTS */ + #ifdef WITH_PYMALLOC #ifdef WITH_VALGRIND diff --git a/Python/pystate.c b/Python/pystate.c index bf7688fd32134b..4f137c31c36268 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -652,6 +652,12 @@ PyInterpreterState_New(void) init_interpreter(interp, runtime, id, old_head, pending_lock); +#if WITH_FREELISTS + for (int i=0; i < INTERP_NUM_FREELISTS; i++) { + _PyFreeList_Init(&interp->freelists[i], 4 + 2*i, SMALL_OBJECT_FREELIST_SIZE); + } +#endif + HEAD_UNLOCK(runtime); return interp; @@ -681,6 +687,12 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) } HEAD_UNLOCK(runtime); +#if WITH_FREELISTS + for (int i=0; i < INTERP_NUM_FREELISTS; i++) { + _PyFreeList_Clear(&interp->freelists[i]); + } +#endif + Py_CLEAR(interp->audit_hooks); PyConfig_Clear(&interp->config); From cf99ee400307037bf7b6fa9267235c36a5c09927 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Tue, 31 Jan 2023 15:36:12 +0000 Subject: [PATCH 02/18] moved things around a little --- Include/internal/pycore_interp.h | 27 ++++++++++++++++----------- Include/internal/pycore_pymem.h | 1 - Objects/obmalloc.c | 2 +- Python/pystate.c | 4 +++- 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 1ab341d5e2d3a2..3208c13d95ad3e 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -50,7 +50,6 @@ struct _Py_long_state { /* interpreter state */ -#define WITH_FREELISTS 1 #define SMALL_OBJECT_FREELIST_SIZE 1024 #define INTERP_NUM_FREELISTS 30 @@ -238,26 +237,32 @@ PyAPI_FUNC(int) _PyInterpreterState_IDInitref(PyInterpreterState *); PyAPI_FUNC(int) _PyInterpreterState_IDIncref(PyInterpreterState *); PyAPI_FUNC(void) _PyInterpreterState_IDDecref(PyInterpreterState *); +#define 
SIZE_TO_FREELIST_INDEX(size) ((size-4)/2) +#define FREELIST_INDEX_TO_SIZE(idx) (2*(idx) + 4) -#if WITH_FREELISTS static inline PyObject* -_PyInterpreterState_FreelistAlloc(PyInterpreterState *interp, int size) { - assert(size >= 4); - assert((size & 0x1) == 0); - int index = (size-4)/2; +_PyInterpreterState_FreelistAlloc(PyInterpreterState *interp, Py_ssize_t size) { +#if WITH_FREELISTS + int index = SIZE_TO_FREELIST_INDEX(size); + assert(index >= 0 && index < INTERP_NUM_FREELISTS); return _PyFreeList_Alloc(&interp->freelists[index]); +#else + return PyObject_Malloc(size); +#endif } static inline void -_PyInterpreterState_FreelistFree(PyInterpreterState * interp, PyObject *op, int size) { +_PyInterpreterState_FreelistFree(PyInterpreterState * interp, PyObject *op, Py_ssize_t size) { +#if WITH_FREELISTS /* todo: assert the size is correct? */ - assert(size >= 4); - assert((size & 0x1) == 0); - int index = (size-4)/2; + int index = SIZE_TO_FREELIST_INDEX(size); + assert(index >= 0 && index < INTERP_NUM_FREELISTS); _PyFreeList_Alloc(&interp->freelists[index]); +#else + Py_TYPE(op)->tp_free((PyObject *)op); +#endif } -#endif /* WITH_FREELISTS */ #ifdef __cplusplus diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h index e21f8ec54b7887..e93844f54f5e96 100644 --- a/Include/internal/pycore_pymem.h +++ b/Include/internal/pycore_pymem.h @@ -90,7 +90,6 @@ PyAPI_FUNC(int) _PyMem_GetAllocatorName( PYMEM_ALLOCATOR_NOT_SET does nothing. */ PyAPI_FUNC(int) _PyMem_SetupAllocators(PyMemAllocatorName allocator); -#define WITH_FREELISTS 1 #if WITH_FREELISTS /* Free lists. 
diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 9730386e83c583..4ef2670fc39383 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -738,9 +738,9 @@ _PyFreeList_HalfFillAndAllocate(_PyFreeList *list) if (i == 0) { return NULL; } - list->space -= (i-1); void *result = list->ptr; list->ptr = *((void **)result); + list->space -= (i-1); return result; } diff --git a/Python/pystate.c b/Python/pystate.c index 4f137c31c36268..334c5864cf73dd 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -654,7 +654,9 @@ PyInterpreterState_New(void) #if WITH_FREELISTS for (int i=0; i < INTERP_NUM_FREELISTS; i++) { - _PyFreeList_Init(&interp->freelists[i], 4 + 2*i, SMALL_OBJECT_FREELIST_SIZE); + _PyFreeList_Init(&interp->freelists[i], + FREELIST_INDEX_TO_SIZE(i), + SMALL_OBJECT_FREELIST_SIZE); } #endif From 8a6e6a3767da800b69c9cb4fb6909c4dd6ed1177 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Tue, 31 Jan 2023 16:25:58 +0000 Subject: [PATCH 03/18] remove WITH_FREELISTS in longobject.c --- Objects/longobject.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/Objects/longobject.c b/Objects/longobject.c index 335781b397b646..d8556a51e92b6a 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -154,16 +154,10 @@ _PyLong_New(Py_ssize_t size) return NULL; } assert(size >= 0); -#if WITH_FREELISTS if (size <= 1) { PyInterpreterState *interp = _PyInterpreterState_GET(); result = (PyLongObject *)_PyInterpreterState_FreelistAlloc(interp, sizeof(PyLongObject)); } -#else - if (size == 0) { - result = PyObject_Malloc(sizeof(PyLongObject)); - } -#endif else { /* Number of bytes needed is: offsetof(PyLongObject, ob_digit) + sizeof(digit)*size. 
Previous incarnations of this code used @@ -212,13 +206,9 @@ _PyLong_FromMedium(sdigit x) { assert(!IS_SMALL_INT(x)); assert(is_medium_int(x)); - /* We could use a freelist here */ -#if WITH_FREELISTS PyInterpreterState *interp = _PyInterpreterState_GET(); - PyLongObject *v = (PyLongObject *)_PyInterpreterState_FreelistAlloc(interp, sizeof(PyLongObject)); -#else - PyLongObject *v = PyObject_Malloc(sizeof(PyLongObject)); -#endif + PyLongObject *v = (PyLongObject *)_PyInterpreterState_FreelistAlloc( + interp, sizeof(PyLongObject)); if (v == NULL) { return PyErr_NoMemory(); } @@ -285,13 +275,11 @@ _PyLong_FromSTwoDigits(stwodigits x) static void int_dealloc(PyLongObject *op) { -#if WITH_FREELISTS if (PyLong_CheckExact(op) && IS_MEDIUM_VALUE(op)) { PyInterpreterState *interp = _PyInterpreterState_GET(); _PyInterpreterState_FreelistFree(interp, (PyObject*)op, sizeof(PyLongObject)); } else -#endif { Py_TYPE(op)->tp_free((PyObject *)op); } From fe65f492a1269f3341a23ef7d6862b6f634d0565 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Tue, 31 Jan 2023 17:25:31 +0000 Subject: [PATCH 04/18] FreeToFull leaves the list empty --- Objects/obmalloc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 4ef2670fc39383..ae303cdaa71727 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -763,8 +763,8 @@ void _PyFreeList_FreeToFull(_PyFreeList *list, void *ptr) { assert(list->space == 0); + PyObject_Free(ptr); if (list->ptr == NULL) { - PyObject_Free(ptr); return; } int space = 0; @@ -776,8 +776,7 @@ _PyFreeList_FreeToFull(_PyFreeList *list, void *ptr) space++; } list->ptr = ptr; - *((void **)ptr) = NULL; - list->space = space-1; + list->space = space; } #endif /* WITH_FREELISTS */ From a33061f0471d637d903028afe0657caa95bfff8f Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Fri, 3 Feb 2023 14:31:53 +0000 Subject: [PATCH 05/18] reuse _PyFreeList_Clear --- Objects/obmalloc.c | 11 +---------- 1 file changed, 1 
insertion(+), 10 deletions(-) diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index ae303cdaa71727..784855fddc8767 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -767,16 +767,7 @@ _PyFreeList_FreeToFull(_PyFreeList *list, void *ptr) if (list->ptr == NULL) { return; } - int space = 0; - void *head = list->ptr; - while (head) { - void *next = *((void**)head); - PyObject_Free(head); - head = next; - space++; - } - list->ptr = ptr; - list->space = space; + _PyFreeList_Clear(list); } #endif /* WITH_FREELISTS */ From 6b312e0da1acdedd0f104392578a16c249d09345 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Fri, 3 Feb 2023 14:32:55 +0000 Subject: [PATCH 06/18] fix free bug and remove ifdef in a few places --- Include/internal/pycore_interp.h | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 3208c13d95ad3e..b462c8204abe58 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -182,9 +182,7 @@ struct _is { struct _Py_context_state context; struct _Py_exc_state exc_state; -#if WITH_FREELISTS _PyFreeList freelists[INTERP_NUM_FREELISTS]; -#endif struct ast_state ast; struct types_state types; struct callable_cache callable_cache; @@ -237,30 +235,23 @@ PyAPI_FUNC(int) _PyInterpreterState_IDInitref(PyInterpreterState *); PyAPI_FUNC(int) _PyInterpreterState_IDIncref(PyInterpreterState *); PyAPI_FUNC(void) _PyInterpreterState_IDDecref(PyInterpreterState *); -#define SIZE_TO_FREELIST_INDEX(size) ((size-4)/2) -#define FREELIST_INDEX_TO_SIZE(idx) (2*(idx) + 4) +#define FREELIST_QUANTUM (2*sizeof(void*)) +#define SIZE_TO_FREELIST_INDEX(size) ((size-4)/FREELIST_QUANTUM) +#define FREELIST_INDEX_TO_SIZE(idx) (FREELIST_QUANTUM*(idx) + 4) static inline PyObject* _PyInterpreterState_FreelistAlloc(PyInterpreterState *interp, Py_ssize_t size) { -#if WITH_FREELISTS int index = SIZE_TO_FREELIST_INDEX(size); assert(index >= 0 && index < 
INTERP_NUM_FREELISTS); return _PyFreeList_Alloc(&interp->freelists[index]); -#else - return PyObject_Malloc(size); -#endif } static inline void _PyInterpreterState_FreelistFree(PyInterpreterState * interp, PyObject *op, Py_ssize_t size) { -#if WITH_FREELISTS /* todo: assert the size is correct? */ int index = SIZE_TO_FREELIST_INDEX(size); assert(index >= 0 && index < INTERP_NUM_FREELISTS); - _PyFreeList_Alloc(&interp->freelists[index]); -#else - Py_TYPE(op)->tp_free((PyObject *)op); -#endif + _PyFreeList_Free(&interp->freelists[index], op); } From 1d811986848b84e3ce191fc283acd59638d563ce Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Fri, 3 Feb 2023 15:14:21 +0000 Subject: [PATCH 07/18] use _PyLong_Free in specialized bytecodes --- Include/internal/pycore_long.h | 2 ++ Objects/longobject.c | 6 +++--- Python/bytecodes.c | 22 +++++++++++----------- Python/generated_cases.c.h | 22 +++++++++++----------- 4 files changed, 27 insertions(+), 25 deletions(-) diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index 8c1d017bb95e4e..9a93184cc0c28a 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -129,6 +129,8 @@ _PyLong_IsPositiveSingleDigit(PyObject* sub) { return ((size_t)signed_size) <= 1; } +void _PyLong_Free(PyLongObject *op); + #ifdef __cplusplus } #endif diff --git a/Objects/longobject.c b/Objects/longobject.c index d8556a51e92b6a..c560d5844cfc11 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -272,8 +272,8 @@ _PyLong_FromSTwoDigits(stwodigits x) return _PyLong_FromLarge(x); } -static void -int_dealloc(PyLongObject *op) +void +_PyLong_Free(PyLongObject *op) { if (PyLong_CheckExact(op) && IS_MEDIUM_VALUE(op)) { PyInterpreterState *interp = _PyInterpreterState_GET(); @@ -6307,7 +6307,7 @@ PyTypeObject PyLong_Type = { "int", /* tp_name */ offsetof(PyLongObject, long_value.ob_digit), /* tp_basicsize */ sizeof(digit), /* tp_itemsize */ - (destructor)int_dealloc, /* tp_dealloc */ + 
(destructor)_PyLong_Free, /* tp_dealloc */ 0, /* tp_vectorcall_offset */ 0, /* tp_getattr */ 0, /* tp_setattr */ diff --git a/Python/bytecodes.c b/Python/bytecodes.c index d1e59f7908b580..e889375794aef6 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -183,8 +183,8 @@ dummy_func( DEOPT_IF(!PyLong_CheckExact(right), BINARY_OP); STAT_INC(BINARY_OP, hit); prod = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); - _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(right, (destructor)_PyLong_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)_PyLong_Free); ERROR_IF(prod == NULL, error); } @@ -207,8 +207,8 @@ dummy_func( DEOPT_IF(!PyLong_CheckExact(right), BINARY_OP); STAT_INC(BINARY_OP, hit); sub = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right); - _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(right, (destructor)_PyLong_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)_PyLong_Free); ERROR_IF(sub == NULL, error); } @@ -290,8 +290,8 @@ dummy_func( DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP); STAT_INC(BINARY_OP, hit); sum = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right); - _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(right, (destructor)_PyLong_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)_PyLong_Free); ERROR_IF(sum == NULL, error); } @@ -364,7 +364,7 @@ dummy_func( res = PyList_GET_ITEM(list, index); assert(res != NULL); Py_INCREF(res); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(sub, (destructor)_PyLong_Free); Py_DECREF(list); } @@ -382,7 +382,7 @@ dummy_func( res = PyTuple_GET_ITEM(tuple, index); assert(res != NULL); Py_INCREF(res); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + 
_Py_DECREF_SPECIALIZED(sub, (destructor)_PyLong_Free); Py_DECREF(tuple); } @@ -478,7 +478,7 @@ dummy_func( PyList_SET_ITEM(list, index, value); assert(old_value != NULL); Py_DECREF(old_value); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(sub, (destructor)_PyLong_Free); Py_DECREF(list); } @@ -1838,8 +1838,8 @@ dummy_func( Py_ssize_t iright = Py_SIZE(right) * ((PyLongObject *)right)->long_value.ob_digit[0]; // 2 if <, 4 if >, 8 if ==; this matches the low 4 bits of the oparg int sign_ish = COMPARISON_BIT(ileft, iright); - _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)_PyLong_Free); + _Py_DECREF_SPECIALIZED(right, (destructor)_PyLong_Free); if (sign_ish & oparg) { int offset = _Py_OPARG(next_instr[1]); JUMPBY(offset); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 3ee30ae8df9e3c..4e01afb996448b 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -248,8 +248,8 @@ DEOPT_IF(!PyLong_CheckExact(right), BINARY_OP); STAT_INC(BINARY_OP, hit); prod = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); - _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(right, (destructor)_PyLong_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)_PyLong_Free); if (prod == NULL) goto pop_2_error; STACK_SHRINK(1); POKE(1, prod); @@ -286,8 +286,8 @@ DEOPT_IF(!PyLong_CheckExact(right), BINARY_OP); STAT_INC(BINARY_OP, hit); sub = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right); - _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(right, (destructor)_PyLong_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)_PyLong_Free); if (sub == NULL) goto pop_2_error; STACK_SHRINK(1); POKE(1, sub); @@ -395,8 
+395,8 @@ DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP); STAT_INC(BINARY_OP, hit); sum = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right); - _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(right, (destructor)_PyLong_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)_PyLong_Free); if (sum == NULL) goto pop_2_error; STACK_SHRINK(1); POKE(1, sum); @@ -491,7 +491,7 @@ res = PyList_GET_ITEM(list, index); assert(res != NULL); Py_INCREF(res); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(sub, (destructor)_PyLong_Free); Py_DECREF(list); STACK_SHRINK(1); POKE(1, res); @@ -516,7 +516,7 @@ res = PyTuple_GET_ITEM(tuple, index); assert(res != NULL); Py_INCREF(res); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(sub, (destructor)_PyLong_Free); Py_DECREF(tuple); STACK_SHRINK(1); POKE(1, res); @@ -643,7 +643,7 @@ PyList_SET_ITEM(list, index, value); assert(old_value != NULL); Py_DECREF(old_value); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(sub, (destructor)_PyLong_Free); Py_DECREF(list); STACK_SHRINK(3); JUMPBY(1); @@ -2183,8 +2183,8 @@ Py_ssize_t iright = Py_SIZE(right) * ((PyLongObject *)right)->long_value.ob_digit[0]; // 2 if <, 4 if >, 8 if ==; this matches the low 4 bits of the oparg int sign_ish = COMPARISON_BIT(ileft, iright); - _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)_PyLong_Free); + _Py_DECREF_SPECIALIZED(right, (destructor)_PyLong_Free); if (sign_ish & oparg) { int offset = _Py_OPARG(next_instr[1]); JUMPBY(offset); From 5609e30205acef07964c6d50c29e9238ec24e69c Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Fri, 3 Feb 2023 16:53:17 +0000 Subject: [PATCH 08/18] use _PyLong_Free in _Py_DECREF_INT --- Objects/longobject.c | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/longobject.c b/Objects/longobject.c index c560d5844cfc11..69ce179097e13a 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -47,7 +47,7 @@ static inline void _Py_DECREF_INT(PyLongObject *op) { assert(PyLong_CheckExact(op)); - _Py_DECREF_SPECIALIZED((PyObject *)op, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED((PyObject *)op, (destructor)_PyLong_Free); } static inline int From b8b1879c281ccb543eadd80036e13eae0899aec9 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Sun, 5 Feb 2023 21:14:44 +0000 Subject: [PATCH 09/18] stats for generic freelist --- Include/internal/pycore_pymem.h | 10 ++++++++-- Include/pystats.h | 4 ++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h index e93844f54f5e96..f868eecfe99322 100644 --- a/Include/internal/pycore_pymem.h +++ b/Include/internal/pycore_pymem.h @@ -117,13 +117,16 @@ static inline void * _PyFreeList_Alloc(_PyFreeList *list) { if (list->ptr != NULL) { #ifdef Py_STATS - if (_py_stats) _py_stats->object_stats.from_freelist++; + if (_py_stats) _py_stats->object_stats.from_generic_freelist++; #endif void *result = list->ptr; list->ptr = *((void **)result); list->space++; return result; } +#ifdef Py_STATS + if (_py_stats) _py_stats->object_stats.generic_freelist_empty++; +#endif return _PyFreeList_HalfFillAndAllocate(list); } @@ -131,13 +134,16 @@ static inline void _PyFreeList_Free(_PyFreeList *list, void *ptr) { if (list->space) { #ifdef Py_STATS - if (_py_stats) _py_stats->object_stats.to_freelist++; + if (_py_stats) _py_stats->object_stats.to_generic_freelist++; #endif *((void **)ptr) = list->ptr; list->ptr = ptr; list->space--; return; } +#ifdef Py_STATS + if (_py_stats) _py_stats->object_stats.generic_freelist_full++; +#endif _PyFreeList_FreeToFull(list, ptr); } diff --git a/Include/pystats.h b/Include/pystats.h index 25ed4bddc7240c..eba50419b43bbd 100644 --- 
a/Include/pystats.h +++ b/Include/pystats.h @@ -60,6 +60,10 @@ typedef struct _object_stats { uint64_t frees; uint64_t to_freelist; uint64_t from_freelist; + uint64_t to_generic_freelist; + uint64_t from_generic_freelist; + uint64_t generic_freelist_empty; + uint64_t generic_freelist_full; uint64_t new_values; uint64_t dict_materialized_on_request; uint64_t dict_materialized_new_key; From 460d12c746b468b251cf9678c56de22b171859eb Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Mon, 6 Feb 2023 10:54:27 +0000 Subject: [PATCH 10/18] print new stats --- Python/specialize.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Python/specialize.c b/Python/specialize.c index 096687f5fdf023..91a83f8a733db7 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -174,6 +174,10 @@ print_object_stats(FILE *out, ObjectStats *stats) { fprintf(out, "Object allocations from freelist: %" PRIu64 "\n", stats->from_freelist); fprintf(out, "Object frees to freelist: %" PRIu64 "\n", stats->to_freelist); + fprintf(out, "Object allocations from generic freelist: %" PRIu64 "\n", stats->from_generic_freelist); + fprintf(out, "Object frees to generic freelist: %" PRIu64 "\n", stats->to_generic_freelist); + fprintf(out, "Object allocations when generic freelist is empty: %" PRIu64 "\n", stats->generic_freelist_empty); + fprintf(out, "Object frees when generic freelist is full: %" PRIu64 "\n", stats->generic_freelist_full); fprintf(out, "Object allocations: %" PRIu64 "\n", stats->allocations); fprintf(out, "Object allocations to 512 bytes: %" PRIu64 "\n", stats->allocations512); fprintf(out, "Object allocations to 4 kbytes: %" PRIu64 "\n", stats->allocations4k); From 2e2a8614a8681df56186170f0b76b4058754e045 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Mon, 6 Feb 2023 19:01:48 +0000 Subject: [PATCH 11/18] fix index mapping --- Include/internal/pycore_interp.h | 4 ++-- Python/pystate.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index b462c8204abe58..e72fd9d2747f55 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -236,8 +236,8 @@ PyAPI_FUNC(int) _PyInterpreterState_IDIncref(PyInterpreterState *); PyAPI_FUNC(void) _PyInterpreterState_IDDecref(PyInterpreterState *); #define FREELIST_QUANTUM (2*sizeof(void*)) -#define SIZE_TO_FREELIST_INDEX(size) ((size-4)/FREELIST_QUANTUM) -#define FREELIST_INDEX_TO_SIZE(idx) (FREELIST_QUANTUM*(idx) + 4) +#define SIZE_TO_FREELIST_INDEX(size) (((size) + FREELIST_QUANTUM - 1)/FREELIST_QUANTUM) +#define FREELIST_INDEX_TO_ALLOCATED_SIZE(idx) ((idx) * FREELIST_QUANTUM) static inline PyObject* _PyInterpreterState_FreelistAlloc(PyInterpreterState *interp, Py_ssize_t size) { diff --git a/Python/pystate.c b/Python/pystate.c index 334c5864cf73dd..45f61dca1ff547 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -655,7 +655,7 @@ PyInterpreterState_New(void) #if WITH_FREELISTS for (int i=0; i < INTERP_NUM_FREELISTS; i++) { _PyFreeList_Init(&interp->freelists[i], - FREELIST_INDEX_TO_SIZE(i), + FREELIST_INDEX_TO_ALLOCATED_SIZE(i), SMALL_OBJECT_FREELIST_SIZE); } #endif From 42ee27f68508d0ea792aeb843ee14a68d19b6d97 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Tue, 7 Feb 2023 11:26:18 +0000 Subject: [PATCH 12/18] tweak the stats --- Include/internal/pycore_pymem.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h index f868eecfe99322..320692a7ff75fc 100644 --- a/Include/internal/pycore_pymem.h +++ b/Include/internal/pycore_pymem.h @@ -115,34 +115,34 @@ extern void _PyFreeList_FreeToFull(_PyFreeList *list, void *ptr); static inline void * _PyFreeList_Alloc(_PyFreeList *list) { - if (list->ptr != NULL) { #ifdef Py_STATS - if (_py_stats) _py_stats->object_stats.from_generic_freelist++; + if (_py_stats) _py_stats->object_stats.from_generic_freelist++; #endif + if 
(list->ptr != NULL) { void *result = list->ptr; list->ptr = *((void **)result); list->space++; return result; } #ifdef Py_STATS - if (_py_stats) _py_stats->object_stats.generic_freelist_empty++; + if (_py_stats) _py_stats->object_stats.generic_freelist_empty++; #endif return _PyFreeList_HalfFillAndAllocate(list); } static inline void _PyFreeList_Free(_PyFreeList *list, void *ptr) { - if (list->space) { #ifdef Py_STATS - if (_py_stats) _py_stats->object_stats.to_generic_freelist++; + if (_py_stats) _py_stats->object_stats.to_generic_freelist++; #endif + if (list->space) { *((void **)ptr) = list->ptr; list->ptr = ptr; list->space--; return; } #ifdef Py_STATS - if (_py_stats) _py_stats->object_stats.generic_freelist_full++; + if (_py_stats) _py_stats->object_stats.generic_freelist_full++; #endif _PyFreeList_FreeToFull(list, ptr); } From a9e76ad15182cdd43ca0c062b687936c93c8ceaf Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Wed, 8 Feb 2023 18:25:54 +0000 Subject: [PATCH 13/18] shift instead of div --- Include/internal/pycore_interp.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index e72fd9d2747f55..2dea411d399bea 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -235,8 +235,17 @@ PyAPI_FUNC(int) _PyInterpreterState_IDInitref(PyInterpreterState *); PyAPI_FUNC(int) _PyInterpreterState_IDIncref(PyInterpreterState *); PyAPI_FUNC(void) _PyInterpreterState_IDDecref(PyInterpreterState *); -#define FREELIST_QUANTUM (2*sizeof(void*)) -#define SIZE_TO_FREELIST_INDEX(size) (((size) + FREELIST_QUANTUM - 1)/FREELIST_QUANTUM) +#if SIZEOF_VOID_P == 4 +#define LOG_BASE_2_OF_FREELIST_QUANTUM 3 +#elif SIZEOF_VOID_P == 8 +#define LOG_BASE_2_OF_FREELIST_QUANTUM 4 +#else +#error "void pointer size not in (32, 64)" +#endif + +#define FREELIST_QUANTUM (2*SIZEOF_VOID_P) +#define SIZE_TO_FREELIST_INDEX(size) (((size) + FREELIST_QUANTUM - 1) >> \ + 
LOG_BASE_2_OF_FREELIST_QUANTUM) #define FREELIST_INDEX_TO_ALLOCATED_SIZE(idx) ((idx) * FREELIST_QUANTUM) static inline PyObject* From 78592b98c7b8b9a4817bab63381346fd71904374 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Wed, 8 Feb 2023 19:17:41 +0000 Subject: [PATCH 14/18] int --> Py_ssize_t --- Include/internal/pycore_interp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 2dea411d399bea..a630cb314c4ea7 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -250,7 +250,7 @@ PyAPI_FUNC(void) _PyInterpreterState_IDDecref(PyInterpreterState *); static inline PyObject* _PyInterpreterState_FreelistAlloc(PyInterpreterState *interp, Py_ssize_t size) { - int index = SIZE_TO_FREELIST_INDEX(size); + Py_ssize_t index = SIZE_TO_FREELIST_INDEX(size); assert(index >= 0 && index < INTERP_NUM_FREELISTS); return _PyFreeList_Alloc(&interp->freelists[index]); } @@ -258,7 +258,7 @@ _PyInterpreterState_FreelistAlloc(PyInterpreterState *interp, Py_ssize_t size) { static inline void _PyInterpreterState_FreelistFree(PyInterpreterState * interp, PyObject *op, Py_ssize_t size) { /* todo: assert the size is correct? 
*/ - int index = SIZE_TO_FREELIST_INDEX(size); + Py_ssize_t index = SIZE_TO_FREELIST_INDEX(size); assert(index >= 0 && index < INTERP_NUM_FREELISTS); _PyFreeList_Free(&interp->freelists[index], op); } From 044743e0ee10ec3404266a7215bf0af8f751bd77 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Wed, 8 Feb 2023 22:07:29 +0000 Subject: [PATCH 15/18] disable the freelist after clearing it in interpreter_clear --- Python/pystate.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Python/pystate.c b/Python/pystate.c index f5adbea51244a2..b6dd2702b026f4 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -717,13 +717,11 @@ PyInterpreterState_New(void) init_interpreter(interp, runtime, id, old_head, pending_lock); -#if WITH_FREELISTS for (int i=0; i < INTERP_NUM_FREELISTS; i++) { _PyFreeList_Init(&interp->freelists[i], FREELIST_INDEX_TO_ALLOCATED_SIZE(i), SMALL_OBJECT_FREELIST_SIZE); } -#endif HEAD_UNLOCK(runtime); return interp; @@ -767,11 +765,11 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) } HEAD_UNLOCK(runtime); -#if WITH_FREELISTS for (int i=0; i < INTERP_NUM_FREELISTS; i++) { _PyFreeList_Clear(&interp->freelists[i]); + interp->freelists[i].space = 0; + interp->freelists[i].capacity = 0; } -#endif /* It is possible that any of the objects below have a finalizer that runs Python code or otherwise relies on a thread state From 827b81b97427dbd7c1da8258e4c75c189bb59ba4 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Thu, 9 Feb 2023 13:26:51 +0000 Subject: [PATCH 16/18] add _PyFreeList_Disable --- Include/internal/pycore_pymem.h | 13 +++++++------ Objects/obmalloc.c | 8 ++++++-- Python/pystate.c | 3 +-- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h index 320692a7ff75fc..aef41c0b315f5c 100644 --- a/Include/internal/pycore_pymem.h +++ b/Include/internal/pycore_pymem.h @@ -91,7 +91,6 @@ PyAPI_FUNC(int) _PyMem_GetAllocatorName( 
PyAPI_FUNC(int) _PyMem_SetupAllocators(PyMemAllocatorName allocator); -#if WITH_FREELISTS /* Free lists. * * Free lists have a pointer to their first entry and @@ -111,7 +110,8 @@ typedef struct _freelist { extern void *_PyFreeList_HalfFillAndAllocate(_PyFreeList *list); extern void _PyFreeList_FreeToFull(_PyFreeList *list, void *ptr); - +extern void _PyFreeList_Clear(_PyFreeList *list); +extern void _PyFreeList_Disable(_PyFreeList *list); static inline void * _PyFreeList_Alloc(_PyFreeList *list) { @@ -151,13 +151,14 @@ static inline void _PyFreeList_Init(_PyFreeList *list, int size, int capacity) { list->ptr = NULL; - list->space = list->capacity = capacity; list->size = size; +#if WITH_FREELISTS + list->space = list->capacity = capacity; +#else + _PyFreeList_Disable(list); +#endif } -extern void _PyFreeList_Clear(_PyFreeList *list); -#endif /* WITH_FREELISTS */ - #ifdef __cplusplus } #endif diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 784855fddc8767..73cbccc980b9a9 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -718,7 +718,6 @@ PyObject_Free(void *ptr) #endif -#if WITH_FREELISTS void * _PyFreeList_HalfFillAndAllocate(_PyFreeList *list) { @@ -759,6 +758,12 @@ _PyFreeList_Clear(_PyFreeList *list) list->space += space; } +void +_PyFreeList_Disable(_PyFreeList *list) +{ + list->space = list->capacity = 0; +} + void _PyFreeList_FreeToFull(_PyFreeList *list, void *ptr) { @@ -769,7 +774,6 @@ _PyFreeList_FreeToFull(_PyFreeList *list, void *ptr) } _PyFreeList_Clear(list); } -#endif /* WITH_FREELISTS */ #ifdef WITH_PYMALLOC diff --git a/Python/pystate.c b/Python/pystate.c index b6dd2702b026f4..543c7cb598351b 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -767,8 +767,7 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) for (int i=0; i < INTERP_NUM_FREELISTS; i++) { _PyFreeList_Clear(&interp->freelists[i]); - interp->freelists[i].space = 0; - interp->freelists[i].capacity = 0; + 
_PyFreeList_Disable(&interp->freelists[i]); } /* It is possible that any of the objects below have a finalizer From 51c6c3c16136bf410e61ecebebf3ee70241ea6db Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Fri, 10 Feb 2023 11:32:46 +0000 Subject: [PATCH 17/18] rename INDEX --> SIZE_CLASS --- Include/internal/pycore_interp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index a630cb314c4ea7..5a54608ae3db81 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -244,13 +244,13 @@ PyAPI_FUNC(void) _PyInterpreterState_IDDecref(PyInterpreterState *); #endif #define FREELIST_QUANTUM (2*SIZEOF_VOID_P) -#define SIZE_TO_FREELIST_INDEX(size) (((size) + FREELIST_QUANTUM - 1) >> \ +#define SIZE_TO_FREELIST_SIZE_CLASS(size) (((size) + FREELIST_QUANTUM - 1) >> \ LOG_BASE_2_OF_FREELIST_QUANTUM) #define FREELIST_INDEX_TO_ALLOCATED_SIZE(idx) ((idx) * FREELIST_QUANTUM) static inline PyObject* _PyInterpreterState_FreelistAlloc(PyInterpreterState *interp, Py_ssize_t size) { - Py_ssize_t index = SIZE_TO_FREELIST_INDEX(size); + Py_ssize_t index = SIZE_TO_FREELIST_SIZE_CLASS(size); assert(index >= 0 && index < INTERP_NUM_FREELISTS); return _PyFreeList_Alloc(&interp->freelists[index]); } @@ -258,7 +258,7 @@ _PyInterpreterState_FreelistAlloc(PyInterpreterState *interp, Py_ssize_t size) { static inline void _PyInterpreterState_FreelistFree(PyInterpreterState * interp, PyObject *op, Py_ssize_t size) { /* todo: assert the size is correct? 
*/ - Py_ssize_t index = SIZE_TO_FREELIST_INDEX(size); + Py_ssize_t index = SIZE_TO_FREELIST_SIZE_CLASS(size); assert(index >= 0 && index < INTERP_NUM_FREELISTS); _PyFreeList_Free(&interp->freelists[index], op); } From cfb886d552ecb40272b9003dd1fbfb5037d62736 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Fri, 10 Feb 2023 13:09:39 +0000 Subject: [PATCH 18/18] stats per size class --- Include/internal/pycore_pymem.h | 16 +++++++++++----- Include/pystats.h | 12 ++++++++---- Python/pystate.c | 2 +- Python/specialize.c | 19 +++++++++++++++---- 4 files changed, 35 insertions(+), 14 deletions(-) diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h index aef41c0b315f5c..86e0ea1c5e1a02 100644 --- a/Include/internal/pycore_pymem.h +++ b/Include/internal/pycore_pymem.h @@ -106,6 +106,9 @@ typedef struct _freelist { uint32_t space; uint16_t size; uint16_t capacity; +#ifdef Py_STATS + int size_class; +#endif } _PyFreeList; extern void *_PyFreeList_HalfFillAndAllocate(_PyFreeList *list); @@ -116,7 +119,7 @@ extern void _PyFreeList_Disable(_PyFreeList *list); static inline void * _PyFreeList_Alloc(_PyFreeList *list) { #ifdef Py_STATS - if (_py_stats) _py_stats->object_stats.from_generic_freelist++; + if (_py_stats) _py_stats->freelist_stats[list->size_class].allocations++; #endif if (list->ptr != NULL) { void *result = list->ptr; @@ -125,7 +128,7 @@ _PyFreeList_Alloc(_PyFreeList *list) { return result; } #ifdef Py_STATS - if (_py_stats) _py_stats->object_stats.generic_freelist_empty++; + if (_py_stats) _py_stats->freelist_stats[list->size_class].empty++; #endif return _PyFreeList_HalfFillAndAllocate(list); } @@ -133,7 +136,7 @@ _PyFreeList_Alloc(_PyFreeList *list) { static inline void _PyFreeList_Free(_PyFreeList *list, void *ptr) { #ifdef Py_STATS - if (_py_stats) _py_stats->object_stats.to_generic_freelist++; + if (_py_stats) _py_stats->freelist_stats[list->size_class].frees++; #endif if (list->space) { *((void **)ptr) = list->ptr; @@ -142,16 
+145,19 @@ _PyFreeList_Free(_PyFreeList *list, void *ptr) { return; } #ifdef Py_STATS - if (_py_stats) _py_stats->object_stats.generic_freelist_full++; + if (_py_stats) _py_stats->freelist_stats[list->size_class].full++; #endif _PyFreeList_FreeToFull(list, ptr); } static inline void -_PyFreeList_Init(_PyFreeList *list, int size, int capacity) +_PyFreeList_Init(_PyFreeList *list, int size_class, int size, int capacity) { list->ptr = NULL; list->size = size; +#ifdef Py_STATS + list->size_class = size_class; +#endif #if WITH_FREELISTS list->space = list->capacity = capacity; #else diff --git a/Include/pystats.h b/Include/pystats.h index eba50419b43bbd..1d14960d31cbdc 100644 --- a/Include/pystats.h +++ b/Include/pystats.h @@ -48,6 +48,13 @@ typedef struct _call_stats { uint64_t eval_calls[EVAL_CALL_KINDS]; } CallStats; +typedef struct _generic_freelist_stats { + uint64_t allocations; + uint64_t frees; + uint64_t empty; + uint64_t full; +} GenericFreelistStats; + typedef struct _object_stats { uint64_t increfs; uint64_t decrefs; @@ -60,10 +67,6 @@ typedef struct _object_stats { uint64_t frees; uint64_t to_freelist; uint64_t from_freelist; - uint64_t to_generic_freelist; - uint64_t from_generic_freelist; - uint64_t generic_freelist_empty; - uint64_t generic_freelist_full; uint64_t new_values; uint64_t dict_materialized_on_request; uint64_t dict_materialized_new_key; @@ -82,6 +85,7 @@ typedef struct _stats { OpcodeStats opcode_stats[256]; CallStats call_stats; ObjectStats object_stats; + GenericFreelistStats freelist_stats[30]; // INTERP_NUM_FREELISTS } PyStats; diff --git a/Python/pystate.c b/Python/pystate.c index 543c7cb598351b..d0a52beb35eb68 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -718,7 +718,7 @@ PyInterpreterState_New(void) init_interpreter(interp, runtime, id, old_head, pending_lock); for (int i=0; i < INTERP_NUM_FREELISTS; i++) { - _PyFreeList_Init(&interp->freelists[i], + _PyFreeList_Init(&interp->freelists[i], i, 
FREELIST_INDEX_TO_ALLOCATED_SIZE(i), SMALL_OBJECT_FREELIST_SIZE); } diff --git a/Python/specialize.c b/Python/specialize.c index bb66b47faa4b57..8ec4978b3a886f 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -174,10 +174,6 @@ print_object_stats(FILE *out, ObjectStats *stats) { fprintf(out, "Object allocations from freelist: %" PRIu64 "\n", stats->from_freelist); fprintf(out, "Object frees to freelist: %" PRIu64 "\n", stats->to_freelist); - fprintf(out, "Object allocations from generic freelist: %" PRIu64 "\n", stats->from_generic_freelist); - fprintf(out, "Object frees to generic freelist: %" PRIu64 "\n", stats->to_generic_freelist); - fprintf(out, "Object allocations when generic freelist is empty: %" PRIu64 "\n", stats->generic_freelist_empty); - fprintf(out, "Object frees when generic freelist is full: %" PRIu64 "\n", stats->generic_freelist_full); fprintf(out, "Object allocations: %" PRIu64 "\n", stats->allocations); fprintf(out, "Object allocations to 512 bytes: %" PRIu64 "\n", stats->allocations512); fprintf(out, "Object allocations to 4 kbytes: %" PRIu64 "\n", stats->allocations4k); @@ -199,11 +195,26 @@ print_object_stats(FILE *out, ObjectStats *stats) fprintf(out, "Object method cache dunder misses: %" PRIu64 "\n", stats->type_cache_dunder_misses); } +static void +print_freelist_stats(FILE *out, GenericFreelistStats freelist_stats[INTERP_NUM_FREELISTS]) +{ + for (int i=0; i<INTERP_NUM_FREELISTS; i++) { + GenericFreelistStats *stats = &freelist_stats[i]; + if (stats->allocations > 0) { + fprintf(out, "Allocations from freelist[%d]: %" PRIu64 "\n", i, stats->allocations); + fprintf(out, "Frees into freelist[%d]: %" PRIu64 "\n", i, stats->frees); + fprintf(out, "Freelist[%d] empty: %" PRIu64 "\n", i, stats->empty); + fprintf(out, "Freelist[%d] full: %" PRIu64 "\n", i, stats->full); + } + } +} + static void print_stats(FILE *out, PyStats *stats) { print_spec_stats(out, stats->opcode_stats); print_call_stats(out, &stats->call_stats); print_object_stats(out, &stats->object_stats); + print_freelist_stats(out, stats->freelist_stats); } void