From 715823da9332916095d9a5b60c2edc298b21531c Mon Sep 17 00:00:00 2001 From: Kevin Modzelewski Date: Thu, 18 Aug 2022 16:17:50 -0400 Subject: [PATCH] Add _PyTuple_New_Nonzeroed A faster, but unsafe, version of PyTuple_New for use in performance- critical paths. Zeroing the contents of a tuple is surprisingly expensive, so for cases where the contents of the tuple will immediately be overwritten, time can be saved by calling this non-zeroing version. This tuple is not safe to be seen by any other code since it will have garbage where PyObject pointers are expected. If there is any chance that the initialization code will raise an exception or do a GC collection, it is not safe to use this function. perf says that approximately 0.75% of the time in the pyperformance benchmarks is spent in PyTuple_New, and 0.42% for the Pyston web macrobenchmarks. It also says that roughly 35% of the time of PyTuple_New is spent in the loop that zeroes the tuple contents, so if we are able to remove this then we can save 0.26% / 0.15% respectively. I went through all calls to PyTuple_New and converted them if it was safe to do so and if it seemed reasonable that the code could be performance-sensitive. This migrates roughly 90% of the PyTuple_New time to _PyTuple_New_Nonzeroed. Changes this small are difficult to measure, but looking at the new macrobenchmarks profile I see that PyTuple_New + _PyTuple_New_Nonzeroed time is 0.26%, down from 0.45%. pyperformance and Pyston-macrobenchmarks performance both improved by about 0.5% but I don't trust those numbers. --- Include/internal/pycore_tuple.h | 14 ++++++++++++++ Modules/_json.c | 2 +- Objects/dictobject.c | 4 ++-- Objects/enumobject.c | 4 ++-- Objects/longobject.c | 4 ++-- Objects/odictobject.c | 2 +- Objects/stringlib/partition.h | 4 ++-- Objects/tupleobject.c | 15 +++++++++++++++ Objects/typeobject.c | 4 ++-- Python/ceval.c | 4 ++-- 10 files changed, 43 insertions(+), 14 deletions(-) diff --git a/Include/internal/pycore_tuple.h b/Include/internal/pycore_tuple.h index 1efe4fa2bdef94..5f2cb8d85377ce 100644 --- a/Include/internal/pycore_tuple.h +++ b/Include/internal/pycore_tuple.h @@ -67,6 +67,20 @@ struct _Py_tuple_state { extern PyObject *_PyTuple_FromArray(PyObject *const *, Py_ssize_t); extern PyObject *_PyTuple_FromArraySteal(PyObject *const *, Py_ssize_t); +/* A faster, but unsafe, version of PyTuple_New for use in performance- + * critical paths. + * + * Zeroing the contents of a tuple is surprisingly expensive, so for + * cases where the contents of the tuple will immediately be overwritten, + * time can be saved by calling this non-zeroing version. + * + * This tuple is not safe to be seen by any other code since it will + * have garbage where PyObject pointers are expected. If there is any + * chance that the initialization code will raise an exception or do + * a GC collection, it is not safe to use this function. + */ +PyAPI_FUNC(PyObject *) _PyTuple_New_Nonzeroed(Py_ssize_t size); + #ifdef __cplusplus } #endif diff --git a/Modules/_json.c b/Modules/_json.c index 1c39b46937d792..8362696a3bb933 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -353,7 +353,7 @@ _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { Py_DECREF(rval); return NULL; } - tpl = PyTuple_New(2); + tpl = _PyTuple_New_Nonzeroed(2); if (tpl == NULL) { Py_DECREF(pyidx); Py_DECREF(rval); diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 0cb95d52360ef1..b19e0dd2359f18 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -4281,7 +4281,7 @@ dictiter_iternextitem(dictiterobject *di) } } else { - result = PyTuple_New(2); + result = _PyTuple_New_Nonzeroed(2); if (result == NULL) return NULL; PyTuple_SET_ITEM(result, 0, key); /* steals reference */ @@ -4415,7 +4415,7 @@ dictreviter_iternext(dictiterobject *di) } } else { - result = PyTuple_New(2); + result = _PyTuple_New_Nonzeroed(2); if (result == NULL) { return NULL; } diff --git a/Objects/enumobject.c b/Objects/enumobject.c index d84bac6f4c9af2..d1e3a19792dcf6 100644 --- a/Objects/enumobject.c +++ b/Objects/enumobject.c @@ -207,7 +207,7 @@ enum_next_long(enumobject *en, PyObject* next_item) } return result; } - result = PyTuple_New(2); + result = _PyTuple_New_Nonzeroed(2); if (result == NULL) { Py_DECREF(next_index); Py_DECREF(next_item); @@ -257,7 +257,7 @@ enum_next(enumobject *en) } return result; } - result = PyTuple_New(2); + result = _PyTuple_New_Nonzeroed(2); if (result == NULL) { Py_DECREF(next_index); Py_DECREF(next_item); diff --git a/Objects/longobject.c b/Objects/longobject.c index 90ed02b8c27a19..0b937964eecf10 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -4249,7 +4249,7 @@ long_divmod(PyObject *a, PyObject *b) if (l_divmod((PyLongObject*)a, (PyLongObject*)b, &div, &mod) < 0) { return NULL; } - z = PyTuple_New(2); + z = _PyTuple_New_Nonzeroed(2); if (z != NULL) { PyTuple_SET_ITEM(z, 0, (PyObject *) div); PyTuple_SET_ITEM(z, 1, (PyObject *) mod); @@ -5544,7 +5544,7 @@ _PyLong_DivmodNear(PyObject *a, PyObject *b) goto error; } - result = PyTuple_New(2); + result = _PyTuple_New_Nonzeroed(2); if (result == NULL) goto error; diff --git a/Objects/odictobject.c b/Objects/odictobject.c index bd2a7677fe1cf4..631e27380ef2cb 100644 --- a/Objects/odictobject.c +++ b/Objects/odictobject.c @@ -1770,7 +1770,7 @@ odictiter_iternext(odictiterobject *di) } } else { - result = PyTuple_New(2); + result = _PyTuple_New_Nonzeroed(2); if (result == NULL) { Py_DECREF(key); Py_DECREF(value); diff --git a/Objects/stringlib/partition.h b/Objects/stringlib/partition.h index bcc217697b2e9c..1249a5c80d7585 100644 --- a/Objects/stringlib/partition.h +++ b/Objects/stringlib/partition.h @@ -23,7 +23,7 @@ STRINGLIB(partition)(PyObject* str_obj, return NULL; } - out = PyTuple_New(3); + out = _PyTuple_New_Nonzeroed(3); if (!out) return NULL; @@ -80,7 +80,7 @@ STRINGLIB(rpartition)(PyObject* str_obj, return NULL; } - out = PyTuple_New(3); + out = _PyTuple_New_Nonzeroed(3); if (!out) return NULL; diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 240af0a9075271..1172a39fa393c7 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -83,6 +83,21 @@ PyTuple_New(Py_ssize_t size) return (PyObject *) op; } +PyObject * +_PyTuple_New_Nonzeroed(Py_ssize_t size) +{ + PyTupleObject *op; + if (size == 0) { + return tuple_get_empty(); + } + op = tuple_alloc(size); + if (op == NULL) { + return NULL; + } + _PyObject_GC_TRACK(op); + return (PyObject *) op; +} + Py_ssize_t PyTuple_Size(PyObject *op) { diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 40d42947dbcfd4..6513be2a9925e9 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -2051,7 +2051,7 @@ mro_implementation(PyTypeObject *type) */ PyTypeObject *base = _PyType_CAST(PyTuple_GET_ITEM(bases, 0)); Py_ssize_t k = PyTuple_GET_SIZE(base->tp_mro); - PyObject *result = PyTuple_New(k + 1); + PyObject *result = _PyTuple_New_Nonzeroed(k + 1); if (result == NULL) { return NULL; } @@ -5639,7 +5639,7 @@ reduce_newobj(PyObject *obj) return NULL; } n = args ? PyTuple_GET_SIZE(args) : 0; - newargs = PyTuple_New(n+1); + newargs = _PyTuple_New_Nonzeroed(n+1); if (newargs == NULL) { Py_XDECREF(args); Py_DECREF(newobj); diff --git a/Python/ceval.c b/Python/ceval.c index b3a0a3640eb97d..a864fb841dafea 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2647,7 +2647,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int } TARGET(BUILD_TUPLE) { - PyObject *tup = PyTuple_New(oparg); + PyObject *tup = _PyTuple_New_Nonzeroed(oparg); if (tup == NULL) goto error; while (--oparg >= 0) { @@ -5937,7 +5937,7 @@ PyEval_EvalCodeEx(PyObject *_co, PyObject *globals, PyObject *locals, allargs = args; } else { - kwnames = PyTuple_New(kwcount); + kwnames = _PyTuple_New_Nonzeroed(kwcount); if (kwnames == NULL) { goto fail; }