From 0296c20f0084d6e57b95029af163d3daa7e65159 Mon Sep 17 00:00:00 2001
From: Kevin Modzelewski <kevmod@gmail.com>
Date: Fri, 2 Sep 2022 12:03:39 -0400
Subject: [PATCH] Replace a call to PyTuple_New with _PyTuple_FromArraySteal

PyTuple_New will zero out the tuple before returning to the caller, and a
surprising amount of time can be saved by not doing this zeroing.  One option
is to add a non-zeroing version of PyTuple_New, which I did in #96446, but
there was resistance to that approach because such an API is unsafe.

Fortunately, it looks like most of the tuple-zeroing comes directly from the
BUILD_TUPLE opcode in the interpreter, which already has its arguments in an
appropriate array, so we can simply replace the PyTuple_New call there with a
call to _PyTuple_FromArraySteal.

This seems to result in a ~0.2% speedup on macrobenchmarks.
---
 Python/ceval.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/Python/ceval.c b/Python/ceval.c
index b3a0a3640eb97d..ae16c96c3c6f4c 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -2647,13 +2647,10 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
         }
 
         TARGET(BUILD_TUPLE) {
-            PyObject *tup = PyTuple_New(oparg);
+            STACK_SHRINK(oparg);
+            PyObject *tup = _PyTuple_FromArraySteal(stack_pointer, oparg);
             if (tup == NULL)
                 goto error;
-            while (--oparg >= 0) {
-                PyObject *item = POP();
-                PyTuple_SET_ITEM(tup, oparg, item);
-            }
             PUSH(tup);
             DISPATCH();
         }