Skip to content

bpo-46939: Specialize calls to Python classes #31707

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Include/cpython/object.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ struct _typeobject {
* by code other than the specializer and interpreter. */
struct _specialization_cache {
PyObject *getitem;
/* Read by the PRECALL_PY_CLASS instruction as the callable to invoke on a
 * newly allocated instance; that instruction asserts it is a Python
 * function object. */
PyObject *init;
};

/* The *real* layout of a type object when allocated on the heap */
Expand Down
1 change: 1 addition & 0 deletions Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ typedef struct {

/* Inline cache entries that follow a PRECALL instruction. */
typedef struct {
_Py_CODEUNIT counter;
/* Type version tag stored across two code units. PRECALL_PY_CLASS reads it
 * with read_u32() and deoptimizes when it differs from the class's
 * tp_version_tag. */
_Py_CODEUNIT type_version[2];
} _PyPrecallCache;

#define INLINE_CACHE_ENTRIES_PRECALL CACHE_ENTRIES(_PyPrecallCache)
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_frame.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ typedef struct _PyInterpreterFrame {
PyFrameState f_state; /* What state the frame is in */
bool is_entry; // Whether this is the "root" frame for the current _PyCFrame.
bool is_generator;
PyObject *self; /* Strong reference (taken with Py_NewRef by CALL_PY_FRAME_PASS_SELF)
                   held only by __init__ frames entered via a specialized class call.
                   RETURN_VALUE returns it in place of None; it is NULL otherwise. */
PyObject *localsplus[1];
} _PyInterpreterFrame;

Expand Down Expand Up @@ -118,6 +119,7 @@ _PyFrame_InitializeSpecials(
frame->f_state = FRAME_CREATED;
frame->is_entry = false;
frame->is_generator = false;
frame->self = NULL;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note to reviewers: self is stored on the frame rather than in a cache or in call_shape so that nested calls made from inside __init__ don't clobber it (and so that we can tell which frame a given self belongs to). Consider the following code:

class Tokenizer:
    def __init__(self):
        self.__next() # Kaboom!
    def __next(self):
        pass

for _ in range(10):
 print(Tokenizer())

}

/* Gets the pointer to the locals array
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_typeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ extern PyStatus _PyTypes_InitSlotDefs(void);

extern void _PyStaticType_Dealloc(PyTypeObject *type);

extern PyObject *_PyObject_New_Vector(PyTypeObject *type,
PyObject *const *args, Py_ssize_t nargs, PyObject *kwds);

#ifdef __cplusplus
}
Expand Down
31 changes: 16 additions & 15 deletions Include/opcode.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Lib/importlib/_bootstrap_external.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,7 @@ def _write_atomic(path, data, mode=0o666):
# Python 3.11a5 3485 (Add an oparg to GET_AWAITABLE)
# Python 3.11a6 3486 (Use inline caching for PRECALL and CALL)
# Python 3.11a6 3487 (Remove the adaptive "oparg counter" mechanism)
# Python 3.11a6 3488 (Specialize PRECALL for Python classes)

# Python 3.12 will start with magic number 3500

Expand All @@ -409,7 +410,7 @@ def _write_atomic(path, data, mode=0o666):
# Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array
# in PC/launcher.c must also be updated.

MAGIC_NUMBER = (3487).to_bytes(2, 'little') + b'\r\n'
MAGIC_NUMBER = (3488).to_bytes(2, 'little') + b'\r\n'
_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c

_PYCACHE = '__pycache__'
Expand Down
3 changes: 2 additions & 1 deletion Lib/opcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def jabs_op(name, op, entries=0):
def_op('SET_UPDATE', 163)
def_op('DICT_MERGE', 164)
def_op('DICT_UPDATE', 165)
def_op('PRECALL', 166, 1)
def_op('PRECALL', 166, 3)

def_op('CALL', 171, 4)
def_op('KW_NAMES', 172)
Expand Down Expand Up @@ -285,6 +285,7 @@ def jabs_op(name, op, entries=0):
"PRECALL_NO_KW_METHOD_DESCRIPTOR_FAST",
"PRECALL_BOUND_METHOD",
"PRECALL_PYFUNC",
"PRECALL_PY_CLASS",
"RESUME_QUICK",
"STORE_ATTR_ADAPTIVE",
"STORE_ATTR_INSTANCE_VALUE",
Expand Down
298 changes: 149 additions & 149 deletions Lib/test/test_dis.py

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions Lib/test/test_sys.py
Original file line number Diff line number Diff line change
Expand Up @@ -1397,7 +1397,7 @@ class C(object): pass
def func():
return sys._getframe()
x = func()
check(x, size('3Pi3c7P2ic??2P'))
check(x, size('3Pi3c7P2ic??3P'))
# function
def func(): pass
check(func, size('14Pi'))
Expand All @@ -1414,7 +1414,7 @@ def bar(cls):
check(bar, size('PP'))
# generator
def get_gen(): yield 1
check(get_gen(), size('P2P4P4c7P2ic??P'))
check(get_gen(), size('P2P4P4c7P2ic??2P'))
# iterator
check(iter('abc'), size('lP'))
# callable-iterator
Expand Down Expand Up @@ -1506,7 +1506,7 @@ def delx(self): del self.__x
'10P' # PySequenceMethods
'2P' # PyBufferProcs
'6P'
'1P' # Specializer cache
'2P' # Specializer cache
)
class newstyleclass(object): pass
# Separate block for PyDictKeysObject with 8 keys and 5 entries
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Calls to Python classes are now specialized. Creating objects from Python
classes should now be faster. Patch by Ken Jin.
10 changes: 9 additions & 1 deletion Objects/typeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -4514,7 +4514,15 @@ object_init(PyObject *self, PyObject *args, PyObject *kwds)
static PyObject *
object_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
if (excess_args(args, kwds)) {
return _PyObject_New_Vector(type, (_PyTuple_CAST(args)->ob_item),
PyTuple_GET_SIZE(args), kwds);
}

PyObject *
_PyObject_New_Vector(PyTypeObject *type, PyObject *const *args,
Py_ssize_t nargs, PyObject *kwds)
{
if (nargs || (kwds && PyDict_Check(kwds) && PyDict_GET_SIZE(kwds))) {
if (type->tp_new != object_new) {
PyErr_SetString(PyExc_TypeError,
"object.__new__() takes exactly one argument (the type to instantiate)");
Expand Down
46 changes: 24 additions & 22 deletions Programs/test_frozenmain.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

61 changes: 61 additions & 0 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -1463,6 +1463,13 @@ eval_frame_handle_pending(PyThreadState *tstate)
STAT_INC(LOAD_##attr_or_method, hit); \
Py_INCREF(res);

/* Used by the CALL variants immediately after the callee's frame has become
 * the current `frame`. If a preceding PRECALL_PY_CLASS set
 * call_shape.init_pass_self, store a new strong reference to the callee's
 * first local (localsplus[0], i.e. self) in frame->self, then clear the flag
 * so that calls nested inside __init__ do not also receive it. */
#define CALL_PY_FRAME_PASS_SELF() \
if (call_shape.init_pass_self) { \
assert(frame->self == NULL); \
frame->self = Py_NewRef(frame->localsplus[0]); \
call_shape.init_pass_self = false; \
}

#define TRACE_FUNCTION_EXIT() \
if (cframe.use_tracing) { \
if (trace_function_exit(tstate, frame, retval)) { \
Expand Down Expand Up @@ -1588,6 +1595,11 @@ pop_frame(PyThreadState *tstate, _PyInterpreterFrame *frame)
*/
typedef struct {
PyObject *kwnames;
/* __init__ is special: it returns None, but the call that constructs an
   instance must evaluate to self. When this flag is set, the next CALL
   passes the current self to the new frame (the __init__ frame), where it
   is eventually consumed by RETURN_VALUE.
*/
bool init_pass_self;
} CallShape;

static inline bool
Expand Down Expand Up @@ -1619,6 +1631,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
_PyCFrame cframe;
CallShape call_shape;
call_shape.kwnames = NULL; // Borrowed reference. Reset by CALL instructions.
call_shape.init_pass_self = 0;

/* WARNING: Because the _PyCFrame lives on the C stack,
* but can be accessed from a heap allocated object (tstate)
Expand Down Expand Up @@ -2391,6 +2404,18 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int

TARGET(RETURN_VALUE) {
PyObject *retval = POP();
if (frame->self != NULL) {
if (Py_IsNone(retval)) {
Py_SETREF(retval, frame->self);
frame->self = NULL;
}
/* We need this to continue raising errors when bad-practice
__init__s return their non-None values. This is later
caught by the interpreter. */
else {
Py_CLEAR(frame->self);
}
}
assert(EMPTY());
frame->f_state = FRAME_RETURNED;
_PyFrame_SetStackPointer(frame, stack_pointer);
Expand Down Expand Up @@ -4611,6 +4636,37 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
DISPATCH();
}

/* Specialized PRECALL for calling a Python class.
 * Allocates the instance here, then rewrites the stack from
 *     [NULL, cls, args...]  to  [init, self, args...]
 * so that the following CALL invokes the cached __init__ with the new
 * instance as its first argument. */
TARGET(PRECALL_PY_CLASS) {
_PyPrecallCache *cache = (_PyPrecallCache *)next_instr;
/* A non-NULL slot below the callable means this is a method call, which
   this specialization does not handle. */
int is_method = (PEEK(oparg + 2) != NULL);
DEOPT_IF(is_method, PRECALL);
PyObject *cls = PEEK(oparg + 1);
DEOPT_IF(!PyType_Check(cls), PRECALL);
PyTypeObject *cls_t = (PyTypeObject *)cls;
/* The cached version tag must still match the class being called. */
DEOPT_IF(cls_t->tp_version_tag != read_u32(cache->type_version), PRECALL);
assert(cls_t->tp_flags & Py_TPFLAGS_HEAPTYPE);
PyObject *init = ((PyHeapTypeObject *)cls_t)->_spec_cache.init;
assert(PyFunction_Check(init));
/* Only classes that use object's tp_new are handled here. */
DEOPT_IF(cls_t->tp_new != PyBaseObject_Type.tp_new, PRECALL);
STAT_INC(PRECALL, hit);

/* NOTE(review): call_shape.kwnames is passed as `kwds`, but
   _PyObject_New_Vector only inspects `kwds` when PyDict_Check() succeeds --
   confirm that calls with keyword arguments behave as intended. */
PyObject *self = _PyObject_New_Vector(cls_t, &PEEK(oparg),
(Py_ssize_t)oparg, call_shape.kwnames);
if (self == NULL) {
goto error;
}
Py_INCREF(init);
PEEK(oparg+1) = self;
PEEK(oparg+2) = init;
/* The stack slot that held cls now holds self, so drop the stack's
   reference to cls. */
Py_DECREF(cls);

/* For use in RETURN_VALUE later */
assert(call_shape.init_pass_self == false);
call_shape.init_pass_self = true;
/* Skip over this instruction's inline cache entries. */
JUMPBY(INLINE_CACHE_ENTRIES_PRECALL);
DISPATCH();
}

TARGET(KW_NAMES) {
assert(call_shape.kwnames == NULL);
assert(oparg < PyTuple_GET_SIZE(consts));
Expand Down Expand Up @@ -4646,6 +4702,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
frame->f_lasti += INLINE_CACHE_ENTRIES_CALL;
new_frame->previous = frame;
cframe.current_frame = frame = new_frame;
CALL_PY_FRAME_PASS_SELF();
CALL_STAT_INC(inlined_py_calls);
goto start_frame;
}
Expand Down Expand Up @@ -4751,6 +4808,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
frame->f_lasti += INLINE_CACHE_ENTRIES_CALL;
new_frame->previous = frame;
frame = cframe.current_frame = new_frame;
CALL_PY_FRAME_PASS_SELF();
goto start_frame;
}

Expand Down Expand Up @@ -4791,6 +4849,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
frame->f_lasti += INLINE_CACHE_ENTRIES_CALL;
new_frame->previous = frame;
frame = cframe.current_frame = new_frame;
CALL_PY_FRAME_PASS_SELF();
goto start_frame;
}

Expand Down Expand Up @@ -5557,6 +5616,7 @@ MISS_WITH_INLINE_CACHE(STORE_SUBSCR)

error:
call_shape.kwnames = NULL;
call_shape.init_pass_self = false;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note to reviewers: We don't set frame->self = NULL here because doing so would discard self whenever an exception is raised, even one that __init__ catches and handles itself. For example:

class A:
  def __init__(self):
    try:
      A.a # Kaboom!
    except AttributeError:
      pass

for _ in range(10):
  print(A())

/* Double-check exception status. */
#ifdef NDEBUG
if (!_PyErr_Occurred(tstate)) {
Expand Down Expand Up @@ -5598,6 +5658,7 @@ MISS_WITH_INLINE_CACHE(STORE_SUBSCR)
assert(STACK_LEVEL() == 0);
_PyFrame_SetStackPointer(frame, stack_pointer);
frame->f_state = FRAME_RAISED;
Py_CLEAR(frame->self);
TRACE_FUNCTION_UNWIND();
DTRACE_FUNCTION_EXIT();
goto exit_unwind;
Expand Down
Loading