Skip to content

bpo-43760: Speed up check for tracing in interpreter dispatch #25276

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion Include/cpython/pystate.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,21 @@ typedef int (*Py_tracefunc)(PyObject *, PyFrameObject *, int, PyObject *);
#define PyTrace_OPCODE 7


typedef struct _cframe {
/* This struct will be threaded through the C stack
* allowing fast access to per-thread state that needs
* to be accessed quickly by the interpreter, but can
* be modified outside of the interpreter.
*
* WARNING: This makes data on the C stack accessible from
* heap objects. Care must be taken to maintain stack
* discipline and make sure that instances of this struct cannot
* be accessed outside of their lifetime.
*/
/* Non-zero when a trace or profile function is installed for the
* thread. It is temporarily set to zero while a trace/profile
* function or an audit hook is being called. */
int use_tracing;
/* The CFrame that was current when this one was pushed. It is put
* back into the thread state when this one is popped. */
struct _cframe *previous;
} CFrame;

typedef struct _err_stackitem {
/* This struct represents an entry on the exception stack, which is a
* per-coroutine state. (Coroutine in the computer science sense,
Expand Down Expand Up @@ -61,7 +76,10 @@ struct _ts {
This is to prevent the actual trace/profile code from being recorded in
the trace/profile. */
int tracing;
int use_tracing;

/* Pointer to current CFrame in the C stack frame of the currently,
* or most recently, executing _PyEval_EvalFrameDefault. */
CFrame *cframe;

Py_tracefunc c_profilefunc;
Py_tracefunc c_tracefunc;
Expand Down Expand Up @@ -129,6 +147,8 @@ struct _ts {
/* Unique thread state id. */
uint64_t id;

CFrame root_cframe;

/* XXX signal handlers should also be here */

};
Expand Down
6 changes: 0 additions & 6 deletions Include/internal/pycore_interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,6 @@ struct _pending_calls {

struct _ceval_state {
int recursion_limit;
/* Records whether tracing is on for any thread. Counts the number
of threads for which tstate->c_tracefunc is non-NULL, so if the
value is 0, we know we don't have to check this thread's
c_tracefunc. This speeds up the if statement in
_PyEval_EvalFrameDefault() after fast_next_opcode. */
int tracing_possible;
/* This single variable consolidates all requests to break out of
the fast path in the eval loop. */
_Py_atomic_int eval_breaker;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Move the flag for checking whether tracing is enabled from the heap to the C stack.
This should speed up dispatch in the interpreter.
55 changes: 32 additions & 23 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ typedef struct {
PyCodeObject *code; // The code object for the bounds. May be NULL.
int instr_prev; // Only valid if code != NULL.
PyCodeAddressRange bounds; // Only valid if code != NULL.
CFrame cframe;
} PyTraceInfo;


Expand Down Expand Up @@ -1110,8 +1111,6 @@ match_class(PyThreadState *tstate, PyObject *subject, PyObject *type,
static int do_raise(PyThreadState *tstate, PyObject *exc, PyObject *cause);
static int unpack_iterable(PyThreadState *, PyObject *, int, int, PyObject **);

#define _Py_TracingPossible(ceval) ((ceval)->tracing_possible)


PyObject *
PyEval_EvalCode(PyObject *co, PyObject *globals, PyObject *locals)
Expand Down Expand Up @@ -1308,7 +1307,7 @@ eval_frame_handle_pending(PyThreadState *tstate)

#define DISPATCH() \
{ \
if (_Py_TracingPossible(ceval2) OR_DTRACE_LINE OR_LLTRACE) { \
if (trace_info.cframe.use_tracing OR_DTRACE_LINE OR_LLTRACE) { \
goto tracing_dispatch; \
} \
f->f_lasti = INSTR_OFFSET(); \
Expand Down Expand Up @@ -1596,8 +1595,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
int oparg; /* Current opcode argument, if any */
PyObject **fastlocals, **freevars;
PyObject *retval = NULL; /* Return value */
struct _ceval_state * const ceval2 = &tstate->interp->ceval;
_Py_atomic_int * const eval_breaker = &ceval2->eval_breaker;
_Py_atomic_int * const eval_breaker = &tstate->interp->ceval.eval_breaker;
PyCodeObject *co;

const _Py_CODEUNIT *first_instr;
Expand All @@ -1617,11 +1615,20 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
/* Mark trace_info as uninitialized */
trace_info.code = NULL;

/* WARNING: Because the CFrame lives on the C stack,
* but can be accessed from a heap-allocated object (tstate),
* strict stack discipline must be maintained.
*/
CFrame *prev_cframe = tstate->cframe;
trace_info.cframe.use_tracing = prev_cframe->use_tracing;
trace_info.cframe.previous = prev_cframe;
tstate->cframe = &trace_info.cframe;

/* push frame */
tstate->frame = f;
co = f->f_code;

if (tstate->use_tracing) {
if (trace_info.cframe.use_tracing) {
if (tstate->c_tracefunc != NULL) {
/* tstate->c_tracefunc, if defined, is a
function that will be called on *every* entry
Expand Down Expand Up @@ -1783,7 +1790,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)

/* line-by-line tracing support */

if (_Py_TracingPossible(ceval2) &&
if (trace_info.cframe.use_tracing &&
tstate->c_tracefunc != NULL && !tstate->tracing) {
int err;
/* see maybe_call_line_trace()
Expand Down Expand Up @@ -4544,7 +4551,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
PUSH(val);
PUSH(exc);
JUMPTO(handler);
if (_Py_TracingPossible(ceval2)) {
if (trace_info.cframe.use_tracing) {
trace_info.instr_prev = INT_MAX;
}
/* Resume normal execution */
Expand All @@ -4568,7 +4575,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
f->f_stackdepth = 0;
f->f_state = FRAME_RAISED;
exiting:
if (tstate->use_tracing) {
if (trace_info.cframe.use_tracing) {
if (tstate->c_tracefunc) {
if (call_trace_protected(tstate->c_tracefunc, tstate->c_traceobj,
tstate, f, &trace_info, PyTrace_RETURN, retval)) {
Expand All @@ -4585,6 +4592,10 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)

/* pop frame */
exit_eval_frame:
/* Restore previous cframe */
tstate->cframe = trace_info.cframe.previous;
tstate->cframe->use_tracing = trace_info.cframe.use_tracing;

if (PyDTrace_FUNCTION_RETURN_ENABLED())
dtrace_function_return(f);
_Py_LeaveRecursiveCall(tstate);
Expand Down Expand Up @@ -5508,7 +5519,7 @@ call_trace(Py_tracefunc func, PyObject *obj,
if (tstate->tracing)
return 0;
tstate->tracing++;
tstate->use_tracing = 0;
tstate->cframe->use_tracing = 0;
if (frame->f_lasti < 0) {
frame->f_lineno = frame->f_code->co_firstlineno;
}
Expand All @@ -5518,7 +5529,7 @@ call_trace(Py_tracefunc func, PyObject *obj,
}
result = func(obj, frame, what, arg);
frame->f_lineno = 0;
tstate->use_tracing = ((tstate->c_tracefunc != NULL)
tstate->cframe->use_tracing = ((tstate->c_tracefunc != NULL)
|| (tstate->c_profilefunc != NULL));
tstate->tracing--;
return result;
Expand All @@ -5529,15 +5540,15 @@ _PyEval_CallTracing(PyObject *func, PyObject *args)
{
PyThreadState *tstate = _PyThreadState_GET();
int save_tracing = tstate->tracing;
int save_use_tracing = tstate->use_tracing;
int save_use_tracing = tstate->cframe->use_tracing;
PyObject *result;

tstate->tracing = 0;
tstate->use_tracing = ((tstate->c_tracefunc != NULL)
tstate->cframe->use_tracing = ((tstate->c_tracefunc != NULL)
|| (tstate->c_profilefunc != NULL));
result = PyObject_Call(func, args, NULL);
tstate->tracing = save_tracing;
tstate->use_tracing = save_use_tracing;
tstate->cframe->use_tracing = save_use_tracing;
return result;
}

Expand Down Expand Up @@ -5591,15 +5602,15 @@ _PyEval_SetProfile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg)
tstate->c_profilefunc = NULL;
tstate->c_profileobj = NULL;
/* Must make sure that tracing is not ignored if 'profileobj' is freed */
tstate->use_tracing = tstate->c_tracefunc != NULL;
tstate->cframe->use_tracing = tstate->c_tracefunc != NULL;
Py_XDECREF(profileobj);

Py_XINCREF(arg);
tstate->c_profileobj = arg;
tstate->c_profilefunc = func;

/* Flag that tracing or profiling is turned on */
tstate->use_tracing = (func != NULL) || (tstate->c_tracefunc != NULL);
tstate->cframe->use_tracing = (func != NULL) || (tstate->c_tracefunc != NULL);
return 0;
}

Expand Down Expand Up @@ -5627,22 +5638,20 @@ _PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg)
return -1;
}

struct _ceval_state *ceval2 = &tstate->interp->ceval;
PyObject *traceobj = tstate->c_traceobj;
ceval2->tracing_possible += (func != NULL) - (tstate->c_tracefunc != NULL);

tstate->c_tracefunc = NULL;
tstate->c_traceobj = NULL;
/* Must make sure that profiling is not ignored if 'traceobj' is freed */
tstate->use_tracing = (tstate->c_profilefunc != NULL);
tstate->cframe->use_tracing = (tstate->c_profilefunc != NULL);
Py_XDECREF(traceobj);

Py_XINCREF(arg);
tstate->c_traceobj = arg;
tstate->c_tracefunc = func;

/* Flag that tracing or profiling is turned on */
tstate->use_tracing = ((func != NULL)
tstate->cframe->use_tracing = ((func != NULL)
|| (tstate->c_profilefunc != NULL));

return 0;
Expand Down Expand Up @@ -5837,7 +5846,7 @@ PyEval_GetFuncDesc(PyObject *func)
}

#define C_TRACE(x, call) \
if (tstate->use_tracing && tstate->c_profilefunc) { \
if (trace_info->cframe.use_tracing && tstate->c_profilefunc) { \
if (call_trace(tstate->c_profilefunc, tstate->c_profileobj, \
tstate, tstate->frame, trace_info, \
PyTrace_C_CALL, func)) { \
Expand Down Expand Up @@ -5918,7 +5927,7 @@ call_function(PyThreadState *tstate,
Py_ssize_t nargs = oparg - nkwargs;
PyObject **stack = (*pp_stack) - nargs - nkwargs;

if (tstate->use_tracing) {
if (trace_info->cframe.use_tracing) {
x = trace_call_function(tstate, trace_info, func, stack, nargs, kwnames);
}
else {
Expand Down Expand Up @@ -5951,7 +5960,7 @@ do_call_core(PyThreadState *tstate,
}
else if (Py_IS_TYPE(func, &PyMethodDescr_Type)) {
Py_ssize_t nargs = PyTuple_GET_SIZE(callargs);
if (nargs > 0 && tstate->use_tracing) {
if (nargs > 0 && trace_info->cframe.use_tracing) {
/* We need to create a temporary bound method as argument
for profiling.

Expand Down
3 changes: 2 additions & 1 deletion Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -624,7 +624,8 @@ new_threadstate(PyInterpreterState *interp, int init)
tstate->recursion_headroom = 0;
tstate->stackcheck_counter = 0;
tstate->tracing = 0;
tstate->use_tracing = 0;
tstate->root_cframe.use_tracing = 0;
tstate->cframe = &tstate->root_cframe;
tstate->gilstate_counter = 0;
tstate->async_exc = NULL;
tstate->thread_id = PyThread_get_thread_ident();
Expand Down
8 changes: 4 additions & 4 deletions Python/sysmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ sys_audit_tstate(PyThreadState *ts, const char *event,

/* Disallow tracing in hooks unless explicitly enabled */
ts->tracing++;
ts->use_tracing = 0;
ts->cframe->use_tracing = 0;
while ((hook = PyIter_Next(hooks)) != NULL) {
_Py_IDENTIFIER(__cantrace__);
PyObject *o;
Expand All @@ -265,22 +265,22 @@ sys_audit_tstate(PyThreadState *ts, const char *event,
break;
}
if (canTrace) {
ts->use_tracing = (ts->c_tracefunc || ts->c_profilefunc);
ts->cframe->use_tracing = (ts->c_tracefunc || ts->c_profilefunc);
ts->tracing--;
}
PyObject* args[2] = {eventName, eventArgs};
o = _PyObject_FastCallTstate(ts, hook, args, 2);
if (canTrace) {
ts->tracing++;
ts->use_tracing = 0;
ts->cframe->use_tracing = 0;
}
if (!o) {
break;
}
Py_DECREF(o);
Py_CLEAR(hook);
}
ts->use_tracing = (ts->c_tracefunc || ts->c_profilefunc);
ts->cframe->use_tracing = (ts->c_tracefunc || ts->c_profilefunc);
ts->tracing--;
if (_PyErr_Occurred(ts)) {
goto exit;
Expand Down