Skip to content

gh-132775: Add _PyCode_VerifyStateless() #133221

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -616,6 +616,47 @@ PyAPI_FUNC(int) _PyCode_SetUnboundVarCounts(
PyObject *globalsns,
PyObject *builtinsns);


/* "Stateless" code is a function or code object which does not rely on
* external state or internal state. It may rely on arguments and
* builtins, but not globals or a closure. Thus it does not rely
* on __globals__ or __closure__, and a stateless function
* is equivalent to its code object.
*
* Stateless code also does not keep any persistent state
* of its own, so it can't have any executors, monitoring,
* instrumentation, or "extras" (i.e. co_extra).
*
* Stateless code may create nested functions, including closures.
* However, nested functions must themselves be stateless, except they
* *can* close on the enclosing locals.
*
* Stateless code may return any value, including nested functions and closures.
*
* Stateless code that takes no arguments and doesn't return anything
* may be treated like a script.
*
* We consider stateless code to be "portable" if it does not return
* any object that holds a reference to any of the code's locals. Thus
* generators and coroutines are not portable. Likewise a function
* that returns a closure is not portable. The concept of
* portability is useful in cases where the code is run
* in a different execution context than where
* the return value will be used. */

PyAPI_FUNC(int) _PyCode_CheckNoInternalState(PyCodeObject *, const char **);
PyAPI_FUNC(int) _PyCode_CheckNoExternalState(
PyCodeObject *,
_PyCode_var_counts_t *,
const char **);
PyAPI_FUNC(int) _PyCode_VerifyStateless(
PyThreadState *,
PyCodeObject *,
PyObject *globalnames,
PyObject *globalsns,
PyObject *builtinsns);

PyAPI_FUNC(int) _PyCode_CheckPureFunction(PyCodeObject *, const char **);
PyAPI_FUNC(int) _PyCode_ReturnsOnlyNone(PyCodeObject *);


Expand Down
7 changes: 7 additions & 0 deletions Include/internal/pycore_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@ PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version, PyObject **p_cod
extern PyObject *_Py_set_function_type_params(
PyThreadState* unused, PyObject *func, PyObject *type_params);


/* See pycore_code.h for explanation about what "stateless" means. */

PyAPI_FUNC(int)
_PyFunction_VerifyStateless(PyThreadState *, PyObject *);


#ifdef __cplusplus
}
#endif
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_opcode_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ extern "C" {

/* Evaluates to nonzero when `opcode` equals RETURN_VALUE.
 * The argument is parenthesized so that compound expressions
 * (e.g. `word & 0xFF`) expand with the intended precedence. */
#define IS_RETURN_OPCODE(opcode) \
    ((opcode) == RETURN_VALUE)
/* Evaluates to nonzero when `opcode` equals RAISE_VARARGS or RERAISE.
 * The argument is parenthesized so that compound expressions
 * (e.g. `word & 0xFF`) expand with the intended precedence.
 * Note: `opcode` may be evaluated twice, so avoid side effects in it. */
#define IS_RAISE_OPCODE(opcode) \
    ((opcode) == RAISE_VARARGS || (opcode) == RERAISE)


/* Flags used in the oparg for MAKE_FUNCTION */
Expand Down
63 changes: 63 additions & 0 deletions Lib/test/_code_definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,40 @@ def spam_minimal():
return


# Fixture: a function that references no module-level names, only its own
# locals and the builtins callable/tuple/list/print.
# The exact set of locals and referenced names is asserted by test_code.py,
# so do not add, remove, or rename anything in the body.
def spam_with_builtins():
# 'x' is never read; it still counts as one of the function's locals.
x = 42
values = (42,)
checks = tuple(callable(v) for v in values)
res = callable(values), tuple(values), list(values), checks
print(res)


# Fixture: like spam_with_builtins(), but it additionally reads the
# module-level functions 'spam' and 'spam_minimal'.
# It is listed in STATELESS_CODE but not in STATELESS_FUNCTIONS.
# The exact set of locals and referenced names is asserted by test_code.py,
# so do not add, remove, or rename anything in the body.
def spam_with_globals_and_builtins():
func1 = spam
func2 = spam_minimal
funcs = (func1, func2)
checks = tuple(callable(f) for f in funcs)
res = callable(funcs), tuple(funcs), list(funcs), checks
print(res)


# Fixture: returns its single positional-or-keyword argument unchanged.
# Listed in STATELESS_FUNCTIONS.
def spam_returns_arg(x):
return x


# Fixture: defines and calls a nested function that captures nothing from
# the enclosing scope (so 'eggs' is a plain local, not part of a closure).
# Listed in STATELESS_FUNCTIONS.
def spam_with_inner_not_closure():
def eggs():
pass
eggs()


# Fixture: defines and calls a nested function that closes over the
# enclosing local 'x' (test_code.py expects 'x' to be a cell variable).
# Listed in STATELESS_FUNCTIONS.
def spam_with_inner_closure():
x = 42
def eggs():
# Reads 'x' from the enclosing function's scope.
print(x)
eggs()


def spam_full(a, b, /, c, d:int=1, *args, e, f:object=None, **kwargs) -> tuple:
# arg defaults, kwarg defaults
# annotations
Expand Down Expand Up @@ -98,6 +132,11 @@ def ham_C_closure(z):
TOP_FUNCTIONS = [
# shallow
spam_minimal,
spam_with_builtins,
spam_with_globals_and_builtins,
spam_returns_arg,
spam_with_inner_not_closure,
spam_with_inner_closure,
spam_full,
spam,
# outer func
Expand Down Expand Up @@ -127,6 +166,30 @@ def ham_C_closure(z):
*NESTED_FUNCTIONS,
]

# Functions expected to pass the stateless check when passed as function
# objects. test_code.py's test_stateless hands each one directly to
# _testinternalcapi.verify_stateless_code().
STATELESS_FUNCTIONS = [
spam,
spam_minimal,
spam_with_builtins,
spam_returns_arg,
spam_with_inner_not_closure,
spam_with_inner_closure,
spam_N,
spam_C,
spam_NN,
spam_NC,
spam_CN,
spam_CC,
eggs_nested,
eggs_nested_N,
ham_nested,
ham_C_nested
]
# Functions whose *code object* (func.__code__) is expected to pass the
# stateless check. This is a superset of STATELESS_FUNCTIONS: the two extra
# entries are expected to pass only as bare code objects, not as functions.
STATELESS_CODE = [
*STATELESS_FUNCTIONS,
spam_with_globals_and_builtins,
spam_full,
]


# generators

Expand Down
95 changes: 75 additions & 20 deletions Lib/test/test_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@
import _testinternalcapi
except ModuleNotFoundError:
_testinternalcapi = None
import test._code_definitions as defs

COPY_FREE_VARS = opmap['COPY_FREE_VARS']

Expand Down Expand Up @@ -671,9 +672,31 @@ def test_local_kinds(self):
VARARGS = CO_FAST_LOCAL | CO_FAST_ARG_VAR | CO_FAST_ARG_POS
VARKWARGS = CO_FAST_LOCAL | CO_FAST_ARG_VAR | CO_FAST_ARG_KW

import test._code_definitions as defs
funcs = {
defs.spam_minimal: {},
defs.spam_with_builtins: {
'x': CO_FAST_LOCAL,
'values': CO_FAST_LOCAL,
'checks': CO_FAST_LOCAL,
'res': CO_FAST_LOCAL,
},
defs.spam_with_globals_and_builtins: {
'func1': CO_FAST_LOCAL,
'func2': CO_FAST_LOCAL,
'funcs': CO_FAST_LOCAL,
'checks': CO_FAST_LOCAL,
'res': CO_FAST_LOCAL,
},
defs.spam_returns_arg: {
'x': POSORKW,
},
defs.spam_with_inner_not_closure: {
'eggs': CO_FAST_LOCAL,
},
defs.spam_with_inner_closure: {
'x': CO_FAST_CELL,
'eggs': CO_FAST_LOCAL,
},
defs.spam_full: {
'a': POSONLY,
'b': POSONLY,
Expand Down Expand Up @@ -859,9 +882,26 @@ def new_var_counts(*,
},
}

import test._code_definitions as defs
funcs = {
defs.spam_minimal: new_var_counts(),
defs.spam_with_builtins: new_var_counts(
purelocals=4,
globalvars=4,
),
defs.spam_with_globals_and_builtins: new_var_counts(
purelocals=5,
globalvars=6,
),
defs.spam_returns_arg: new_var_counts(
posorkw=1,
),
defs.spam_with_inner_not_closure: new_var_counts(
purelocals=1,
),
defs.spam_with_inner_closure: new_var_counts(
othercells=1,
purelocals=1,
),
defs.spam_full: new_var_counts(
posonly=2,
posorkw=2,
Expand Down Expand Up @@ -958,55 +998,70 @@ def new_var_counts(*,
counts = _testinternalcapi.get_code_var_counts(func.__code__)
self.assertEqual(counts, expected)

def func_with_globals_and_builtins():
mod1 = _testinternalcapi
mod2 = dis
mods = (mod1, mod2)
checks = tuple(callable(m) for m in mods)
return callable(mod2), tuple(mods), list(mods), checks

func = func_with_globals_and_builtins
func = defs.spam_with_globals_and_builtins
with self.subTest(f'{func} code'):
expected = new_var_counts(
purelocals=4,
globalvars=5,
purelocals=5,
globalvars=6,
)
counts = _testinternalcapi.get_code_var_counts(func.__code__)
self.assertEqual(counts, expected)

with self.subTest(f'{func} with own globals and builtins'):
expected = new_var_counts(
purelocals=4,
globalvars=(2, 3),
purelocals=5,
globalvars=(2, 4),
)
counts = _testinternalcapi.get_code_var_counts(func)
self.assertEqual(counts, expected)

with self.subTest(f'{func} without globals'):
expected = new_var_counts(
purelocals=4,
globalvars=(0, 3, 2),
purelocals=5,
globalvars=(0, 4, 2),
)
counts = _testinternalcapi.get_code_var_counts(func, globalsns={})
self.assertEqual(counts, expected)

with self.subTest(f'{func} without both'):
expected = new_var_counts(
purelocals=4,
globalvars=5,
purelocals=5,
globalvars=6,
)
counts = _testinternalcapi.get_code_var_counts(func, globalsns={},
builtinsns={})
self.assertEqual(counts, expected)

with self.subTest(f'{func} without builtins'):
expected = new_var_counts(
purelocals=4,
globalvars=(2, 0, 3),
purelocals=5,
globalvars=(2, 0, 4),
)
counts = _testinternalcapi.get_code_var_counts(func, builtinsns={})
self.assertEqual(counts, expected)

@unittest.skipIf(_testinternalcapi is None, "missing _testinternalcapi")
def test_stateless(self):
    self.maxDiff = None

    # Positive cases: everything listed as stateless must verify cleanly,
    # first as a bare code object, then as a function object.
    for target in defs.STATELESS_CODE:
        with self.subTest((target, '(code)')):
            _testinternalcapi.verify_stateless_code(target.__code__)
    for target in defs.STATELESS_FUNCTIONS:
        with self.subTest((target, '(func)')):
            _testinternalcapi.verify_stateless_code(target)

    # Negative cases: any other known function must be rejected in
    # whichever form (code and/or function) it is not listed for.
    for target in defs.FUNCTIONS:
        code_is_stateless = target in defs.STATELESS_CODE
        if not code_is_stateless:
            with self.subTest((target, '(code)')), \
                    self.assertRaises(Exception):
                _testinternalcapi.verify_stateless_code(target.__code__)

        func_is_stateless = target in defs.STATELESS_FUNCTIONS
        if not func_is_stateless:
            with self.subTest((target, '(func)')), \
                    self.assertRaises(Exception):
                _testinternalcapi.verify_stateless_code(target)


def isinterned(s):
return s is sys.intern(('_' + s + '_')[1:-1])
Expand Down
44 changes: 44 additions & 0 deletions Modules/_testinternalcapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -1165,6 +1165,48 @@ get_code_var_counts(PyObject *self, PyObject *_args, PyObject *_kwargs)
return NULL;
}

/* Test helper exposing _PyCode_VerifyStateless() to Python.
 *
 * Python-level signature:
 *   verify_stateless_code(code, [globalnames], [globalsns], [builtinsns])
 *
 *   code         a code object or a function
 *   globalnames  optional set
 *   globalsns    optional dict; when omitted and `code` is a function,
 *                the function's globals are used
 *   builtinsns   optional dict; when omitted and `code` is a function,
 *                the function's builtins are used
 *
 * Returns None when _PyCode_VerifyStateless() succeeds.  Returns NULL when
 * argument parsing fails, when `code` is neither a code object nor a
 * function (TypeError), or when _PyCode_VerifyStateless() returns < 0
 * (presumably with an exception set by that call). */
static PyObject *
verify_stateless_code(PyObject *self, PyObject *args, PyObject *kwargs)
{
    PyThreadState *tstate = _PyThreadState_GET();
    PyObject *codearg;
    PyObject *globalnames = NULL;
    PyObject *globalsns = NULL;
    PyObject *builtinsns = NULL;
    static char *kwlist[] = {"code", "globalnames",
                             "globalsns", "builtinsns", NULL};
    /* The text after ':' is the function name used in error messages,
     * so it must name this function. */
    if (!PyArg_ParseTupleAndKeywords(args, kwargs,
                    "O|O!O!O!:verify_stateless_code", kwlist,
                    &codearg, &PySet_Type, &globalnames,
                    &PyDict_Type, &globalsns, &PyDict_Type, &builtinsns))
    {
        return NULL;
    }
    if (PyFunction_Check(codearg)) {
        /* Fall back to the function's own namespaces for anything the
         * caller did not pass explicitly. */
        if (globalsns == NULL) {
            globalsns = PyFunction_GET_GLOBALS(codearg);
        }
        if (builtinsns == NULL) {
            builtinsns = PyFunction_GET_BUILTINS(codearg);
        }
        codearg = PyFunction_GET_CODE(codearg);
    }
    else if (!PyCode_Check(codearg)) {
        PyErr_SetString(PyExc_TypeError,
                        "argument must be a code object or a function");
        return NULL;
    }
    PyCodeObject *code = (PyCodeObject *)codearg;

    if (_PyCode_VerifyStateless(
                tstate, code, globalnames, globalsns, builtinsns) < 0)
    {
        return NULL;
    }
    Py_RETURN_NONE;
}


static PyObject *
jit_enabled(PyObject *self, PyObject *arg)
{
Expand Down Expand Up @@ -2288,6 +2330,8 @@ static PyMethodDef module_functions[] = {
{"get_co_localskinds", get_co_localskinds, METH_O, NULL},
{"get_code_var_counts", _PyCFunction_CAST(get_code_var_counts),
METH_VARARGS | METH_KEYWORDS, NULL},
{"verify_stateless_code", _PyCFunction_CAST(verify_stateless_code),
METH_VARARGS | METH_KEYWORDS, NULL},
{"jit_enabled", jit_enabled, METH_NOARGS, NULL},
#ifdef _Py_TIER2
{"add_executor_dependency", add_executor_dependency, METH_VARARGS, NULL},
Expand Down
Loading