From 2f6ca5874000bb0c1d1383e651c0be6d840b8000 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Tue, 12 Mar 2024 14:08:22 -0700 Subject: [PATCH 1/7] tracing thread safety --- Include/internal/pycore_ceval_state.h | 1 + .../internal/pycore_pyatomic_ft_wrappers.h | 7 + Lib/test/test_free_threading/__init__.py | 7 + .../test_free_threading/test_monitoring.py | 232 ++++++++++++++++++ Makefile.pre.in | 1 + Python/instrumentation.c | 214 +++++++++++----- Python/legacy_tracing.c | 95 ++++--- Python/pystate.c | 1 + 8 files changed, 472 insertions(+), 86 deletions(-) create mode 100644 Lib/test/test_free_threading/__init__.py create mode 100644 Lib/test/test_free_threading/test_monitoring.py diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h index b453328f15649e..168295534e036c 100644 --- a/Include/internal/pycore_ceval_state.h +++ b/Include/internal/pycore_ceval_state.h @@ -63,6 +63,7 @@ struct _ceval_runtime_state { } perf; /* Pending calls to be made only on the main thread. */ struct _pending_calls pending_mainthread; + PyMutex sys_trace_profile_mutex; }; #ifdef PY_HAVE_PERF_TRAMPOLINE diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h index 2514f51f1b0086..3f0c413ad31b5b 100644 --- a/Include/internal/pycore_pyatomic_ft_wrappers.h +++ b/Include/internal/pycore_pyatomic_ft_wrappers.h @@ -26,20 +26,27 @@ extern "C" { _Py_atomic_load_ssize_relaxed(&value) #define FT_ATOMIC_STORE_PTR(value, new_value) \ _Py_atomic_store_ptr(&value, new_value) +#define FT_ATOMIC_LOAD_PTR_ACQUIRE(value) \ + _Py_atomic_load_ptr_acquire(&value) #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) \ _Py_atomic_store_ptr_relaxed(&value, new_value) #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) \ _Py_atomic_store_ptr_release(&value, new_value) #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) \ _Py_atomic_store_ssize_relaxed(&value, new_value) +#define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) \ + _Py_atomic_store_uint8_relaxed(&value, new_value) #else #define FT_ATOMIC_LOAD_PTR(value) value #define FT_ATOMIC_LOAD_SSIZE(value) value #define FT_ATOMIC_LOAD_SSIZE_RELAXED(value) value #define FT_ATOMIC_STORE_PTR(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_PTR_ACQUIRE(value) value #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) value = new_value #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) value = new_value + #endif #ifdef __cplusplus diff --git a/Lib/test/test_free_threading/__init__.py b/Lib/test/test_free_threading/__init__.py new file mode 100644 index 00000000000000..9a89d27ba9f979 --- /dev/null +++ b/Lib/test/test_free_threading/__init__.py @@ -0,0 +1,7 @@ +import os + +from test import support + + +def load_tests(*args): + return support.load_package_tests(os.path.dirname(__file__), *args) diff --git a/Lib/test/test_free_threading/test_monitoring.py b/Lib/test/test_free_threading/test_monitoring.py new file mode 100644 index 00000000000000..b10a0e39053c1c --- /dev/null +++ b/Lib/test/test_free_threading/test_monitoring.py @@ -0,0 +1,232 @@ +"""Tests monitoring, sys.settrace, and sys.setprofile in a multi-threaded +environmenet to verify things are thread-safe in a free-threaded build""" + +import sys +import time +import unittest +import weakref + +from sys import monitoring +from test.support import is_wasi +from threading import Thread +from unittest import TestCase + + +class InstrumentationMultiThreadedMixin: + if not hasattr(sys, "gettotalrefcount"): + thread_count = 50 + func_count = 1000 + fib = 15 + else: + # Run a little faster in debug builds... + thread_count = 25 + func_count = 500 + fib = 15 + + def after_threads(self): + """Runs once after all the threads have started""" + pass + + def during_threads(self): + """Runs repeatedly while the threads are still running""" + pass + + def work(self, n, funcs): + """Fibonacci function which also calls a bunch of random functions""" + for func in funcs: + func() + if n < 2: + return n + return self.work(n - 1, funcs) + self.work(n - 2, funcs) + + def start_work(self, n, funcs): + # With the GIL builds we need to make sure that the hooks have + # a chance to run as it's possible to run w/o releasing the GIL. + time.sleep(1) + self.work(n, funcs) + + def after_test(self): + """Runs once after the test is done""" + pass + + def test_instrumention(self): + # Setup a bunch of functions which will need instrumentation... + funcs = [] + for i in range(self.func_count): + x = {} + exec("def f(): pass", x) + funcs.append(x["f"]) + + threads = [] + for i in range(self.thread_count): + # Each thread gets a copy of the func list to avoid contention + t = Thread(target=self.start_work, args=(self.fib, list(funcs))) + t.start() + threads.append(t) + + self.after_threads() + + while True: + any_alive = False + for t in threads: + if t.is_alive(): + any_alive = True + break + + if not any_alive: + break + + self.during_threads() + + self.after_test() + + +class MonitoringTestMixin: + def setUp(self): + for i in range(6): + if monitoring.get_tool(i) is None: + self.tool_id = i + monitoring.use_tool_id(i, self.__class__.__name__) + break + + def tearDown(self): + monitoring.free_tool_id(self.tool_id) + + +@unittest.skipIf(is_wasi, "WASI has no threads.") +class SetPreTraceMultiThreaded(InstrumentationMultiThreadedMixin, TestCase): + """Sets tracing one time after the threads have started""" + + def setUp(self): + super().setUp() + self.called = False + + def after_test(self): + self.assertTrue(self.called) + + def trace_func(self, frame, event, arg): + self.called = True + return self.trace_func + + def after_threads(self): + sys.settrace(self.trace_func) + + +@unittest.skipIf(is_wasi, "WASI has no threads.") +class MonitoringMultiThreaded( + MonitoringTestMixin, InstrumentationMultiThreadedMixin, TestCase +): + """Uses sys.monitoring and repeatedly toggles instrumentation on and off""" + + def setUp(self): + super().setUp() + self.set = False + self.called = False + monitoring.register_callback( + self.tool_id, monitoring.events.LINE, self.callback + ) + + def tearDown(self): + monitoring.set_events(self.tool_id, 0) + super().tearDown() + + def callback(self, *args): + self.called = True + + def after_test(self): + self.assertTrue(self.called) + + def during_threads(self): + if self.set: + monitoring.set_events( + self.tool_id, monitoring.events.CALL | monitoring.events.LINE + ) + else: + monitoring.set_events(self.tool_id, 0) + self.set = not self.set + + +@unittest.skipIf(is_wasi, "WASI has no threads.") +class SetTraceMultiThreaded(InstrumentationMultiThreadedMixin, TestCase): + """Uses sys.settrace and repeatedly toggles instrumentation on and off""" + + def setUp(self): + self.set = False + self.called = False + + def after_test(self): + self.assertTrue(self.called) + + def tearDown(self): + sys.settrace(None) + + def trace_func(self, frame, event, arg): + self.called = True + return self.trace_func + + def during_threads(self): + if self.set: + sys.settrace(self.trace_func) + else: + sys.settrace(None) + self.set = not self.set + + +@unittest.skipIf(is_wasi, "WASI has no threads.") +class SetProfileMultiThreaded(InstrumentationMultiThreadedMixin, TestCase): + """Uses sys.setprofile and repeatedly toggles instrumentation on and off""" + thread_count = 25 + func_count = 200 + fib = 15 + + def setUp(self): + self.set = False + self.called = False + + def after_test(self): + self.assertTrue(self.called) + + def tearDown(self): + sys.setprofile(None) + + def trace_func(self, frame, event, arg): + self.called = True + return self.trace_func + + def during_threads(self): + if self.set: + sys.setprofile(self.trace_func) + else: + sys.setprofile(None) + self.set = not self.set + + +@unittest.skipIf(is_wasi, "WASI has no threads.") +class MonitoringMisc(MonitoringTestMixin, TestCase): + def register_callback(self): + def callback(*args): + pass + + for i in range(200): + monitoring.register_callback(self.tool_id, monitoring.events.LINE, callback) + + self.refs.append(weakref.ref(callback)) + + def test_register_callback(self): + self.refs = [] + threads = [] + for i in range(50): + t = Thread(target=self.register_callback) + t.start() + threads.append(t) + + for thread in threads: + thread.join() + + monitoring.register_callback(self.tool_id, monitoring.events.LINE, None) + for ref in self.refs: + self.assertEqual(ref(), None) + + +if __name__ == "__main__": + unittest.main() diff --git a/Makefile.pre.in b/Makefile.pre.in index fd8678cdaf8207..f7c21a380caa99 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -2366,6 +2366,7 @@ TESTSUBDIRS= idlelib/idle_test \ test/test_doctest \ test/test_email \ test/test_email/data \ + test/test_free_threading \ test/test_future_stmt \ test/test_gdb \ test/test_import \ diff --git a/Python/instrumentation.c b/Python/instrumentation.c index 3866144a19bf74..4b743135f6d203 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -6,6 +6,7 @@ #include "pycore_call.h" #include "pycore_ceval.h" // _PY_EVAL_EVENTS_BITS #include "pycore_code.h" // _PyCode_Clear_Executors() +#include "pycore_critical_section.h" #include "pycore_frame.h" #include "pycore_interp.h" #include "pycore_long.h" @@ -13,12 +14,22 @@ #include "pycore_namespace.h" #include "pycore_object.h" #include "pycore_opcode_metadata.h" // IS_VALID_OPCODE, _PyOpcode_Caches +#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_STORE_UINT8_RELAXED #include "pycore_pyerrors.h" #include "pycore_pystate.h" // _PyInterpreterState_GET() /* Uncomment this to dump debugging output when assertions fail */ // #define INSTRUMENT_DEBUG 1 +#if defined(Py_DEBUG) && defined(Py_GIL_DISABLED) +#define ASSERT_WORLD_STOPPED_OR_LOCKED(obj) \ + if (!_PyInterpreterState_GET()->stoptheworld.world_stopped) { \ + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(obj); \ + } +#else +#define ASSERT_WORLD_STOPPED_OR_LOCKED(obj) +#endif + PyObject _PyInstrumentation_DISABLE = _PyObject_HEAD_INIT(&PyBaseObject_Type); PyObject _PyInstrumentation_MISSING = _PyObject_HEAD_INIT(&PyBaseObject_Type); @@ -275,17 +286,39 @@ compute_line(PyCodeObject *code, int offset, int8_t line_delta) return PyCode_Addr2Line(code, offset * sizeof(_Py_CODEUNIT)); } +static inline _PyCoMonitoringData * +get_monitoring(PyCodeObject *code) { + return FT_ATOMIC_LOAD_PTR_ACQUIRE(code->_co_monitoring); +} + +static inline _PyCoLineInstrumentationData * +get_lines_data(PyCodeObject *code) +{ + _PyCoMonitoringData *monitoring = get_monitoring(code); + return FT_ATOMIC_LOAD_PTR_ACQUIRE(monitoring->lines); +} + +static inline uint8_t * +get_per_instruction_opcodes(PyCodeObject *code) +{ + _PyCoMonitoringData *monitoring = get_monitoring(code); + return FT_ATOMIC_LOAD_PTR_ACQUIRE(monitoring->per_instruction_opcodes); +} + int _PyInstruction_GetLength(PyCodeObject *code, int offset) { + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + + _PyCoMonitoringData *monitoring = get_monitoring(code); int opcode = _PyCode_CODE(code)[offset].op.code; assert(opcode != 0); assert(opcode != RESERVED); if (opcode == INSTRUMENTED_LINE) { - opcode = code->_co_monitoring->lines[offset].original_opcode; + opcode = monitoring->lines[offset].original_opcode; } if (opcode == INSTRUMENTED_INSTRUCTION) { - opcode = code->_co_monitoring->per_instruction_opcodes[offset]; + opcode = monitoring->per_instruction_opcodes[offset]; } int deinstrumented = DE_INSTRUMENT[opcode]; if (deinstrumented) { @@ -424,6 +457,7 @@ dump_instrumentation_data(PyCodeObject *code, int star, FILE*out) } #define CHECK(test) do { \ + ASSERT_WORLD_STOPPED_OR_LOCKED(code); \ if (!(test)) { \ dump_instrumentation_data(code, i, stderr); \ } \ @@ -449,6 +483,8 @@ valid_opcode(int opcode) static void sanity_check_instrumentation(PyCodeObject *code) { + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + _PyCoMonitoringData *data = code->_co_monitoring; if (data == NULL) { return; @@ -551,10 +587,12 @@ int _Py_GetBaseOpcode(PyCodeObject *code, int i) { int opcode = _PyCode_CODE(code)[i].op.code; if (opcode == INSTRUMENTED_LINE) { - opcode = code->_co_monitoring->lines[i].original_opcode; + _PyCoLineInstrumentationData *lines = get_lines_data(code); + opcode = lines[i].original_opcode; } if (opcode == INSTRUMENTED_INSTRUCTION) { - opcode = code->_co_monitoring->per_instruction_opcodes[i]; + uint8_t *per_instr_opcodes = get_per_instruction_opcodes(code); + opcode = per_instr_opcodes[i]; } CHECK(opcode != INSTRUMENTED_INSTRUCTION); CHECK(opcode != INSTRUMENTED_LINE); @@ -588,7 +626,7 @@ de_instrument(PyCodeObject *code, int i, int event) return; } CHECK(_PyOpcode_Deopt[deinstrumented] == deinstrumented); - *opcode_ptr = deinstrumented; + FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, deinstrumented); if (_PyOpcode_Caches[deinstrumented]) { instr[1].counter = adaptive_counter_warmup(); } @@ -632,7 +670,7 @@ de_instrument_per_instruction(PyCodeObject *code, int i) int original_opcode = code->_co_monitoring->per_instruction_opcodes[i]; CHECK(original_opcode != 0); CHECK(original_opcode == _PyOpcode_Deopt[original_opcode]); - *opcode_ptr = original_opcode; + FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, original_opcode); if (_PyOpcode_Caches[original_opcode]) { instr[1].counter = adaptive_counter_warmup(); } @@ -665,7 +703,7 @@ instrument(PyCodeObject *code, int i) int deopt = _PyOpcode_Deopt[opcode]; int instrumented = INSTRUMENTED_OPCODES[deopt]; assert(instrumented); - *opcode_ptr = instrumented; + FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, instrumented); if (_PyOpcode_Caches[deopt]) { instr[1].counter = adaptive_counter_warmup(); } @@ -683,7 +721,7 @@ instrument_line(PyCodeObject *code, int i) _PyCoLineInstrumentationData *lines = &code->_co_monitoring->lines[i]; lines->original_opcode = _PyOpcode_Deopt[opcode]; CHECK(lines->original_opcode > 0); - *opcode_ptr = INSTRUMENTED_LINE; + FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, INSTRUMENTED_LINE); } static void @@ -712,12 +750,13 @@ instrument_per_instruction(PyCodeObject *code, int i) code->_co_monitoring->per_instruction_opcodes[i] = _PyOpcode_Deopt[opcode]; } assert(code->_co_monitoring->per_instruction_opcodes[i] > 0); - *opcode_ptr = INSTRUMENTED_INSTRUCTION; + FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, INSTRUMENTED_INSTRUCTION); } static void remove_tools(PyCodeObject * code, int offset, int event, int tools) { + Py_BEGIN_CRITICAL_SECTION(code); assert(event != PY_MONITORING_EVENT_LINE); assert(event != PY_MONITORING_EVENT_INSTRUCTION); assert(PY_MONITORING_IS_INSTRUMENTED_EVENT(event)); @@ -737,6 +776,7 @@ remove_tools(PyCodeObject * code, int offset, int event, int tools) de_instrument(code, offset, event); } } + Py_END_CRITICAL_SECTION(); } #ifndef NDEBUG @@ -752,6 +792,7 @@ tools_is_subset_for_event(PyCodeObject * code, int event, int tools) static void remove_line_tools(PyCodeObject * code, int offset, int tools) { + Py_BEGIN_CRITICAL_SECTION(code); assert(code->_co_monitoring); if (code->_co_monitoring->line_tools) { @@ -769,11 +810,13 @@ remove_line_tools(PyCodeObject * code, int offset, int tools) de_instrument_line(code, offset); } } + Py_END_CRITICAL_SECTION(); } static void add_tools(PyCodeObject * code, int offset, int event, int tools) { + ASSERT_WORLD_STOPPED_OR_LOCKED(code); assert(event != PY_MONITORING_EVENT_LINE); assert(event != PY_MONITORING_EVENT_INSTRUCTION); assert(PY_MONITORING_IS_INSTRUMENTED_EVENT(event)); @@ -794,6 +837,8 @@ add_tools(PyCodeObject * code, int offset, int event, int tools) static void add_line_tools(PyCodeObject * code, int offset, int tools) { + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + assert(tools_is_subset_for_event(code, PY_MONITORING_EVENT_LINE, tools)); assert(code->_co_monitoring); if (code->_co_monitoring->line_tools) { @@ -810,6 +855,8 @@ add_line_tools(PyCodeObject * code, int offset, int tools) static void add_per_instruction_tools(PyCodeObject * code, int offset, int tools) { + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + assert(tools_is_subset_for_event(code, PY_MONITORING_EVENT_INSTRUCTION, tools)); assert(code->_co_monitoring); if (code->_co_monitoring->per_instruction_tools) { @@ -826,6 +873,8 @@ add_per_instruction_tools(PyCodeObject * code, int offset, int tools) static void remove_per_instruction_tools(PyCodeObject * code, int offset, int tools) { + Py_BEGIN_CRITICAL_SECTION(code); + assert(code->_co_monitoring); if (code->_co_monitoring->per_instruction_tools) { uint8_t *toolsptr = &code->_co_monitoring->per_instruction_tools[offset]; @@ -842,6 +891,7 @@ remove_per_instruction_tools(PyCodeObject * code, int offset, int tools) de_instrument_per_instruction(code, offset); } } + Py_END_CRITICAL_SECTION(); } @@ -969,8 +1019,9 @@ get_tools_for_instruction(PyCodeObject *code, PyInterpreterState *interp, int i, if (PY_MONITORING_IS_INSTRUMENTED_EVENT(event)) { CHECK(is_version_up_to_date(code, interp)); CHECK(instrumentation_cross_checks(interp, code)); - if (code->_co_monitoring->tools) { - tools = code->_co_monitoring->tools[i]; + uint8_t *monitoring_tools = FT_ATOMIC_LOAD_PTR_ACQUIRE(code->_co_monitoring->tools); + if (monitoring_tools) { + tools = monitoring_tools[i]; } else { tools = code->_co_monitoring->active_monitors.tools[event]; @@ -1149,7 +1200,7 @@ _Py_call_instrumentation_exc2( int _Py_Instrumentation_GetLine(PyCodeObject *code, int index) { - _PyCoMonitoringData *monitoring = code->_co_monitoring; + _PyCoMonitoringData *monitoring = get_monitoring(code); assert(monitoring != NULL); assert(monitoring->lines != NULL); assert(index >= code->_co_firsttraceable); @@ -1168,7 +1219,7 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame, assert(instrumentation_cross_checks(tstate->interp, code)); int i = (int)(instr - _PyCode_CODE(code)); - _PyCoMonitoringData *monitoring = code->_co_monitoring; + _PyCoMonitoringData *monitoring = get_monitoring(code); _PyCoLineInstrumentationData *line_data = &monitoring->lines[i]; if (tstate->tracing) { goto done; @@ -1189,10 +1240,12 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame, goto done; } } - uint8_t tools = code->_co_monitoring->line_tools != NULL ? - code->_co_monitoring->line_tools[i] : + + uint8_t *line_tools = FT_ATOMIC_LOAD_PTR_ACQUIRE(monitoring->line_tools); + uint8_t tools = line_tools != NULL ? + line_tools[i] : (interp->monitors.tools[PY_MONITORING_EVENT_LINE] | - code->_co_monitoring->local_monitors.tools[PY_MONITORING_EVENT_LINE] + monitoring->local_monitors.tools[PY_MONITORING_EVENT_LINE] ); /* Special case sys.settrace to avoid boxing the line number, * only to immediately unbox it. */ @@ -1269,15 +1322,17 @@ _Py_call_instrumentation_instruction(PyThreadState *tstate, _PyInterpreterFrame* assert(is_version_up_to_date(code, tstate->interp)); assert(instrumentation_cross_checks(tstate->interp, code)); int offset = (int)(instr - _PyCode_CODE(code)); - _PyCoMonitoringData *instrumentation_data = code->_co_monitoring; - assert(instrumentation_data->per_instruction_opcodes); - int next_opcode = instrumentation_data->per_instruction_opcodes[offset]; + _PyCoMonitoringData *instr_data = get_monitoring(code); + uint8_t *per_instr_opcodes = FT_ATOMIC_LOAD_PTR_ACQUIRE(instr_data->per_instruction_opcodes); + assert(per_instr_opcodes); + int next_opcode = per_instr_opcodes[offset]; if (tstate->tracing) { return next_opcode; } PyInterpreterState *interp = tstate->interp; - uint8_t tools = instrumentation_data->per_instruction_tools != NULL ? - instrumentation_data->per_instruction_tools[offset] : + uint8_t *per_instr_tools = FT_ATOMIC_LOAD_PTR_ACQUIRE(instr_data->per_instruction_tools); + uint8_t tools = per_instr_tools != NULL ? + per_instr_tools[offset] : (interp->monitors.tools[PY_MONITORING_EVENT_INSTRUCTION] | code->_co_monitoring->local_monitors.tools[PY_MONITORING_EVENT_INSTRUCTION] ); @@ -1320,15 +1375,23 @@ _PyMonitoring_RegisterCallback(int tool_id, int event_id, PyObject *obj) PyInterpreterState *is = _PyInterpreterState_GET(); assert(0 <= tool_id && tool_id < PY_MONITORING_TOOL_IDS); assert(0 <= event_id && event_id < _PY_MONITORING_EVENTS); +#ifdef Py_GIL_DISABLED + PyObject *callback = _Py_atomic_exchange_ptr( + &is->monitoring_callables[tool_id][event_id], + Py_XNewRef(obj) + ); +#else PyObject *callback = is->monitoring_callables[tool_id][event_id]; is->monitoring_callables[tool_id][event_id] = Py_XNewRef(obj); +#endif return callback; } static void -initialize_tools(PyCodeObject *code) +initialize_tools(PyCodeObject *code, uint8_t* tools) { - uint8_t* tools = code->_co_monitoring->tools; + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + assert(tools != NULL); int code_len = (int)Py_SIZE(code); for (int i = 0; i < code_len; i++) { @@ -1382,9 +1445,10 @@ initialize_tools(PyCodeObject *code) #define NO_LINE -128 static void -initialize_lines(PyCodeObject *code) +initialize_lines(PyCodeObject *code, _PyCoLineInstrumentationData *line_data) { - _PyCoLineInstrumentationData *line_data = code->_co_monitoring->lines; + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + assert(line_data != NULL); int code_len = (int)Py_SIZE(code); PyCodeAddressRange range; @@ -1499,9 +1563,10 @@ initialize_lines(PyCodeObject *code) } static void -initialize_line_tools(PyCodeObject *code, _Py_LocalMonitors *all_events) +initialize_line_tools(PyCodeObject *code, uint8_t *line_tools, _Py_LocalMonitors *all_events) { - uint8_t *line_tools = code->_co_monitoring->line_tools; + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + assert(line_tools != NULL); int code_len = (int)Py_SIZE(code); for (int i = 0; i < code_len; i++) { @@ -1514,18 +1579,19 @@ allocate_instrumentation_data(PyCodeObject *code) { if (code->_co_monitoring == NULL) { - code->_co_monitoring = PyMem_Malloc(sizeof(_PyCoMonitoringData)); - if (code->_co_monitoring == NULL) { + _PyCoMonitoringData *monitoring = PyMem_Malloc(sizeof(_PyCoMonitoringData)); + if (monitoring == NULL) { PyErr_NoMemory(); return -1; } - code->_co_monitoring->local_monitors = (_Py_LocalMonitors){ 0 }; - code->_co_monitoring->active_monitors = (_Py_LocalMonitors){ 0 }; - code->_co_monitoring->tools = NULL; - code->_co_monitoring->lines = NULL; - code->_co_monitoring->line_tools = NULL; - code->_co_monitoring->per_instruction_opcodes = NULL; - code->_co_monitoring->per_instruction_tools = NULL; + monitoring->local_monitors = (_Py_LocalMonitors){ 0 }; + monitoring->active_monitors = (_Py_LocalMonitors){ 0 }; + monitoring->tools = NULL; + monitoring->lines = NULL; + monitoring->line_tools = NULL; + monitoring->per_instruction_opcodes = NULL; + monitoring->per_instruction_tools = NULL; + FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring, monitoring); } return 0; } @@ -1533,6 +1599,8 @@ allocate_instrumentation_data(PyCodeObject *code) static int update_instrumentation_data(PyCodeObject *code, PyInterpreterState *interp) { + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + int code_len = (int)Py_SIZE(code); if (allocate_instrumentation_data(code)) { return -1; @@ -1542,61 +1610,70 @@ update_instrumentation_data(PyCodeObject *code, PyInterpreterState *interp) code->_co_monitoring->local_monitors); bool multitools = multiple_tools(&all_events); if (code->_co_monitoring->tools == NULL && multitools) { - code->_co_monitoring->tools = PyMem_Malloc(code_len); - if (code->_co_monitoring->tools == NULL) { + uint8_t *tools = PyMem_Malloc(code_len); + if (tools == NULL) { PyErr_NoMemory(); return -1; } - initialize_tools(code); + initialize_tools(code, tools); + FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring->tools, tools); } if (all_events.tools[PY_MONITORING_EVENT_LINE]) { if (code->_co_monitoring->lines == NULL) { - code->_co_monitoring->lines = PyMem_Malloc(code_len * sizeof(_PyCoLineInstrumentationData)); - if (code->_co_monitoring->lines == NULL) { + _PyCoLineInstrumentationData *lines = PyMem_Malloc(code_len * sizeof(_PyCoLineInstrumentationData)); + if (lines == NULL) { PyErr_NoMemory(); return -1; } - initialize_lines(code); + initialize_lines(code, lines); + FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring->lines, lines); } if (multitools && code->_co_monitoring->line_tools == NULL) { - code->_co_monitoring->line_tools = PyMem_Malloc(code_len); - if (code->_co_monitoring->line_tools == NULL) { + uint8_t *line_tools = PyMem_Malloc(code_len); + if (line_tools == NULL) { PyErr_NoMemory(); return -1; } - initialize_line_tools(code, &all_events); + initialize_line_tools(code, line_tools, &all_events); + FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring->line_tools, line_tools); } } if (all_events.tools[PY_MONITORING_EVENT_INSTRUCTION]) { if (code->_co_monitoring->per_instruction_opcodes == NULL) { - code->_co_monitoring->per_instruction_opcodes = PyMem_Malloc(code_len * sizeof(_PyCoLineInstrumentationData)); - if (code->_co_monitoring->per_instruction_opcodes == NULL) { + uint8_t *per_instruction_opcodes = PyMem_Malloc(code_len * sizeof(_PyCoLineInstrumentationData)); + if (per_instruction_opcodes == NULL) { PyErr_NoMemory(); return -1; } /* This may not be necessary, as we can initialize this memory lazily, but it helps catch errors. */ for (int i = 0; i < code_len; i++) { - code->_co_monitoring->per_instruction_opcodes[i] = 0; + per_instruction_opcodes[i] = 0; } + FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring->per_instruction_opcodes, + per_instruction_opcodes); } if (multitools && code->_co_monitoring->per_instruction_tools == NULL) { - code->_co_monitoring->per_instruction_tools = PyMem_Malloc(code_len); - if (code->_co_monitoring->per_instruction_tools == NULL) { + uint8_t *per_instruction_tools = PyMem_Malloc(code_len); + if (per_instruction_tools == NULL) { PyErr_NoMemory(); return -1; } /* This may not be necessary, as we can initialize this memory lazily, but it helps catch errors. */ for (int i = 0; i < code_len; i++) { - code->_co_monitoring->per_instruction_tools[i] = 0; + per_instruction_tools[i] = 0; } + FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring->per_instruction_tools, + per_instruction_tools); } } return 0; } -int -_Py_Instrument(PyCodeObject *code, PyInterpreterState *interp) +static int +instrument_lock_held(PyCodeObject *code, PyInterpreterState *interp) { + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + if (is_version_up_to_date(code, interp)) { assert( interp->ceval.instrumentation_version == 0 || @@ -1736,6 +1813,16 @@ _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp) return 0; } +int +_Py_Instrument(PyCodeObject *code, PyInterpreterState *interp) +{ + int res; + Py_BEGIN_CRITICAL_SECTION(code); + res = instrument_lock_held(code, interp); + Py_END_CRITICAL_SECTION(); + return res; +} + #define C_RETURN_EVENTS \ ((1 << PY_MONITORING_EVENT_C_RETURN) | \ (1 << PY_MONITORING_EVENT_C_RAISE)) @@ -1746,6 +1833,10 @@ _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp) static int instrument_all_executing_code_objects(PyInterpreterState *interp) { +#ifdef Py_GIL_DISABLED + assert(_PyInterpreterState_GET()->stoptheworld.world_stopped); +#endif + _PyRuntimeState *runtime = &_PyRuntime; HEAD_LOCK(runtime); PyThreadState* ts = PyInterpreterState_ThreadHead(interp); @@ -1754,7 +1845,7 @@ instrument_all_executing_code_objects(PyInterpreterState *interp) { _PyInterpreterFrame *frame = ts->current_frame; while (frame) { if (frame->owner != FRAME_OWNED_BY_CSTACK) { - if (_Py_Instrument(_PyFrame_GetCode(frame), interp)) { + if (instrument_lock_held(_PyFrame_GetCode(frame), interp)) { return -1; } } @@ -1817,19 +1908,24 @@ _PyMonitoring_SetEvents(int tool_id, _PyMonitoringEventSet events) if (check_tool(interp, tool_id)) { return -1; } + _PyEval_StopTheWorld(interp); uint32_t existing_events = get_events(&interp->monitors, tool_id); if (existing_events == events) { + _PyEval_StartTheWorld(interp); return 0; } set_events(&interp->monitors, tool_id, events); uint32_t new_version = global_version(interp) + MONITORING_VERSION_INCREMENT; if (new_version == 0) { PyErr_Format(PyExc_OverflowError, "events set too many times"); + _PyEval_StartTheWorld(interp); return -1; } set_global_version(tstate, new_version); _Py_Executors_InvalidateAll(interp, 1); - return instrument_all_executing_code_objects(interp); + int res = instrument_all_executing_code_objects(interp); + _PyEval_StartTheWorld(interp); + return res; } int @@ -2158,15 +2254,21 @@ monitoring_restart_events_impl(PyObject *module) */ PyThreadState *tstate = _PyThreadState_GET(); PyInterpreterState *interp = tstate->interp; + + _PyEval_StopTheWorld(interp); uint32_t restart_version = global_version(interp) + MONITORING_VERSION_INCREMENT; uint32_t new_version = restart_version + MONITORING_VERSION_INCREMENT; if (new_version <= MONITORING_VERSION_INCREMENT) { PyErr_Format(PyExc_OverflowError, "events set too many times"); + _PyEval_StartTheWorld(interp); return NULL; } interp->last_restart_version = restart_version; set_global_version(tstate, new_version); - if (instrument_all_executing_code_objects(interp)) { + int res = instrument_all_executing_code_objects(interp); + _PyEval_StartTheWorld(interp); + + if (res) { return NULL; } Py_RETURN_NONE; diff --git a/Python/legacy_tracing.c b/Python/legacy_tracing.c index ccbb3eb3f7c82a..ce25e725664345 100644 --- a/Python/legacy_tracing.c +++ b/Python/legacy_tracing.c @@ -16,6 +16,13 @@ typedef struct _PyLegacyEventHandler { int event; } _PyLegacyEventHandler; +#ifdef Py_GIL_DISABLED +#define LOCK_SETUP() PyMutex_Lock(&_PyRuntime.ceval.sys_trace_profile_mutex); +#define UNLOCK_SETUP() PyMutex_Unlock(&_PyRuntime.ceval.sys_trace_profile_mutex); +#else +#define LOCK_SETUP() +#define UNLOCK_SETUP() +#endif /* The Py_tracefunc function expects the following arguments: * obj: the trace object (PyObject *) * frame: the current frame (PyFrameObject *) @@ -414,19 +421,10 @@ is_tstate_valid(PyThreadState *tstate) } #endif -int -_PyEval_SetProfile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) +static Py_ssize_t +setup_profile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg, PyObject **old_profileobj) { - assert(is_tstate_valid(tstate)); - /* The caller must hold the GIL */ - assert(PyGILState_Check()); - - /* Call _PySys_Audit() in the context of the current thread state, - even if tstate is not the current thread state. */ - PyThreadState *current_tstate = _PyThreadState_GET(); - if (_PySys_Audit(current_tstate, "sys.setprofile", NULL) < 0) { - return -1; - } + *old_profileobj = NULL; /* Setup PEP 669 monitoring callbacks and events. */ if (!tstate->interp->sys_profile_initialized) { tstate->interp->sys_profile_initialized = true; @@ -469,25 +467,15 @@ _PyEval_SetProfile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) int delta = (func != NULL) - (tstate->c_profilefunc != NULL); tstate->c_profilefunc = func; - PyObject *old_profileobj = tstate->c_profileobj; + *old_profileobj = tstate->c_profileobj; tstate->c_profileobj = Py_XNewRef(arg); - Py_XDECREF(old_profileobj); tstate->interp->sys_profiling_threads += delta; assert(tstate->interp->sys_profiling_threads >= 0); - - uint32_t events = 0; - if (tstate->interp->sys_profiling_threads) { - events = - (1 << PY_MONITORING_EVENT_PY_START) | (1 << PY_MONITORING_EVENT_PY_RESUME) | - (1 << PY_MONITORING_EVENT_PY_RETURN) | (1 << PY_MONITORING_EVENT_PY_YIELD) | - (1 << PY_MONITORING_EVENT_CALL) | (1 << PY_MONITORING_EVENT_PY_UNWIND) | - (1 << PY_MONITORING_EVENT_PY_THROW); - } - return _PyMonitoring_SetEvents(PY_MONITORING_SYS_PROFILE_ID, events); + return tstate->interp->sys_profiling_threads; } int -_PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) +_PyEval_SetProfile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) { assert(is_tstate_valid(tstate)); /* The caller must hold the GIL */ @@ -496,11 +484,32 @@ _PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) /* Call _PySys_Audit() in the context of the current thread state, even if tstate is not the current thread state. */ PyThreadState *current_tstate = _PyThreadState_GET(); - if (_PySys_Audit(current_tstate, "sys.settrace", NULL) < 0) { + if (_PySys_Audit(current_tstate, "sys.setprofile", NULL) < 0) { return -1; } - assert(tstate->interp->sys_tracing_threads >= 0); + // needs to be decref'd outside of the lock + PyObject *old_profileobj; + LOCK_SETUP(); + int profiling_threads = setup_profile(tstate, func, arg, &old_profileobj); + UNLOCK_SETUP(); + Py_XDECREF(old_profileobj); + + uint32_t events = 0; + if (profiling_threads) { + events = + (1 << PY_MONITORING_EVENT_PY_START) | (1 << PY_MONITORING_EVENT_PY_RESUME) | + (1 << PY_MONITORING_EVENT_PY_RETURN) | (1 << PY_MONITORING_EVENT_PY_YIELD) | + (1 << PY_MONITORING_EVENT_CALL) | (1 << PY_MONITORING_EVENT_PY_UNWIND) | + (1 << PY_MONITORING_EVENT_PY_THROW); + } + return _PyMonitoring_SetEvents(PY_MONITORING_SYS_PROFILE_ID, events); +} + +static Py_ssize_t +setup_tracing(PyThreadState *tstate, Py_tracefunc func, PyObject *arg, PyObject **old_traceobj) +{ + *old_traceobj = NULL; /* Setup PEP 669 monitoring callbacks and events. */ if (!tstate->interp->sys_trace_initialized) { tstate->interp->sys_trace_initialized = true; @@ -553,14 +562,40 @@ _PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) int delta = (func != NULL) - (tstate->c_tracefunc != NULL); tstate->c_tracefunc = func; - PyObject *old_traceobj = tstate->c_traceobj; + *old_traceobj = tstate->c_traceobj; tstate->c_traceobj = Py_XNewRef(arg); - Py_XDECREF(old_traceobj); tstate->interp->sys_tracing_threads += delta; assert(tstate->interp->sys_tracing_threads >= 0); + return tstate->interp->sys_tracing_threads; +} + +int +_PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) +{ + assert(is_tstate_valid(tstate)); + /* The caller must hold the GIL */ + assert(PyGILState_Check()); + + /* Call _PySys_Audit() in the context of the current thread state, + even if tstate is not the current thread state. */ + PyThreadState *current_tstate = _PyThreadState_GET(); + if (_PySys_Audit(current_tstate, "sys.settrace", NULL) < 0) { + return -1; + } + + assert(tstate->interp->sys_tracing_threads >= 0); + // needs to be decref'd outside of the lock + PyObject *old_traceobj; + LOCK_SETUP(); + int tracing_threads = setup_tracing(tstate, func, arg, &old_traceobj); + UNLOCK_SETUP(); + Py_XDECREF(old_traceobj); + if (tracing_threads < 0) { + return -1; + } uint32_t events = 0; - if (tstate->interp->sys_tracing_threads) { + if (tracing_threads) { events = (1 << PY_MONITORING_EVENT_PY_START) | (1 << PY_MONITORING_EVENT_PY_RESUME) | (1 << PY_MONITORING_EVENT_PY_RETURN) | (1 << PY_MONITORING_EVENT_PY_YIELD) | diff --git a/Python/pystate.c b/Python/pystate.c index 37480df88aeb72..06806bd75fbcb2 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -399,6 +399,7 @@ _Py_COMP_DIAG_POP &(runtime)->unicode_state.ids.mutex, \ &(runtime)->imports.extensions.mutex, \ &(runtime)->ceval.pending_mainthread.mutex, \ + &(runtime)->ceval.sys_trace_profile_mutex, \ &(runtime)->atexit.mutex, \ &(runtime)->audit_hooks.mutex, \ &(runtime)->allocators.mutex, \ From fe19d59273bb3b0e2db553e8fd98d6788afdaebc Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Mon, 25 Mar 2024 13:03:27 -0700 Subject: [PATCH 2/7] Lock on allocation, use version as sync point --- Include/cpython/pyatomic.h | 8 + Include/cpython/pyatomic_gcc.h | 8 + Include/cpython/pyatomic_msc.h | 25 ++ Include/cpython/pyatomic_std.h | 16 ++ Include/internal/pycore_gc.h | 2 +- .../internal/pycore_pyatomic_ft_wrappers.h | 6 + .../test_free_threading/test_monitoring.py | 2 +- Python/bytecodes.c | 6 +- Python/ceval.c | 1 + Python/generated_cases.c.h | 5 +- Python/instrumentation.c | 220 +++++++++--------- Python/legacy_tracing.c | 3 +- 12 files changed, 183 insertions(+), 119 deletions(-) diff --git a/Include/cpython/pyatomic.h b/Include/cpython/pyatomic.h index c3e132d3877ca5..69083f1d9dd0c2 100644 --- a/Include/cpython/pyatomic.h +++ b/Include/cpython/pyatomic.h @@ -465,10 +465,16 @@ _Py_atomic_store_ullong_relaxed(unsigned long long *obj, static inline void * _Py_atomic_load_ptr_acquire(const void *obj); +static inline uintptr_t +_Py_atomic_load_uintptr_acquire(const uintptr_t *obj); + // Stores `*obj = value` (release operation) static inline void _Py_atomic_store_ptr_release(void *obj, void *value); +static inline void +_Py_atomic_store_uintptr_release(uintptr_t *obj, uintptr_t value); + static inline void _Py_atomic_store_ssize_release(Py_ssize_t *obj, Py_ssize_t value); @@ -491,6 +497,8 @@ static inline Py_ssize_t _Py_atomic_load_ssize_acquire(const Py_ssize_t *obj); + + // --- _Py_atomic_fence ------------------------------------------------------ // Sequential consistency fence. C11 fences have complex semantics. When diff --git a/Include/cpython/pyatomic_gcc.h b/Include/cpython/pyatomic_gcc.h index 0b40f81bd8736d..af78a94c736545 100644 --- a/Include/cpython/pyatomic_gcc.h +++ b/Include/cpython/pyatomic_gcc.h @@ -492,10 +492,18 @@ static inline void * _Py_atomic_load_ptr_acquire(const void *obj) { return (void *)__atomic_load_n((void **)obj, __ATOMIC_ACQUIRE); } +static inline uintptr_t +_Py_atomic_load_uintptr_acquire(const uintptr_t *obj) +{ return (uintptr_t)__atomic_load_n((uintptr_t *)obj, __ATOMIC_ACQUIRE); } + static inline void _Py_atomic_store_ptr_release(void *obj, void *value) { __atomic_store_n((void **)obj, value, __ATOMIC_RELEASE); } +static inline void +_Py_atomic_store_uintptr_release(uintptr_t *obj, uintptr_t value) +{ __atomic_store_n(obj, value, __ATOMIC_RELEASE); } + static inline void _Py_atomic_store_int_release(int *obj, int value) { __atomic_store_n(obj, value, __ATOMIC_RELEASE); } diff --git a/Include/cpython/pyatomic_msc.h b/Include/cpython/pyatomic_msc.h index 3205e253b28546..c8021714f48790 100644 --- a/Include/cpython/pyatomic_msc.h +++ b/Include/cpython/pyatomic_msc.h @@ -914,6 +914,18 @@ _Py_atomic_load_ptr_acquire(const void *obj) #endif } +static inline uintptr_t +_Py_atomic_load_uintptr_acquire(const uintptr_t *obj) +{ +#if defined(_M_X64) || defined(_M_IX86) + return *(uintptr_t volatile *)obj; +#elif defined(_M_ARM64) + return (uintptr_t)__ldar64((unsigned __int64 volatile *)obj); +#else +# error "no implementation of _Py_atomic_load_ptr_acquire" +#endif +} + static inline void _Py_atomic_store_ptr_release(void *obj, void *value) { @@ -926,6 +938,19 @@ _Py_atomic_store_ptr_release(void *obj, void *value) #endif } +static inline void +_Py_atomic_store_uintptr_release(uintptr_t *obj, uintptr_t value) +{ +#if defined(_M_X64) || defined(_M_IX86) + *(uintptr_t volatile *)obj = value; +#elif defined(_M_ARM64) + _Py_atomic_ASSERT_ARG_TYPE(unsigned __int64); + __stlr64((unsigned __int64 volatile *)obj, (unsigned __int64)value); +#else +# error "no implementation of _Py_atomic_store_int_release" +#endif +} + static inline void _Py_atomic_store_int_release(int *obj, int value) { diff --git a/Include/cpython/pyatomic_std.h b/Include/cpython/pyatomic_std.h index ef34bb0b77dfe5..6a77eae536d8dd 100644 --- a/Include/cpython/pyatomic_std.h +++ b/Include/cpython/pyatomic_std.h @@ -863,6 +863,14 @@ _Py_atomic_load_ptr_acquire(const void *obj) memory_order_acquire); } +static inline uintptr_t +_Py_atomic_load_uintptr_acquire(const uintptr_t *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(uintptr_t)*)obj, + memory_order_acquire); +} + static inline void _Py_atomic_store_ptr_release(void *obj, void *value) { @@ -871,6 +879,14 @@ _Py_atomic_store_ptr_release(void *obj, void *value) memory_order_release); } +static inline void +_Py_atomic_store_uintptr_release(uintptr_t *obj, uintptr_t value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(uintptr_t)*)obj, value, + memory_order_release); +} + static inline void _Py_atomic_store_int_release(int *obj, int value) { diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 60020b5c01f8a6..9e465fdd86279f 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -93,7 +93,7 @@ static inline void _PyObject_GC_SET_SHARED(PyObject *op) { * threads and needs special purpose when freeing due to * the possibility of in-flight lock-free reads occurring. * Objects with this bit that are GC objects will automatically - * delay-freed by PyObject_GC_Del. */ + * delay-freed by PyObject_GC_Del. */ static inline int _PyObject_GC_IS_SHARED_INLINE(PyObject *op) { return (op->ob_gc_bits & _PyGC_BITS_SHARED_INLINE) != 0; } diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h index 3f0c413ad31b5b..7b187209d68f98 100644 --- a/Include/internal/pycore_pyatomic_ft_wrappers.h +++ b/Include/internal/pycore_pyatomic_ft_wrappers.h @@ -28,10 +28,14 @@ extern "C" { _Py_atomic_store_ptr(&value, new_value) #define FT_ATOMIC_LOAD_PTR_ACQUIRE(value) \ _Py_atomic_load_ptr_acquire(&value) +#define FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(value) \ + _Py_atomic_load_uintptr_acquire(&value) #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) \ _Py_atomic_store_ptr_relaxed(&value, new_value) #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) \ _Py_atomic_store_ptr_release(&value, new_value) +#define FT_ATOMIC_STORE_UINTPTR_RELEASE(value, new_value) \ + _Py_atomic_store_uintptr_release(&value, new_value) #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) \ _Py_atomic_store_ssize_relaxed(&value, new_value) #define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) \ @@ -42,8 +46,10 @@ extern "C" { #define FT_ATOMIC_LOAD_SSIZE_RELAXED(value) value #define FT_ATOMIC_STORE_PTR(value, new_value) value = new_value #define FT_ATOMIC_LOAD_PTR_ACQUIRE(value) value +#define FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(value) value #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) value = new_value +#define FT_ATOMIC_STORE_UINTPTR_RELEASE(value, new_value) value = new_value #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) value = new_value diff --git a/Lib/test/test_free_threading/test_monitoring.py b/Lib/test/test_free_threading/test_monitoring.py index b10a0e39053c1c..e170840ca3cb27 100644 --- a/Lib/test/test_free_threading/test_monitoring.py +++ b/Lib/test/test_free_threading/test_monitoring.py @@ -49,7 +49,7 @@ def after_test(self): """Runs once after the test is done""" pass - def test_instrumention(self): + def test_instrumentation(self): # Setup a bunch of functions which will need instrumentation... funcs = [] for i in range(self.func_count): diff --git a/Python/bytecodes.c b/Python/bytecodes.c index c34d702f06418e..915173474701fe 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -20,6 +20,7 @@ #include "pycore_object.h" // _PyObject_GC_TRACK() #include "pycore_opcode_metadata.h" // uop names #include "pycore_opcode_utils.h" // MAKE_FUNCTION_* +#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_LOAD_PTR_ACQUIRE #include "pycore_pyerrors.h" // _PyErr_GetRaisedException() #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_range.h" // _PyRangeIterObject @@ -150,10 +151,11 @@ dummy_func( uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & ~_PY_EVAL_EVENTS_MASK; - uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + PyCodeObject *code = _PyFrame_GetCode(frame); + uintptr_t code_version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(code->_co_instrumentation_version); assert((code_version & 255) == 0); if (code_version != global_version) { - int err = _Py_Instrument(_PyFrame_GetCode(frame), tstate->interp); + int err = _Py_Instrument(code, tstate->interp); ERROR_IF(err, error); next_instr = this_instr; } diff --git a/Python/ceval.c b/Python/ceval.c index b88e555ded5c2e..2f217c5f33c6ce 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -20,6 +20,7 @@ #include "pycore_opcode_metadata.h" // EXTRA_CASES #include "pycore_optimizer.h" // _PyUOpExecutor_Type #include "pycore_opcode_utils.h" // MAKE_FUNCTION_* +#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_LOAD_PTR_ACQUIRE #include "pycore_pyerrors.h" // _PyErr_GetRaisedException() #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_range.h" // _PyRangeIterObject diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index a7764b0ec12e10..9da979b2e4cd26 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4927,10 +4927,11 @@ uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & ~_PY_EVAL_EVENTS_MASK; - uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + PyCodeObject *code = _PyFrame_GetCode(frame); + uintptr_t code_version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(code->_co_instrumentation_version); assert((code_version & 255) == 0); if (code_version != global_version) { - int err = _Py_Instrument(_PyFrame_GetCode(frame), tstate->interp); + int err = _Py_Instrument(code, tstate->interp); if (err) goto error; next_instr = this_instr; } diff --git a/Python/instrumentation.c b/Python/instrumentation.c index 4b743135f6d203..8e21add2a23492 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -14,7 +14,7 @@ #include "pycore_namespace.h" #include "pycore_object.h" #include "pycore_opcode_metadata.h" // IS_VALID_OPCODE, _PyOpcode_Caches -#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_STORE_UINT8_RELAXED +#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_STORE_UINTPTR_RELEASE #include "pycore_pyerrors.h" #include "pycore_pystate.h" // _PyInterpreterState_GET() @@ -22,12 +22,26 @@ // #define INSTRUMENT_DEBUG 1 #if defined(Py_DEBUG) && defined(Py_GIL_DISABLED) + #define ASSERT_WORLD_STOPPED_OR_LOCKED(obj) \ if (!_PyInterpreterState_GET()->stoptheworld.world_stopped) { \ _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(obj); \ } +#define ASSERT_WORLD_STOPPED() assert(_PyInterpreterState_GET()->stoptheworld.world_stopped); + +#define LOCK_CODE(code) \ + assert(!_PyInterpreterState_GET()->stoptheworld.world_stopped); \ + Py_BEGIN_CRITICAL_SECTION(code) + +#define UNLOCK_CODE(code) Py_END_CRITICAL_SECTION() + #else + #define ASSERT_WORLD_STOPPED_OR_LOCKED(obj) +#define ASSERT_WORLD_STOPPED() +#define LOCK_CODE(code) +#define UNLOCK_CODE() + #endif PyObject _PyInstrumentation_DISABLE = _PyObject_HEAD_INIT(&PyBaseObject_Type); @@ -286,39 +300,19 @@ compute_line(PyCodeObject *code, int offset, int8_t line_delta) return PyCode_Addr2Line(code, offset * sizeof(_Py_CODEUNIT)); } -static inline _PyCoMonitoringData * -get_monitoring(PyCodeObject *code) { - return FT_ATOMIC_LOAD_PTR_ACQUIRE(code->_co_monitoring); -} - -static inline _PyCoLineInstrumentationData * -get_lines_data(PyCodeObject *code) -{ - _PyCoMonitoringData *monitoring = get_monitoring(code); - return FT_ATOMIC_LOAD_PTR_ACQUIRE(monitoring->lines); -} - -static inline uint8_t * -get_per_instruction_opcodes(PyCodeObject *code) -{ - _PyCoMonitoringData *monitoring = get_monitoring(code); - return FT_ATOMIC_LOAD_PTR_ACQUIRE(monitoring->per_instruction_opcodes); -} - int _PyInstruction_GetLength(PyCodeObject *code, int offset) { ASSERT_WORLD_STOPPED_OR_LOCKED(code); - _PyCoMonitoringData *monitoring = get_monitoring(code); int opcode = _PyCode_CODE(code)[offset].op.code; assert(opcode != 0); assert(opcode != RESERVED); if (opcode == INSTRUMENTED_LINE) { - opcode = monitoring->lines[offset].original_opcode; + opcode = code->_co_monitoring->lines[offset].original_opcode; } if (opcode == INSTRUMENTED_INSTRUCTION) { - opcode = monitoring->per_instruction_opcodes[offset]; + opcode = code->_co_monitoring->per_instruction_opcodes[offset]; } int deinstrumented = DE_INSTRUMENT[opcode]; if (deinstrumented) { @@ -587,12 +581,10 @@ int _Py_GetBaseOpcode(PyCodeObject *code, int i) { int opcode = _PyCode_CODE(code)[i].op.code; if (opcode == INSTRUMENTED_LINE) { - _PyCoLineInstrumentationData *lines = get_lines_data(code); - opcode = lines[i].original_opcode; + opcode = code->_co_monitoring->lines[i].original_opcode; } if (opcode == INSTRUMENTED_INSTRUCTION) { - uint8_t *per_instr_opcodes = get_per_instruction_opcodes(code); - opcode = per_instr_opcodes[i]; + opcode = code->_co_monitoring->per_instruction_opcodes[i]; } CHECK(opcode != INSTRUMENTED_INSTRUCTION); CHECK(opcode != INSTRUMENTED_LINE); @@ -626,7 +618,7 @@ de_instrument(PyCodeObject *code, int i, int event) return; } CHECK(_PyOpcode_Deopt[deinstrumented] == deinstrumented); - FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, deinstrumented); + *opcode_ptr = deinstrumented; if (_PyOpcode_Caches[deinstrumented]) { instr[1].counter = adaptive_counter_warmup(); } @@ -670,7 +662,7 @@ de_instrument_per_instruction(PyCodeObject *code, int i) int original_opcode = code->_co_monitoring->per_instruction_opcodes[i]; CHECK(original_opcode != 0); CHECK(original_opcode == _PyOpcode_Deopt[original_opcode]); - FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, original_opcode); + *opcode_ptr = original_opcode; if (_PyOpcode_Caches[original_opcode]) { instr[1].counter = adaptive_counter_warmup(); } @@ -703,7 +695,7 @@ instrument(PyCodeObject *code, int i) int deopt = _PyOpcode_Deopt[opcode]; int instrumented = INSTRUMENTED_OPCODES[deopt]; assert(instrumented); - FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, instrumented); + *opcode_ptr = instrumented; if (_PyOpcode_Caches[deopt]) { instr[1].counter = adaptive_counter_warmup(); } @@ -721,7 +713,7 @@ instrument_line(PyCodeObject *code, int i) _PyCoLineInstrumentationData *lines = &code->_co_monitoring->lines[i]; lines->original_opcode = _PyOpcode_Deopt[opcode]; CHECK(lines->original_opcode > 0); - FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, INSTRUMENTED_LINE); + *opcode_ptr = INSTRUMENTED_LINE; } static void @@ -750,13 +742,13 @@ instrument_per_instruction(PyCodeObject *code, int i) code->_co_monitoring->per_instruction_opcodes[i] = _PyOpcode_Deopt[opcode]; } assert(code->_co_monitoring->per_instruction_opcodes[i] > 0); - FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, INSTRUMENTED_INSTRUCTION); + *opcode_ptr = INSTRUMENTED_INSTRUCTION; } static void remove_tools(PyCodeObject * code, int offset, int event, int tools) { - Py_BEGIN_CRITICAL_SECTION(code); + ASSERT_WORLD_STOPPED_OR_LOCKED(code); assert(event != PY_MONITORING_EVENT_LINE); assert(event != PY_MONITORING_EVENT_INSTRUCTION); assert(PY_MONITORING_IS_INSTRUMENTED_EVENT(event)); @@ -776,7 +768,6 @@ remove_tools(PyCodeObject * code, int offset, int event, int tools) de_instrument(code, offset, event); } } - Py_END_CRITICAL_SECTION(); } #ifndef NDEBUG @@ -792,7 +783,8 @@ tools_is_subset_for_event(PyCodeObject * code, int event, int tools) static void remove_line_tools(PyCodeObject * code, int offset, int tools) { - Py_BEGIN_CRITICAL_SECTION(code); + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + assert(code->_co_monitoring); if (code->_co_monitoring->line_tools) { @@ -810,7 +802,6 @@ remove_line_tools(PyCodeObject * code, int offset, int tools) de_instrument_line(code, offset); } } - Py_END_CRITICAL_SECTION(); } static void @@ -873,7 +864,7 @@ add_per_instruction_tools(PyCodeObject * code, int offset, int tools) static void remove_per_instruction_tools(PyCodeObject * code, int offset, int tools) { - Py_BEGIN_CRITICAL_SECTION(code); + ASSERT_WORLD_STOPPED_OR_LOCKED(code); assert(code->_co_monitoring); if (code->_co_monitoring->per_instruction_tools) { @@ -891,7 +882,6 @@ remove_per_instruction_tools(PyCodeObject * code, int offset, int tools) de_instrument_per_instruction(code, offset); } } - Py_END_CRITICAL_SECTION(); } @@ -1019,9 +1009,8 @@ get_tools_for_instruction(PyCodeObject *code, PyInterpreterState *interp, int i, if (PY_MONITORING_IS_INSTRUMENTED_EVENT(event)) { CHECK(is_version_up_to_date(code, interp)); CHECK(instrumentation_cross_checks(interp, code)); - uint8_t *monitoring_tools = FT_ATOMIC_LOAD_PTR_ACQUIRE(code->_co_monitoring->tools); - if (monitoring_tools) { - tools = monitoring_tools[i]; + if (code->_co_monitoring->tools) { + tools = code->_co_monitoring->tools[i]; } else { tools = code->_co_monitoring->active_monitors.tools[event]; @@ -1107,7 +1096,9 @@ call_instrumentation_vector( break; } else { + LOCK_CODE(code); remove_tools(code, offset, event, 1 << tool); + UNLOCK_CODE(); } } } @@ -1200,7 +1191,7 @@ _Py_call_instrumentation_exc2( int _Py_Instrumentation_GetLine(PyCodeObject *code, int index) { - _PyCoMonitoringData *monitoring = get_monitoring(code); + _PyCoMonitoringData *monitoring = code->_co_monitoring; assert(monitoring != NULL); assert(monitoring->lines != NULL); assert(index >= code->_co_firsttraceable); @@ -1219,7 +1210,7 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame, assert(instrumentation_cross_checks(tstate->interp, code)); int i = (int)(instr - _PyCode_CODE(code)); - _PyCoMonitoringData *monitoring = get_monitoring(code); + _PyCoMonitoringData *monitoring = code->_co_monitoring; _PyCoLineInstrumentationData *line_data = &monitoring->lines[i]; if (tstate->tracing) { goto done; @@ -1241,11 +1232,10 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame, } } - uint8_t *line_tools = FT_ATOMIC_LOAD_PTR_ACQUIRE(monitoring->line_tools); - uint8_t tools = line_tools != NULL ? - line_tools[i] : + uint8_t tools = code->_co_monitoring->line_tools != NULL ? + code->_co_monitoring->line_tools[i] : (interp->monitors.tools[PY_MONITORING_EVENT_LINE] | - monitoring->local_monitors.tools[PY_MONITORING_EVENT_LINE] + code->_co_monitoring->local_monitors.tools[PY_MONITORING_EVENT_LINE] ); /* Special case sys.settrace to avoid boxing the line number, * only to immediately unbox it. */ @@ -1302,7 +1292,9 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame, } else { /* DISABLE */ + LOCK_CODE(code); remove_line_tools(code, i, 1 << tool); + UNLOCK_CODE(); } } while (tools); Py_DECREF(line_obj); @@ -1322,17 +1314,15 @@ _Py_call_instrumentation_instruction(PyThreadState *tstate, _PyInterpreterFrame* assert(is_version_up_to_date(code, tstate->interp)); assert(instrumentation_cross_checks(tstate->interp, code)); int offset = (int)(instr - _PyCode_CODE(code)); - _PyCoMonitoringData *instr_data = get_monitoring(code); - uint8_t *per_instr_opcodes = FT_ATOMIC_LOAD_PTR_ACQUIRE(instr_data->per_instruction_opcodes); - assert(per_instr_opcodes); - int next_opcode = per_instr_opcodes[offset]; + _PyCoMonitoringData *instrumentation_data = code->_co_monitoring; + assert(instrumentation_data->per_instruction_opcodes); + int next_opcode = instrumentation_data->per_instruction_opcodes[offset]; if (tstate->tracing) { return next_opcode; } PyInterpreterState *interp = tstate->interp; - uint8_t *per_instr_tools = FT_ATOMIC_LOAD_PTR_ACQUIRE(instr_data->per_instruction_tools); - uint8_t tools = per_instr_tools != NULL ? - per_instr_tools[offset] : + uint8_t tools = instrumentation_data->per_instruction_tools != NULL ? + instrumentation_data->per_instruction_tools[offset] : (interp->monitors.tools[PY_MONITORING_EVENT_INSTRUCTION] | code->_co_monitoring->local_monitors.tools[PY_MONITORING_EVENT_INSTRUCTION] ); @@ -1360,7 +1350,9 @@ _Py_call_instrumentation_instruction(PyThreadState *tstate, _PyInterpreterFrame* } else { /* DISABLE */ + LOCK_CODE(code); remove_per_instruction_tools(code, offset, 1 << tool); + UNLOCK_CODE(); } } Py_DECREF(offset_obj); @@ -1388,9 +1380,10 @@ _PyMonitoring_RegisterCallback(int tool_id, int event_id, PyObject *obj) } static void -initialize_tools(PyCodeObject *code, uint8_t* tools) +initialize_tools(PyCodeObject *code) { ASSERT_WORLD_STOPPED_OR_LOCKED(code); + uint8_t* tools = code->_co_monitoring->tools; assert(tools != NULL); int code_len = (int)Py_SIZE(code); @@ -1445,9 +1438,10 @@ initialize_tools(PyCodeObject *code, uint8_t* tools) #define NO_LINE -128 static void -initialize_lines(PyCodeObject *code, _PyCoLineInstrumentationData *line_data) +initialize_lines(PyCodeObject *code) { ASSERT_WORLD_STOPPED_OR_LOCKED(code); + _PyCoLineInstrumentationData *line_data = code->_co_monitoring->lines; assert(line_data != NULL); int code_len = (int)Py_SIZE(code); @@ -1563,9 +1557,10 @@ initialize_lines(PyCodeObject *code, _PyCoLineInstrumentationData *line_data) } static void -initialize_line_tools(PyCodeObject *code, uint8_t *line_tools, _Py_LocalMonitors *all_events) +initialize_line_tools(PyCodeObject *code, _Py_LocalMonitors *all_events) { ASSERT_WORLD_STOPPED_OR_LOCKED(code); + uint8_t *line_tools = code->_co_monitoring->line_tools; assert(line_tools != NULL); int code_len = (int)Py_SIZE(code); @@ -1577,21 +1572,21 @@ initialize_line_tools(PyCodeObject *code, uint8_t *line_tools, _Py_LocalMonitors static int allocate_instrumentation_data(PyCodeObject *code) { + ASSERT_WORLD_STOPPED_OR_LOCKED(code); if (code->_co_monitoring == NULL) { - _PyCoMonitoringData *monitoring = PyMem_Malloc(sizeof(_PyCoMonitoringData)); - if (monitoring == NULL) { + code->_co_monitoring = PyMem_Malloc(sizeof(_PyCoMonitoringData)); + if (code->_co_monitoring == NULL) { PyErr_NoMemory(); return -1; } - monitoring->local_monitors = (_Py_LocalMonitors){ 0 }; - monitoring->active_monitors = (_Py_LocalMonitors){ 0 }; - monitoring->tools = NULL; - monitoring->lines = NULL; - monitoring->line_tools = NULL; - monitoring->per_instruction_opcodes = NULL; - monitoring->per_instruction_tools = NULL; - FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring, monitoring); + code->_co_monitoring->local_monitors = (_Py_LocalMonitors){ 0 }; + code->_co_monitoring->active_monitors = (_Py_LocalMonitors){ 0 }; + code->_co_monitoring->tools = NULL; + code->_co_monitoring->lines = NULL; + code->_co_monitoring->line_tools = NULL; + code->_co_monitoring->per_instruction_opcodes = NULL; + code->_co_monitoring->per_instruction_tools = NULL; } return 0; } @@ -1610,60 +1605,53 @@ update_instrumentation_data(PyCodeObject *code, PyInterpreterState *interp) code->_co_monitoring->local_monitors); bool multitools = multiple_tools(&all_events); if (code->_co_monitoring->tools == NULL && multitools) { - uint8_t *tools = PyMem_Malloc(code_len); - if (tools == NULL) { + code->_co_monitoring->tools = PyMem_Malloc(code_len); + if (code->_co_monitoring->tools == NULL) { PyErr_NoMemory(); return -1; } - initialize_tools(code, tools); - FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring->tools, tools); + initialize_tools(code); } if (all_events.tools[PY_MONITORING_EVENT_LINE]) { if (code->_co_monitoring->lines == NULL) { - _PyCoLineInstrumentationData *lines = PyMem_Malloc(code_len * sizeof(_PyCoLineInstrumentationData)); - if (lines == NULL) { + code->_co_monitoring->lines = PyMem_Malloc(code_len * sizeof(_PyCoLineInstrumentationData)); + if (code->_co_monitoring->lines == NULL) { PyErr_NoMemory(); return -1; } - initialize_lines(code, lines); - FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring->lines, lines); + initialize_lines(code); } if (multitools && code->_co_monitoring->line_tools == NULL) { - uint8_t *line_tools = PyMem_Malloc(code_len); - if (line_tools == NULL) { + code->_co_monitoring->line_tools = PyMem_Malloc(code_len); + if (code->_co_monitoring->line_tools == NULL) { PyErr_NoMemory(); return -1; } - initialize_line_tools(code, line_tools, &all_events); - FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring->line_tools, line_tools); + initialize_line_tools(code, &all_events); } } if (all_events.tools[PY_MONITORING_EVENT_INSTRUCTION]) { if (code->_co_monitoring->per_instruction_opcodes == NULL) { - uint8_t *per_instruction_opcodes = PyMem_Malloc(code_len * sizeof(_PyCoLineInstrumentationData)); - if (per_instruction_opcodes == NULL) { + code->_co_monitoring->per_instruction_opcodes = PyMem_Malloc(code_len * sizeof(_PyCoLineInstrumentationData)); + if (code->_co_monitoring->per_instruction_opcodes == NULL) { PyErr_NoMemory(); return -1; } /* This may not be necessary, as we can initialize this memory lazily, but it helps catch errors. */ for (int i = 0; i < code_len; i++) { - per_instruction_opcodes[i] = 0; + code->_co_monitoring->per_instruction_opcodes[i] = 0; } - FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring->per_instruction_opcodes, - per_instruction_opcodes); } if (multitools && code->_co_monitoring->per_instruction_tools == NULL) { - uint8_t *per_instruction_tools = PyMem_Malloc(code_len); - if (per_instruction_tools == NULL) { + code->_co_monitoring->per_instruction_tools = PyMem_Malloc(code_len); + if (code->_co_monitoring->per_instruction_tools == NULL) { PyErr_NoMemory(); return -1; } /* This may not be necessary, as we can initialize this memory lazily, but it helps catch errors. */ for (int i = 0; i < code_len; i++) { - per_instruction_tools[i] = 0; + code->_co_monitoring->per_instruction_tools[i] = 0; } - FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring->per_instruction_tools, - per_instruction_tools); } } return 0; @@ -1713,12 +1701,8 @@ instrument_lock_held(PyCodeObject *code, PyInterpreterState *interp) assert(monitors_are_empty(monitors_and(new_events, removed_events))); } code->_co_monitoring->active_monitors = active_events; - code->_co_instrumentation_version = global_version(interp); if (monitors_are_empty(new_events) && monitors_are_empty(removed_events)) { -#ifdef INSTRUMENT_DEBUG - sanity_check_instrumentation(code); -#endif - return 0; + goto done; } /* Insert instrumentation */ for (int i = code->_co_firsttraceable; i < code_len; i+= _PyInstruction_GetLength(code, i)) { @@ -1807,6 +1791,10 @@ instrument_lock_held(PyCodeObject *code, PyInterpreterState *interp) i += _PyInstruction_GetLength(code, i); } } +done: + FT_ATOMIC_STORE_UINTPTR_RELEASE(code->_co_instrumentation_version, + global_version(interp)); + #ifdef INSTRUMENT_DEBUG sanity_check_instrumentation(code); #endif @@ -1817,9 +1805,9 @@ int _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp) { int res; - Py_BEGIN_CRITICAL_SECTION(code); + LOCK_CODE(code); res = instrument_lock_held(code, interp); - Py_END_CRITICAL_SECTION(); + UNLOCK_CODE(); return res; } @@ -1833,9 +1821,7 @@ _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp) static int instrument_all_executing_code_objects(PyInterpreterState *interp) { -#ifdef Py_GIL_DISABLED - assert(_PyInterpreterState_GET()->stoptheworld.world_stopped); -#endif + ASSERT_WORLD_STOPPED(); _PyRuntimeState *runtime = &_PyRuntime; HEAD_LOCK(runtime); @@ -1908,22 +1894,25 @@ _PyMonitoring_SetEvents(int tool_id, _PyMonitoringEventSet events) if (check_tool(interp, tool_id)) { return -1; } + + int res; _PyEval_StopTheWorld(interp); uint32_t existing_events = get_events(&interp->monitors, tool_id); if (existing_events == events) { - _PyEval_StartTheWorld(interp); - return 0; + res = 0; + goto done; } set_events(&interp->monitors, tool_id, events); uint32_t new_version = global_version(interp) + MONITORING_VERSION_INCREMENT; if (new_version == 0) { PyErr_Format(PyExc_OverflowError, "events set too many times"); - _PyEval_StartTheWorld(interp); - return -1; + res = -1; + goto done; } set_global_version(tstate, new_version); _Py_Executors_InvalidateAll(interp, 1); - int res = instrument_all_executing_code_objects(interp); + res = instrument_all_executing_code_objects(interp); +done: _PyEval_StartTheWorld(interp); return res; } @@ -1941,24 +1930,33 @@ _PyMonitoring_SetLocalEvents(PyCodeObject *code, int tool_id, _PyMonitoringEvent if (check_tool(interp, tool_id)) { return -1; } + + int res; + LOCK_CODE(code); if (allocate_instrumentation_data(code)) { - return -1; + res = -1; + goto done; } + _Py_LocalMonitors *local = &code->_co_monitoring->local_monitors; uint32_t existing_events = get_local_events(local, tool_id); if (existing_events == events) { - return 0; + res = 0; + goto done; } set_local_events(local, tool_id, events); if (is_version_up_to_date(code, interp)) { /* Force instrumentation update */ code->_co_instrumentation_version -= MONITORING_VERSION_INCREMENT; } + _Py_Executors_InvalidateDependency(interp, code, 1); - if (_Py_Instrument(code, interp)) { - return -1; - } - return 0; + + res = instrument_lock_held(code, interp); + +done: + UNLOCK_CODE(); + return res; } int @@ -2259,8 +2257,8 @@ monitoring_restart_events_impl(PyObject *module) uint32_t restart_version = global_version(interp) + MONITORING_VERSION_INCREMENT; uint32_t new_version = restart_version + MONITORING_VERSION_INCREMENT; if (new_version <= MONITORING_VERSION_INCREMENT) { - PyErr_Format(PyExc_OverflowError, "events set too many times"); _PyEval_StartTheWorld(interp); + PyErr_Format(PyExc_OverflowError, "events set too many times"); return NULL; } interp->last_restart_version = restart_version; diff --git a/Python/legacy_tracing.c b/Python/legacy_tracing.c index ce25e725664345..fcaa9b226ce40c 100644 --- a/Python/legacy_tracing.c +++ b/Python/legacy_tracing.c @@ -491,7 +491,7 @@ _PyEval_SetProfile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) // needs to be decref'd outside of the lock PyObject *old_profileobj; LOCK_SETUP(); - int profiling_threads = setup_profile(tstate, func, arg, &old_profileobj); + Py_ssize_t profiling_threads = setup_profile(tstate, func, arg, &old_profileobj); UNLOCK_SETUP(); Py_XDECREF(old_profileobj); @@ -582,7 +582,6 @@ _PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) if (_PySys_Audit(current_tstate, "sys.settrace", NULL) < 0) { return -1; } - assert(tstate->interp->sys_tracing_threads >= 0); // needs to be decref'd outside of the lock PyObject *old_traceobj; From 210b8023c0b489961bf8f12751fc8d44cd982b21 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Thu, 28 Mar 2024 15:29:55 -0700 Subject: [PATCH 3/7] Lock in non-debug builds --- Python/instrumentation.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Python/instrumentation.c b/Python/instrumentation.c index 8e21add2a23492..3543537ca34cb9 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -29,6 +29,15 @@ } #define ASSERT_WORLD_STOPPED() assert(_PyInterpreterState_GET()->stoptheworld.world_stopped); +#else + +#define ASSERT_WORLD_STOPPED_OR_LOCKED(obj) +#define ASSERT_WORLD_STOPPED() + +#endif + +#ifdef Py_GIL_DISABLED + #define LOCK_CODE(code) \ assert(!_PyInterpreterState_GET()->stoptheworld.world_stopped); \ Py_BEGIN_CRITICAL_SECTION(code) @@ -37,8 +46,6 @@ #else -#define ASSERT_WORLD_STOPPED_OR_LOCKED(obj) -#define ASSERT_WORLD_STOPPED() #define LOCK_CODE(code) #define UNLOCK_CODE() From 597f40d5b493fd5dba040912fecd48a3a4917987 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Fri, 29 Mar 2024 12:56:11 -0700 Subject: [PATCH 4/7] Move CHECK_EVAL_BREAKER in ENTER_EXECUTOR --- Python/bytecodes.c | 4 ++-- Python/executor_cases.c.h | 2 +- Python/generated_cases.c.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 915173474701fe..c16e11e7fc2f1e 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -173,7 +173,7 @@ dummy_func( _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING; #endif uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker); - uintptr_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + uintptr_t version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version); assert((version & _PY_EVAL_EVENTS_MASK) == 0); DEOPT_IF(eval_breaker != version); } @@ -2379,7 +2379,6 @@ dummy_func( }; tier1 inst(ENTER_EXECUTOR, (--)) { - CHECK_EVAL_BREAKER(); PyCodeObject *code = _PyFrame_GetCode(frame); _PyExecutorObject *executor = code->co_executors->executors[oparg & 255]; assert(executor->vm_data.index == INSTR_OFFSET() - 1); @@ -2388,6 +2387,7 @@ dummy_func( assert(tstate->previous_executor == NULL); tstate->previous_executor = Py_None; Py_INCREF(executor); + CHECK_EVAL_BREAKER(); GOTO_TIER_TWO(executor); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index fccff24a418586..df87f9178f17cf 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -21,7 +21,7 @@ _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING; #endif uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker); - uintptr_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + uintptr_t version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version); assert((version & _PY_EVAL_EVENTS_MASK) == 0); if (eval_breaker != version) { UOP_STAT_INC(uopcode, miss); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 9da979b2e4cd26..efe73a27fa7b2c 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2501,7 +2501,6 @@ frame->instr_ptr = next_instr; next_instr += 1; INSTRUCTION_STATS(ENTER_EXECUTOR); - CHECK_EVAL_BREAKER(); PyCodeObject *code = _PyFrame_GetCode(frame); _PyExecutorObject *executor = code->co_executors->executors[oparg & 255]; assert(executor->vm_data.index == INSTR_OFFSET() - 1); @@ -2510,6 +2509,7 @@ assert(tstate->previous_executor == NULL); tstate->previous_executor = Py_None; Py_INCREF(executor); + CHECK_EVAL_BREAKER(); GOTO_TIER_TWO(executor); DISPATCH(); } @@ -4954,7 +4954,7 @@ _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING; #endif uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker); - uintptr_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + uintptr_t version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version); assert((version & _PY_EVAL_EVENTS_MASK) == 0); DEOPT_IF(eval_breaker != version, RESUME); DISPATCH(); From 8393bfe0eb365d1fcb04f5032d88e28e493bf1e6 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Tue, 2 Apr 2024 11:48:51 -0700 Subject: [PATCH 5/7] Don't potentially leak executor --- Include/cpython/pyatomic_msc.h | 4 ++-- Include/internal/pycore_pyatomic_ft_wrappers.h | 13 +++++++++++++ Python/bytecodes.c | 9 ++++++++- Python/generated_cases.c.h | 11 +++++++++-- Python/instrumentation.c | 11 +++-------- Python/legacy_tracing.c | 2 +- Tools/jit/template.c | 1 + 7 files changed, 37 insertions(+), 14 deletions(-) diff --git a/Include/cpython/pyatomic_msc.h b/Include/cpython/pyatomic_msc.h index c8021714f48790..212cd7817d01c5 100644 --- a/Include/cpython/pyatomic_msc.h +++ b/Include/cpython/pyatomic_msc.h @@ -922,7 +922,7 @@ _Py_atomic_load_uintptr_acquire(const uintptr_t *obj) #elif defined(_M_ARM64) return (uintptr_t)__ldar64((unsigned __int64 volatile *)obj); #else -# error "no implementation of _Py_atomic_load_ptr_acquire" +# error "no implementation of _Py_atomic_load_uintptr_acquire" #endif } @@ -947,7 +947,7 @@ _Py_atomic_store_uintptr_release(uintptr_t *obj, uintptr_t value) _Py_atomic_ASSERT_ARG_TYPE(unsigned __int64); __stlr64((unsigned __int64 volatile *)obj, (unsigned __int64)value); #else -# error "no implementation of _Py_atomic_store_int_release" +# error "no implementation of _Py_atomic_store_uintptr_release" #endif } diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h index 7b187209d68f98..a16855729bf0fc 100644 --- a/Include/internal/pycore_pyatomic_ft_wrappers.h +++ b/Include/internal/pycore_pyatomic_ft_wrappers.h @@ -40,6 +40,9 @@ extern "C" { _Py_atomic_store_ssize_relaxed(&value, new_value) #define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) \ _Py_atomic_store_uint8_relaxed(&value, new_value) +#define FT_ATOMIC_EXCHANGE_PYOBJECT(value, new_value) \ + _Py_atomic_exchange_ptr(&value, new_value) + #else #define FT_ATOMIC_LOAD_PTR(value) value #define FT_ATOMIC_LOAD_SSIZE(value) value @@ -52,6 +55,16 @@ extern "C" { #define FT_ATOMIC_STORE_UINTPTR_RELEASE(value, new_value) value = new_value #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_EXCHANGE_PYOBJECT(value, new_value) \ + _atomic_exchange_pyobject_withgil(&value, new_value) + +static inline PyObject * +_atomic_exchange_pyobject_withgil(PyObject **src, PyObject *new_value) +{ + PyObject *res = *src; + *src = new_value; + return res; +} #endif diff --git a/Python/bytecodes.c b/Python/bytecodes.c index c16e11e7fc2f1e..a64098dd8dacb4 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2379,6 +2379,14 @@ dummy_func( }; tier1 inst(ENTER_EXECUTOR, (--)) { + int prevoparg = oparg; + CHECK_EVAL_BREAKER(); + if (this_instr->op.code != ENTER_EXECUTOR || + this_instr->op.arg != prevoparg) { + next_instr = this_instr; + DISPATCH(); + } + PyCodeObject *code = _PyFrame_GetCode(frame); _PyExecutorObject *executor = code->co_executors->executors[oparg & 255]; assert(executor->vm_data.index == INSTR_OFFSET() - 1); @@ -2387,7 +2395,6 @@ dummy_func( assert(tstate->previous_executor == NULL); tstate->previous_executor = Py_None; Py_INCREF(executor); - CHECK_EVAL_BREAKER(); GOTO_TIER_TWO(executor); } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index efe73a27fa7b2c..7d3c6ea673fd11 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2498,9 +2498,17 @@ } TARGET(ENTER_EXECUTOR) { - frame->instr_ptr = next_instr; + _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 1; INSTRUCTION_STATS(ENTER_EXECUTOR); + int prevoparg = oparg; + CHECK_EVAL_BREAKER(); + if (this_instr->op.code != ENTER_EXECUTOR || + this_instr->op.arg != prevoparg) { + next_instr = this_instr; + DISPATCH(); + } PyCodeObject *code = _PyFrame_GetCode(frame); _PyExecutorObject *executor = code->co_executors->executors[oparg & 255]; assert(executor->vm_data.index == INSTR_OFFSET() - 1); @@ -2509,7 +2517,6 @@ assert(tstate->previous_executor == NULL); tstate->previous_executor = Py_None; Py_INCREF(executor); - CHECK_EVAL_BREAKER(); GOTO_TIER_TWO(executor); DISPATCH(); } diff --git a/Python/instrumentation.c b/Python/instrumentation.c index 3543537ca34cb9..f52921ace7bbeb 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -42,7 +42,7 @@ assert(!_PyInterpreterState_GET()->stoptheworld.world_stopped); \ Py_BEGIN_CRITICAL_SECTION(code) -#define UNLOCK_CODE(code) Py_END_CRITICAL_SECTION() +#define UNLOCK_CODE() Py_END_CRITICAL_SECTION() #else @@ -1374,15 +1374,10 @@ _PyMonitoring_RegisterCallback(int tool_id, int event_id, PyObject *obj) PyInterpreterState *is = _PyInterpreterState_GET(); assert(0 <= tool_id && tool_id < PY_MONITORING_TOOL_IDS); assert(0 <= event_id && event_id < _PY_MONITORING_EVENTS); -#ifdef Py_GIL_DISABLED - PyObject *callback = _Py_atomic_exchange_ptr( - &is->monitoring_callables[tool_id][event_id], + PyObject *callback = FT_ATOMIC_EXCHANGE_PYOBJECT(is->monitoring_callables[tool_id][event_id], Py_XNewRef(obj) ); -#else - PyObject *callback = is->monitoring_callables[tool_id][event_id]; - is->monitoring_callables[tool_id][event_id] = Py_XNewRef(obj); -#endif + return callback; } diff --git a/Python/legacy_tracing.c b/Python/legacy_tracing.c index fcaa9b226ce40c..d7aae7d2343ac2 100644 --- a/Python/legacy_tracing.c +++ b/Python/legacy_tracing.c @@ -586,7 +586,7 @@ _PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) // needs to be decref'd outside of the lock PyObject *old_traceobj; LOCK_SETUP(); - int tracing_threads = setup_tracing(tstate, func, arg, &old_traceobj); + Py_ssize_t tracing_threads = setup_tracing(tstate, func, arg, &old_traceobj); UNLOCK_SETUP(); Py_XDECREF(old_traceobj); if (tracing_threads < 0) { diff --git a/Tools/jit/template.c b/Tools/jit/template.c index b195aff377b3b5..228dc83254d678 100644 --- a/Tools/jit/template.c +++ b/Tools/jit/template.c @@ -12,6 +12,7 @@ #include "pycore_opcode_metadata.h" #include "pycore_opcode_utils.h" #include "pycore_optimizer.h" +#include "pycore_pyatomic_ft_wrappers.h" #include "pycore_range.h" #include "pycore_setobject.h" #include "pycore_sliceobject.h" From 65ff505212bb7369bd94cfab357d28a4a49ec6a1 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Thu, 4 Apr 2024 15:51:50 -0700 Subject: [PATCH 6/7] Just use _Py_atomic_exchange_ptr instead of FT macro --- Include/internal/pycore_pyatomic_ft_wrappers.h | 12 ------------ Python/instrumentation.c | 5 ++--- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h index a16855729bf0fc..fed5d6e0ec2c54 100644 --- a/Include/internal/pycore_pyatomic_ft_wrappers.h +++ b/Include/internal/pycore_pyatomic_ft_wrappers.h @@ -40,8 +40,6 @@ extern "C" { _Py_atomic_store_ssize_relaxed(&value, new_value) #define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) \ _Py_atomic_store_uint8_relaxed(&value, new_value) -#define FT_ATOMIC_EXCHANGE_PYOBJECT(value, new_value) \ - _Py_atomic_exchange_ptr(&value, new_value) #else #define FT_ATOMIC_LOAD_PTR(value) value @@ -55,16 +53,6 @@ extern "C" { #define FT_ATOMIC_STORE_UINTPTR_RELEASE(value, new_value) value = new_value #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) value = new_value -#define FT_ATOMIC_EXCHANGE_PYOBJECT(value, new_value) \ - _atomic_exchange_pyobject_withgil(&value, new_value) - -static inline PyObject * -_atomic_exchange_pyobject_withgil(PyObject **src, PyObject *new_value) -{ - PyObject *res = *src; - *src = new_value; - return res; -} #endif diff --git a/Python/instrumentation.c b/Python/instrumentation.c index f52921ace7bbeb..71efeff077633d 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -1374,9 +1374,8 @@ _PyMonitoring_RegisterCallback(int tool_id, int event_id, PyObject *obj) PyInterpreterState *is = _PyInterpreterState_GET(); assert(0 <= tool_id && tool_id < PY_MONITORING_TOOL_IDS); assert(0 <= event_id && event_id < _PY_MONITORING_EVENTS); - PyObject *callback = FT_ATOMIC_EXCHANGE_PYOBJECT(is->monitoring_callables[tool_id][event_id], - Py_XNewRef(obj) - ); + PyObject *callback = _Py_atomic_exchange_ptr(&is->monitoring_callables[tool_id][event_id], + Py_XNewRef(obj)); return callback; } From 6e1960151ce3a7469567c6973e2871f2f6628336 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Thu, 18 Apr 2024 12:41:00 -0700 Subject: [PATCH 7/7] Use FT_ATOMIC_LOAD_UINTPTR_ACQUIRE for INSTRUMENTED_RESUME too --- Python/bytecodes.c | 2 +- Python/generated_cases.c.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index a64098dd8dacb4..c1fbd3c7d26e01 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -180,7 +180,7 @@ dummy_func( inst(INSTRUMENTED_RESUME, (--)) { uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & ~_PY_EVAL_EVENTS_MASK; - uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + uintptr_t code_version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version); if (code_version != global_version) { if (_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp)) { ERROR_NO_POP(); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 7d3c6ea673fd11..a426d9e208492e 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3274,7 +3274,7 @@ next_instr += 1; INSTRUCTION_STATS(INSTRUMENTED_RESUME); uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & ~_PY_EVAL_EVENTS_MASK; - uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + uintptr_t code_version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version); if (code_version != global_version) { if (_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp)) { goto error;