From 015a3f397baa791567d8bd699f054a03ba773220 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 5 Dec 2024 03:38:50 +0800 Subject: [PATCH 1/5] Trace through __init__ --- Include/internal/pycore_uop_ids.h | 301 +++++++++++++------------ Include/internal/pycore_uop_metadata.h | 4 + Lib/test/test_capi/test_opt.py | 21 ++ Python/bytecodes.c | 12 + Python/executor_cases.c.h | 30 +++ Python/optimizer.c | 50 +++- Python/optimizer_cases.c.h | 4 + 7 files changed, 271 insertions(+), 151 deletions(-) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index fab4ce6a25b347..d70781eaed0df9 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -65,89 +65,90 @@ extern "C" { #define _CHECK_FUNCTION_VERSION 335 #define _CHECK_FUNCTION_VERSION_INLINE 336 #define _CHECK_FUNCTION_VERSION_KW 337 -#define _CHECK_IS_NOT_PY_CALLABLE 338 -#define _CHECK_IS_NOT_PY_CALLABLE_KW 339 -#define _CHECK_MANAGED_OBJECT_HAS_VALUES 340 -#define _CHECK_METHOD_VERSION 341 -#define _CHECK_METHOD_VERSION_KW 342 -#define _CHECK_PEP_523 343 -#define _CHECK_PERIODIC 344 -#define _CHECK_PERIODIC_IF_NOT_YIELD_FROM 345 -#define _CHECK_STACK_SPACE 346 -#define _CHECK_STACK_SPACE_OPERAND 347 -#define _CHECK_VALIDITY 348 -#define _CHECK_VALIDITY_AND_SET_IP 349 -#define _COMPARE_OP 350 -#define _COMPARE_OP_FLOAT 351 -#define _COMPARE_OP_INT 352 -#define _COMPARE_OP_STR 353 -#define _CONTAINS_OP 354 +#define _CHECK_INIT_MATCHES_VERSIONS 338 +#define _CHECK_IS_NOT_PY_CALLABLE 339 +#define _CHECK_IS_NOT_PY_CALLABLE_KW 340 +#define _CHECK_MANAGED_OBJECT_HAS_VALUES 341 +#define _CHECK_METHOD_VERSION 342 +#define _CHECK_METHOD_VERSION_KW 343 +#define _CHECK_PEP_523 344 +#define _CHECK_PERIODIC 345 +#define _CHECK_PERIODIC_IF_NOT_YIELD_FROM 346 +#define _CHECK_STACK_SPACE 347 +#define _CHECK_STACK_SPACE_OPERAND 348 +#define _CHECK_VALIDITY 349 +#define _CHECK_VALIDITY_AND_SET_IP 350 +#define _COMPARE_OP 351 +#define _COMPARE_OP_FLOAT 352 +#define _COMPARE_OP_INT 353 +#define _COMPARE_OP_STR 354 +#define _CONTAINS_OP 355 #define _CONTAINS_OP_DICT CONTAINS_OP_DICT #define _CONTAINS_OP_SET CONTAINS_OP_SET #define _CONVERT_VALUE CONVERT_VALUE #define _COPY COPY #define _COPY_FREE_VARS COPY_FREE_VARS -#define _CREATE_INIT_FRAME 355 +#define _CREATE_INIT_FRAME 356 #define _DELETE_ATTR DELETE_ATTR #define _DELETE_DEREF DELETE_DEREF #define _DELETE_FAST DELETE_FAST #define _DELETE_GLOBAL DELETE_GLOBAL #define _DELETE_NAME DELETE_NAME #define _DELETE_SUBSCR DELETE_SUBSCR -#define _DEOPT 356 +#define _DEOPT 357 #define _DICT_MERGE DICT_MERGE #define _DICT_UPDATE DICT_UPDATE -#define _DO_CALL 357 -#define _DO_CALL_FUNCTION_EX 358 -#define _DO_CALL_KW 359 -#define _DYNAMIC_EXIT 360 +#define _DO_CALL 358 +#define _DO_CALL_FUNCTION_EX 359 +#define _DO_CALL_KW 360 +#define _DYNAMIC_EXIT 361 #define _END_SEND END_SEND -#define _ERROR_POP_N 361 +#define _ERROR_POP_N 362 #define _EXIT_INIT_CHECK EXIT_INIT_CHECK -#define _EXPAND_METHOD 362 -#define _EXPAND_METHOD_KW 363 -#define _FATAL_ERROR 364 +#define _EXPAND_METHOD 363 +#define _EXPAND_METHOD_KW 364 +#define _FATAL_ERROR 365 #define _FORMAT_SIMPLE FORMAT_SIMPLE #define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC -#define _FOR_ITER 365 -#define _FOR_ITER_GEN_FRAME 366 -#define _FOR_ITER_TIER_TWO 367 +#define _FOR_ITER 366 +#define _FOR_ITER_GEN_FRAME 367 +#define _FOR_ITER_TIER_TWO 368 #define _GET_AITER GET_AITER #define _GET_ANEXT GET_ANEXT #define _GET_AWAITABLE GET_AWAITABLE #define _GET_ITER GET_ITER #define _GET_LEN GET_LEN #define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER -#define _GUARD_BOTH_FLOAT 368 -#define _GUARD_BOTH_INT 369 -#define _GUARD_BOTH_UNICODE 370 -#define _GUARD_BUILTINS_VERSION_PUSH_KEYS 371 -#define _GUARD_DORV_NO_DICT 372 -#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 373 -#define _GUARD_GLOBALS_VERSION 374 -#define _GUARD_GLOBALS_VERSION_PUSH_KEYS 375 -#define _GUARD_IS_FALSE_POP 376 -#define _GUARD_IS_NONE_POP 377 -#define _GUARD_IS_NOT_NONE_POP 378 -#define _GUARD_IS_TRUE_POP 379 -#define _GUARD_KEYS_VERSION 380 -#define _GUARD_NOS_FLOAT 381 -#define _GUARD_NOS_INT 382 -#define _GUARD_NOT_EXHAUSTED_LIST 383 -#define _GUARD_NOT_EXHAUSTED_RANGE 384 -#define _GUARD_NOT_EXHAUSTED_TUPLE 385 -#define _GUARD_TOS_FLOAT 386 -#define _GUARD_TOS_INT 387 -#define _GUARD_TYPE_VERSION 388 +#define _GUARD_BOTH_FLOAT 369 +#define _GUARD_BOTH_INT 370 +#define _GUARD_BOTH_UNICODE 371 +#define _GUARD_BUILTINS_VERSION_PUSH_KEYS 372 +#define _GUARD_DORV_NO_DICT 373 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 374 +#define _GUARD_GLOBALS_VERSION 375 +#define _GUARD_GLOBALS_VERSION_PUSH_KEYS 376 +#define _GUARD_IS_FALSE_POP 377 +#define _GUARD_IS_NONE_POP 378 +#define _GUARD_IS_NOT_NONE_POP 379 +#define _GUARD_IS_TRUE_POP 380 +#define _GUARD_KEYS_VERSION 381 +#define _GUARD_NOS_FLOAT 382 +#define _GUARD_NOS_INT 383 +#define _GUARD_NOT_EXHAUSTED_LIST 384 +#define _GUARD_NOT_EXHAUSTED_RANGE 385 +#define _GUARD_NOT_EXHAUSTED_TUPLE 386 +#define _GUARD_TOS_FLOAT 387 +#define _GUARD_TOS_INT 388 +#define _GUARD_TYPE_VERSION 389 #define _IMPORT_FROM IMPORT_FROM #define _IMPORT_NAME IMPORT_NAME -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 389 -#define _INIT_CALL_PY_EXACT_ARGS 390 -#define _INIT_CALL_PY_EXACT_ARGS_0 391 -#define _INIT_CALL_PY_EXACT_ARGS_1 392 -#define _INIT_CALL_PY_EXACT_ARGS_2 393 -#define _INIT_CALL_PY_EXACT_ARGS_3 394 -#define _INIT_CALL_PY_EXACT_ARGS_4 395 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 390 +#define _INIT_CALL_PY_EXACT_ARGS 391 +#define _INIT_CALL_PY_EXACT_ARGS_0 392 +#define _INIT_CALL_PY_EXACT_ARGS_1 393 +#define _INIT_CALL_PY_EXACT_ARGS_2 394 +#define _INIT_CALL_PY_EXACT_ARGS_3 395 +#define _INIT_CALL_PY_EXACT_ARGS_4 396 #define _INSTRUMENTED_CALL_FUNCTION_EX INSTRUMENTED_CALL_FUNCTION_EX #define _INSTRUMENTED_CALL_KW INSTRUMENTED_CALL_KW #define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER @@ -159,142 +160,142 @@ extern "C" { #define _INSTRUMENTED_POP_JUMP_IF_NONE INSTRUMENTED_POP_JUMP_IF_NONE #define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE INSTRUMENTED_POP_JUMP_IF_NOT_NONE #define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE -#define _INTERNAL_INCREMENT_OPT_COUNTER 396 -#define _IS_NONE 397 +#define _INTERNAL_INCREMENT_OPT_COUNTER 397 +#define _IS_NONE 398 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 398 -#define _ITER_CHECK_RANGE 399 -#define _ITER_CHECK_TUPLE 400 -#define _ITER_JUMP_LIST 401 -#define _ITER_JUMP_RANGE 402 -#define _ITER_JUMP_TUPLE 403 -#define _ITER_NEXT_LIST 404 -#define _ITER_NEXT_RANGE 405 -#define _ITER_NEXT_TUPLE 406 -#define _JUMP_TO_TOP 407 +#define _ITER_CHECK_LIST 399 +#define _ITER_CHECK_RANGE 400 +#define _ITER_CHECK_TUPLE 401 +#define _ITER_JUMP_LIST 402 +#define _ITER_JUMP_RANGE 403 +#define _ITER_JUMP_TUPLE 404 +#define _ITER_NEXT_LIST 405 +#define _ITER_NEXT_RANGE 406 +#define _ITER_NEXT_TUPLE 407 +#define _JUMP_TO_TOP 408 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND -#define _LOAD_ATTR 408 -#define _LOAD_ATTR_CLASS 409 -#define _LOAD_ATTR_CLASS_0 410 -#define _LOAD_ATTR_CLASS_1 411 +#define _LOAD_ATTR 409 +#define _LOAD_ATTR_CLASS 410 +#define _LOAD_ATTR_CLASS_0 411 +#define _LOAD_ATTR_CLASS_1 412 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 412 -#define _LOAD_ATTR_INSTANCE_VALUE_0 413 -#define _LOAD_ATTR_INSTANCE_VALUE_1 414 -#define _LOAD_ATTR_METHOD_LAZY_DICT 415 -#define _LOAD_ATTR_METHOD_NO_DICT 416 -#define _LOAD_ATTR_METHOD_WITH_VALUES 417 -#define _LOAD_ATTR_MODULE 418 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 419 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 420 -#define _LOAD_ATTR_PROPERTY_FRAME 421 -#define _LOAD_ATTR_SLOT 422 -#define _LOAD_ATTR_SLOT_0 423 -#define _LOAD_ATTR_SLOT_1 424 -#define _LOAD_ATTR_WITH_HINT 425 +#define _LOAD_ATTR_INSTANCE_VALUE 413 +#define _LOAD_ATTR_INSTANCE_VALUE_0 414 +#define _LOAD_ATTR_INSTANCE_VALUE_1 415 +#define _LOAD_ATTR_METHOD_LAZY_DICT 416 +#define _LOAD_ATTR_METHOD_NO_DICT 417 +#define _LOAD_ATTR_METHOD_WITH_VALUES 418 +#define _LOAD_ATTR_MODULE 419 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 420 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 421 +#define _LOAD_ATTR_PROPERTY_FRAME 422 +#define _LOAD_ATTR_SLOT 423 +#define _LOAD_ATTR_SLOT_0 424 +#define _LOAD_ATTR_SLOT_1 425 +#define _LOAD_ATTR_WITH_HINT 426 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS -#define _LOAD_BYTECODE 426 +#define _LOAD_BYTECODE 427 #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST #define _LOAD_CONST_IMMORTAL LOAD_CONST_IMMORTAL -#define _LOAD_CONST_INLINE 427 -#define _LOAD_CONST_INLINE_BORROW 428 -#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 429 -#define _LOAD_CONST_INLINE_WITH_NULL 430 +#define _LOAD_CONST_INLINE 428 +#define _LOAD_CONST_INLINE_BORROW 429 +#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 430 +#define _LOAD_CONST_INLINE_WITH_NULL 431 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 431 -#define _LOAD_FAST_0 432 -#define _LOAD_FAST_1 433 -#define _LOAD_FAST_2 434 -#define _LOAD_FAST_3 435 -#define _LOAD_FAST_4 436 -#define _LOAD_FAST_5 437 -#define _LOAD_FAST_6 438 -#define _LOAD_FAST_7 439 +#define _LOAD_FAST 432 +#define _LOAD_FAST_0 433 +#define _LOAD_FAST_1 434 +#define _LOAD_FAST_2 435 +#define _LOAD_FAST_3 436 +#define _LOAD_FAST_4 437 +#define _LOAD_FAST_5 438 +#define _LOAD_FAST_6 439 +#define _LOAD_FAST_7 440 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 440 -#define _LOAD_GLOBAL_BUILTINS 441 -#define _LOAD_GLOBAL_BUILTINS_FROM_KEYS 442 -#define _LOAD_GLOBAL_MODULE 443 -#define _LOAD_GLOBAL_MODULE_FROM_KEYS 444 +#define _LOAD_GLOBAL 441 +#define _LOAD_GLOBAL_BUILTINS 442 +#define _LOAD_GLOBAL_BUILTINS_FROM_KEYS 443 +#define _LOAD_GLOBAL_MODULE 444 +#define _LOAD_GLOBAL_MODULE_FROM_KEYS 445 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 445 -#define _LOAD_SMALL_INT_0 446 -#define _LOAD_SMALL_INT_1 447 -#define _LOAD_SMALL_INT_2 448 -#define _LOAD_SMALL_INT_3 449 +#define _LOAD_SMALL_INT 446 +#define _LOAD_SMALL_INT_0 447 +#define _LOAD_SMALL_INT_1 448 +#define _LOAD_SMALL_INT_2 449 +#define _LOAD_SMALL_INT_3 450 #define _LOAD_SPECIAL LOAD_SPECIAL #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 450 +#define _MAKE_CALLARGS_A_TUPLE 451 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 451 +#define _MAKE_WARM 452 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 452 -#define _MAYBE_EXPAND_METHOD_KW 453 -#define _MONITOR_CALL 454 -#define _MONITOR_JUMP_BACKWARD 455 -#define _MONITOR_RESUME 456 +#define _MAYBE_EXPAND_METHOD 453 +#define _MAYBE_EXPAND_METHOD_KW 454 +#define _MONITOR_CALL 455 +#define _MONITOR_JUMP_BACKWARD 456 +#define _MONITOR_RESUME 457 #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 457 -#define _POP_JUMP_IF_TRUE 458 +#define _POP_JUMP_IF_FALSE 458 +#define _POP_JUMP_IF_TRUE 459 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 459 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 460 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 460 +#define _PUSH_FRAME 461 #define _PUSH_NULL PUSH_NULL -#define _PY_FRAME_GENERAL 461 -#define _PY_FRAME_KW 462 -#define _QUICKEN_RESUME 463 -#define _REPLACE_WITH_TRUE 464 +#define _PY_FRAME_GENERAL 462 +#define _PY_FRAME_KW 463 +#define _QUICKEN_RESUME 464 +#define _REPLACE_WITH_TRUE 465 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 465 -#define _SEND 466 -#define _SEND_GEN_FRAME 467 +#define _SAVE_RETURN_OFFSET 466 +#define _SEND 467 +#define _SEND_GEN_FRAME 468 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 468 -#define _STORE_ATTR 469 -#define _STORE_ATTR_INSTANCE_VALUE 470 -#define _STORE_ATTR_SLOT 471 -#define _STORE_ATTR_WITH_HINT 472 +#define _START_EXECUTOR 469 +#define _STORE_ATTR 470 +#define _STORE_ATTR_INSTANCE_VALUE 471 +#define _STORE_ATTR_SLOT 472 +#define _STORE_ATTR_WITH_HINT 473 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 473 -#define _STORE_FAST_0 474 -#define _STORE_FAST_1 475 -#define _STORE_FAST_2 476 -#define _STORE_FAST_3 477 -#define _STORE_FAST_4 478 -#define _STORE_FAST_5 479 -#define _STORE_FAST_6 480 -#define _STORE_FAST_7 481 +#define _STORE_FAST 474 +#define _STORE_FAST_0 475 +#define _STORE_FAST_1 476 +#define _STORE_FAST_2 477 +#define _STORE_FAST_3 478 +#define _STORE_FAST_4 479 +#define _STORE_FAST_5 480 +#define _STORE_FAST_6 481 +#define _STORE_FAST_7 482 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 482 -#define _STORE_SUBSCR 483 +#define _STORE_SLICE 483 +#define _STORE_SUBSCR 484 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 484 -#define _TO_BOOL 485 +#define _TIER2_RESUME_CHECK 485 +#define _TO_BOOL 486 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -304,13 +305,13 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 486 +#define _UNPACK_SEQUENCE 487 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 486 +#define MAX_UOP_ID 487 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 89fce193f40bd8..b056e25df9f50c 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -233,6 +233,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CALL_TYPE_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_CALL_STR_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_TUPLE_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_CHECK_INIT_MATCHES_VERSIONS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_CHECK_AND_ALLOCATE_OBJECT] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG, [_CREATE_INIT_FRAME] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_EXIT_INIT_CHECK] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, @@ -356,6 +357,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_CHECK_FUNCTION_VERSION] = "_CHECK_FUNCTION_VERSION", [_CHECK_FUNCTION_VERSION_INLINE] = "_CHECK_FUNCTION_VERSION_INLINE", [_CHECK_FUNCTION_VERSION_KW] = "_CHECK_FUNCTION_VERSION_KW", + [_CHECK_INIT_MATCHES_VERSIONS] = "_CHECK_INIT_MATCHES_VERSIONS", [_CHECK_IS_NOT_PY_CALLABLE] = "_CHECK_IS_NOT_PY_CALLABLE", [_CHECK_IS_NOT_PY_CALLABLE_KW] = "_CHECK_IS_NOT_PY_CALLABLE_KW", [_CHECK_MANAGED_OBJECT_HAS_VALUES] = "_CHECK_MANAGED_OBJECT_HAS_VALUES", @@ -1007,6 +1009,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 3; case _CALL_TUPLE_1: return 3; + case _CHECK_INIT_MATCHES_VERSIONS: + return 0; case _CHECK_AND_ALLOCATE_OBJECT: return 2 + oparg; case _CREATE_INIT_FRAME: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 4cf9b66170c055..fdc0902fd39ff3 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1511,6 +1511,27 @@ def test_jit_error_pops(self): with self.assertRaises(TypeError): {item for item in items} + def test_trace_through_simple_init(self): + def testfunc(n): + for i in range(n): + Initer(i) + + opt = _testinternalcapi.new_uop_optimizer() + with temporary_optimizer(opt): + testfunc(TIER2_THRESHOLD) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_PUSH_FRAME", uops) + # One return value for __init__, the other for the shim/cleanup frame. + self.assertEqual(list(iter_opnames(ex)).count("_RETURN_VALUE"), 2) + # Strength reduced version + self.assertIn("_CHECK_INIT_MATCHES_VERSIONS", uops) + +class Initer: + def __init__(self, x): + self.x = x def global_identity(x): return x diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 3d280941b35244..e605414038fb3d 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3717,6 +3717,18 @@ dummy_func( _CALL_TUPLE_1 + _CHECK_PERIODIC; + tier2 op(_CHECK_INIT_MATCHES_VERSIONS, (type_version/2, init_func_version/2, callable[1], null[1], args[oparg] -- callable[1], null[1], args[oparg])) { + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable[0]); + DEOPT_IF(!PyType_Check(callable_o)); + PyTypeObject *tp = (PyTypeObject *)callable_o; + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version); + assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES); + PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o; + PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init; + DEOPT_IF(init_func->func_version != init_func_version); + } + op(_CHECK_AND_ALLOCATE_OBJECT, (type_version/2, callable[1], null[1], args[oparg] -- init[1], self[1], args[oparg])) { PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable[0]); DEOPT_IF(!PyStackRef_IsNull(null[0])); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 987ff2e6419669..aa9fe0573c4104 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4477,6 +4477,36 @@ break; } + case _CHECK_INIT_MATCHES_VERSIONS: { + _PyStackRef *callable; + oparg = CURRENT_OPARG(); + callable = &stack_pointer[-2 - oparg]; + uint32_t type_version = (uint32_t)CURRENT_OPERAND0(); + uint32_t init_func_version = (uint32_t)CURRENT_OPERAND1(); + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable[0]); + if (!PyType_Check(callable_o)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + PyTypeObject *tp = (PyTypeObject *)callable_o; + if (FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + if (FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES); + PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o; + PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init; + if (init_func->func_version != init_func_version) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + break; + } + case _CHECK_AND_ALLOCATE_OBJECT: { _PyStackRef *args; _PyStackRef *null; diff --git a/Python/optimizer.c b/Python/optimizer.c index 6a232218981dcd..82eec182dd180e 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -517,7 +517,8 @@ add_to_trace( assert(func == NULL || func->func_code == (PyObject *)code); \ trace_stack[trace_stack_depth].func = func; \ trace_stack[trace_stack_depth].code = code; \ - trace_stack[trace_stack_depth].instr = instr; \ + trace_stack[trace_stack_depth].instr = instr;\ + trace_stack[trace_stack_depth].is_dunder_init = false; \ trace_stack_depth++; #define TRACE_STACK_POP() \ if (trace_stack_depth <= 0) { \ @@ -555,6 +556,7 @@ translate_bytecode_to_trace( PyFunctionObject *func; PyCodeObject *code; _Py_CODEUNIT *instr; + bool is_dunder_init; } trace_stack[TRACE_STACK_SIZE]; int trace_stack_depth = 0; int confidence = CONFIDENCE_RANGE; // Adjusted by branch instructions @@ -795,6 +797,12 @@ translate_bytecode_to_trace( operand = 0; } ADD_TO_TRACE(uop, oparg, operand, target); + if (trace_stack[trace_stack_depth].is_dunder_init) { + RESERVE_RAW(3, "_Py_InitCleanup"); + ADD_TO_TRACE(EXIT_INIT_CHECK, 0, 0, target); + ADD_TO_TRACE(RETURN_VALUE, 0, 0, 0); + ADD_TO_TRACE(RESUME_CHECK, 0, 0, 0); + } DPRINTF(2, "Returning to %s (%s:%d) at byte offset %d\n", PyUnicode_AsUTF8(code->co_qualname), @@ -825,6 +833,39 @@ translate_bytecode_to_trace( PyCodeObject *new_code = NULL; PyFunctionObject *new_func = _PyFunction_LookupByVersion(func_version, (PyObject **) &new_code); + if (opcode == CALL_ALLOC_AND_ENTER_INIT) { + // In CALL_ALLOC_AND_ENTER_INIT, func_version is actually the type version. + PyTypeObject *typ = _PyType_LookupByVersion(func_version); + if (typ == NULL || !PyType_HasFeature(typ, Py_TPFLAGS_HEAPTYPE)) { + DPRINTF(2, "Bailing due to dynamic target\n"); + ADD_TO_TRACE(uop, oparg, 0, target); + ADD_TO_TRACE(_DYNAMIC_EXIT, 0, 0, 0); + goto done; + } + assert(PyType_HasFeature(typ, Py_TPFLAGS_HEAPTYPE)); + PyHeapTypeObject *ht = (PyHeapTypeObject *)typ; + PyObject *init = ht->_spec_cache.init; + if (!PyFunction_Check(init) || + ((PyFunctionObject *)init)->func_version + != ((PyCodeObject *)((PyFunctionObject *)init)->func_code)->co_version) { + DPRINTF(2, "Bailing due to non-matching __init__\n"); + ADD_TO_TRACE(uop, oparg, 0, target); + ADD_TO_TRACE(_DYNAMIC_EXIT, 0, 0, 0); + goto done; + } + PyFunctionObject *init_func = (PyFunctionObject *)init; + // Insert a guard that the __init__ is what we expect. + // Then trace through the __init__. + assert(trace[trace_length - 3].opcode == _NOP); + trace[trace_length - 3].opcode = _CHECK_INIT_MATCHES_VERSIONS; + trace[trace_length - 3].operand0 = typ->tp_version_tag; + trace[trace_length - 3].oparg = oparg; + trace[trace_length - 3].target = target; + trace[trace_length - 3].operand1 = init_func->func_version; + new_func = init_func; + new_code = (PyCodeObject *)init_func->func_code; + func_version = init_func->func_version; + } DPRINTF(2, "Function: version=%#x; new_func=%p, new_code=%p\n", (int)func_version, new_func, new_code); if (new_code != NULL) { @@ -851,6 +892,7 @@ translate_bytecode_to_trace( // Increment IP to the return address instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1; TRACE_STACK_PUSH(); + trace_stack[trace_stack_depth - 1].is_dunder_init = opcode == CALL_ALLOC_AND_ENTER_INIT; _Py_BloomFilter_Add(dependencies, new_code); /* Set the operand to the callee's function or code object, * to assist optimization passes. @@ -894,6 +936,12 @@ translate_bytecode_to_trace( instr++; } + if (uop == _CHECK_AND_ALLOCATE_OBJECT) { + // Reserving a _NOP to insert a check later. + RESERVE_RAW(1, "_NOP"); + ADD_TO_TRACE(_NOP, 0, 0, 0); + } + // All other instructions ADD_TO_TRACE(uop, oparg, operand, target); } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index f77a5aa35bdf82..bb8982cbd00b0f 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1954,6 +1954,10 @@ break; } + case _CHECK_INIT_MATCHES_VERSIONS: { + break; + } + case _CHECK_AND_ALLOCATE_OBJECT: { _Py_UopsSymbol **args; _Py_UopsSymbol *null; From bf470bca07ef7629c68402fbefdbd05686ab599c Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 5 Dec 2024 20:19:02 +0800 Subject: [PATCH 2/5] Apply review suggestion --- Python/bytecodes.c | 3 +-- Python/executor_cases.c.h | 6 +----- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index e605414038fb3d..e228a1a65999aa 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3721,8 +3721,7 @@ dummy_func( PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable[0]); DEOPT_IF(!PyType_Check(callable_o)); PyTypeObject *tp = (PyTypeObject *)callable_o; - DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version); - DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version); + DEOPT_IF(tp->tp_version_tag != type_version); assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES); PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o; PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index aa9fe0573c4104..7d5948c0391896 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4489,11 +4489,7 @@ JUMP_TO_JUMP_TARGET(); } PyTypeObject *tp = (PyTypeObject *)callable_o; - if (FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - if (FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version) { + if (tp->tp_version_tag != type_version) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } From 2f7138b05b46cc24849dbde74392721ad3d0672a Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Thu, 5 Dec 2024 23:19:45 +0800 Subject: [PATCH 3/5] Fix segfault --- Python/optimizer.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 82eec182dd180e..ea47a0e2f440f5 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -798,10 +798,9 @@ translate_bytecode_to_trace( } ADD_TO_TRACE(uop, oparg, operand, target); if (trace_stack[trace_stack_depth].is_dunder_init) { - RESERVE_RAW(3, "_Py_InitCleanup"); + RESERVE_RAW(2, "_Py_InitCleanup"); ADD_TO_TRACE(EXIT_INIT_CHECK, 0, 0, target); ADD_TO_TRACE(RETURN_VALUE, 0, 0, 0); - ADD_TO_TRACE(RESUME_CHECK, 0, 0, 0); } DPRINTF(2, "Returning to %s (%s:%d) at byte offset %d\n", @@ -856,12 +855,13 @@ translate_bytecode_to_trace( PyFunctionObject *init_func = (PyFunctionObject *)init; // Insert a guard that the __init__ is what we expect. // Then trace through the __init__. - assert(trace[trace_length - 3].opcode == _NOP); - trace[trace_length - 3].opcode = _CHECK_INIT_MATCHES_VERSIONS; - trace[trace_length - 3].operand0 = typ->tp_version_tag; - trace[trace_length - 3].oparg = oparg; - trace[trace_length - 3].target = target; - trace[trace_length - 3].operand1 = init_func->func_version; + _PyUOpInstruction *nop = &trace[trace_length - 3]; + assert(nop->opcode == _NOP); + nop->opcode = _CHECK_INIT_MATCHES_VERSIONS; + nop->operand0 = typ->tp_version_tag; + nop->oparg = oparg; + nop->target = target; + nop->operand1 = init_func->func_version; new_func = init_func; new_code = (PyCodeObject *)init_func->func_code; func_version = init_func->func_version; From 2620394b3ae9d0a75bdf64a7743f2c341979a321 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 26 Apr 2025 05:30:45 +0800 Subject: [PATCH 4/5] fix merge conflicts --- Python/bytecodes.c | 4 ++-- Python/executor_cases.c.h | 6 +++--- Python/optimizer.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index f6a4df9072aa85..2d8f47fff45284 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4051,8 +4051,8 @@ dummy_func( _CALL_TUPLE_1 + _CHECK_PERIODIC; - tier2 op(_CHECK_INIT_MATCHES_VERSIONS, (type_version/2, init_func_version/2, callable, null, args[oparg] -- callable, null, args[oparg])) { - PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable[0]); + tier2 op(_CHECK_INIT_MATCHES_VERSIONS, (type_version/2, init_func_version/2, callable, unused, unused[oparg] -- callable, unused, unused[oparg])) { + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); DEOPT_IF(!PyType_Check(callable_o)); PyTypeObject *tp = (PyTypeObject *)callable_o; DEOPT_IF(tp->tp_version_tag != type_version); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index dad0d41b64ceca..745e539d952068 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5259,12 +5259,12 @@ } case _CHECK_INIT_MATCHES_VERSIONS: { - _PyStackRef *callable; + _PyStackRef callable; oparg = CURRENT_OPARG(); - callable = &stack_pointer[-2 - oparg]; + callable = stack_pointer[-2 - oparg]; uint32_t type_version = (uint32_t)CURRENT_OPERAND0(); uint32_t init_func_version = (uint32_t)CURRENT_OPERAND1(); - PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable[0]); + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); if (!PyType_Check(callable_o)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); diff --git a/Python/optimizer.c b/Python/optimizer.c index fd389acfb4a78f..b92732e0913965 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -808,7 +808,7 @@ translate_bytecode_to_trace( if (typ == NULL || !PyType_HasFeature(typ, Py_TPFLAGS_HEAPTYPE)) { DPRINTF(2, "Bailing due to dynamic target\n"); ADD_TO_TRACE(uop, oparg, 0, target); - ADD_TO_TRACE(_DYNAMIC_EXIT, 0, 0, 0); + ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0); goto done; } assert(PyType_HasFeature(typ, Py_TPFLAGS_HEAPTYPE)); @@ -819,7 +819,7 @@ translate_bytecode_to_trace( != ((PyCodeObject *)((PyFunctionObject *)init)->func_code)->co_version) { DPRINTF(2, "Bailing due to non-matching __init__\n"); ADD_TO_TRACE(uop, oparg, 0, target); - ADD_TO_TRACE(_DYNAMIC_EXIT, 0, 0, 0); + ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0); goto done; } PyFunctionObject *init_func = (PyFunctionObject *)init; From 5c22d13418f30a0add55ae38b8ee59292ee90988 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 26 Apr 2025 05:44:55 +0800 Subject: [PATCH 5/5] Add check --- Python/optimizer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index b92732e0913965..b4e1848e10c489 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -805,7 +805,9 @@ translate_bytecode_to_trace( if (opcode == CALL_ALLOC_AND_ENTER_INIT) { // In CALL_ALLOC_AND_ENTER_INIT, func_version is actually the type version. PyTypeObject *typ = _PyType_LookupByVersion(func_version); - if (typ == NULL || !PyType_HasFeature(typ, Py_TPFLAGS_HEAPTYPE)) { + if (typ == NULL || + !PyType_HasFeature(typ, Py_TPFLAGS_HEAPTYPE) || + !PyType_HasFeature(typ, Py_TPFLAGS_INLINE_VALUES)) { DPRINTF(2, "Bailing due to dynamic target\n"); ADD_TO_TRACE(uop, oparg, 0, target); ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0);