From 0c109e0c2d0af5adeed47eefe3701ac1d84c3877 Mon Sep 17 00:00:00 2001
From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
Date: Sat, 26 Apr 2025 03:39:05 +0800
Subject: [PATCH] Add type attributes to the optimizer DSL

---
 Python/optimizer_bytecodes.c                 | 48 +++++++-------------
 Python/optimizer_cases.c.h                   |  3 +-
 Tools/cases_generator/analyzer.py            |  7 +--
 Tools/cases_generator/optimizer_generator.py | 18 ++++++++
 Tools/cases_generator/parser.py              |  2 +
 Tools/cases_generator/parsing.py             | 41 +++++++++++++++--
 6 files changed, 80 insertions(+), 39 deletions(-)

diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c
index 040e54479b722a..50ccd42c4af381 100644
--- a/Python/optimizer_bytecodes.c
+++ b/Python/optimizer_bytecodes.c
@@ -104,18 +104,16 @@ dummy_func(void) {
         res = sym_new_null(ctx);
     }
 
-    op(_GUARD_TOS_INT, (tos -- tos)) {
+    op(_GUARD_TOS_INT, (tos -- type(&PyLong_Type) tos)) {
         if (sym_matches_type(tos, &PyLong_Type)) {
             REPLACE_OP(this_instr, _NOP, 0, 0);
         }
-        sym_set_type(tos, &PyLong_Type);
     }
 
-    op(_GUARD_NOS_INT, (nos, unused -- nos, unused)) {
+    op(_GUARD_NOS_INT, (nos, unused -- type(&PyLong_Type) nos, unused)) {
         if (sym_matches_type(nos, &PyLong_Type)) {
             REPLACE_OP(this_instr, _NOP, 0, 0);
         }
-        sym_set_type(nos, &PyLong_Type);
     }
 
     op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) {
@@ -141,18 +139,16 @@ dummy_func(void) {
         }
     }
 
-    op(_GUARD_TOS_FLOAT, (tos -- tos)) {
+    op(_GUARD_TOS_FLOAT, (tos -- type(&PyFloat_Type) tos)) {
         if (sym_matches_type(tos, &PyFloat_Type)) {
             REPLACE_OP(this_instr, _NOP, 0, 0);
         }
-        sym_set_type(tos, &PyFloat_Type);
     }
 
-    op(_GUARD_NOS_FLOAT, (nos, unused -- nos, unused)) {
+    op(_GUARD_NOS_FLOAT, (nos, unused -- type(&PyFloat_Type) nos, unused)) {
         if (sym_matches_type(nos, &PyFloat_Type)) {
             REPLACE_OP(this_instr, _NOP, 0, 0);
         }
-        sym_set_type(nos, &PyFloat_Type);
     }
 
     op(_BINARY_OP, (left, right -- res)) {
@@ -408,18 +404,16 @@ dummy_func(void) {
         }
     }
 
-    op(_GUARD_NOS_UNICODE, (nos, unused -- nos, unused)) {
+    op(_GUARD_NOS_UNICODE, (nos, unused -- type(&PyUnicode_Type) nos, unused)) {
         if (sym_matches_type(nos, &PyUnicode_Type)) {
             REPLACE_OP(this_instr, _NOP, 0, 0);
         }
-        sym_set_type(nos, &PyUnicode_Type);
     }
 
-    op(_GUARD_TOS_UNICODE, (value -- value)) {
+    op(_GUARD_TOS_UNICODE, (value -- type(&PyUnicode_Type) value)) {
         if (sym_matches_type(value, &PyUnicode_Type)) {
             REPLACE_OP(this_instr, _NOP, 0, 0);
         }
-        sym_set_type(value, &PyUnicode_Type);
     }
 
     op(_TO_BOOL_STR, (value -- res)) {
@@ -429,8 +423,7 @@ dummy_func(void) {
         }
     }
 
-    op(_UNARY_NOT, (value -- res)) {
-        sym_set_type(value, &PyBool_Type);
+    op(_UNARY_NOT, (type(&PyBool_Type) value -- res)) {
         res = sym_new_truthiness(ctx, value, false);
     }
 
@@ -631,13 +624,12 @@ dummy_func(void) {
         self_or_null = sym_new_not_null(ctx);
     }
 
-    op(_CHECK_FUNCTION_VERSION, (func_version/2, callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) {
+    op(_CHECK_FUNCTION_VERSION, (func_version/2, callable, self_or_null, unused[oparg] -- type(&PyFunction_Type) callable, self_or_null, unused[oparg])) {
         if (sym_is_const(ctx, callable) && sym_matches_type(callable, &PyFunction_Type)) {
             assert(PyFunction_Check(sym_get_const(ctx, callable)));
             REPLACE_OP(this_instr, _CHECK_FUNCTION_VERSION_INLINE, 0, func_version);
             this_instr->operand1 = (uintptr_t)sym_get_const(ctx, callable);
         }
-        sym_set_type(callable, &PyFunction_Type);
     }
 
     op(_CHECK_FUNCTION_EXACT_ARGS, (callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) {
@@ -653,9 +645,9 @@ dummy_func(void) {
         }
     }
 
-    op(_CHECK_CALL_BOUND_METHOD_EXACT_ARGS, (callable, null, unused[oparg] -- callable, null, unused[oparg])) {
+    op(_CHECK_CALL_BOUND_METHOD_EXACT_ARGS, (callable, null, unused[oparg] -- type(&PyMethod_Type) callable, null, unused[oparg])) {
         sym_set_null(null);
-        sym_set_type(callable, &PyMethod_Type);
+        (void)callable;
     }
 
     op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _Py_UOpsAbstractFrame *)) {
@@ -977,46 +969,40 @@ dummy_func(void) {
         }
     }
 
-    op(_GUARD_TOS_LIST, (tos -- tos)) {
+    op(_GUARD_TOS_LIST, (tos -- type(&PyList_Type) tos)) {
         if (sym_matches_type(tos, &PyList_Type)) {
             REPLACE_OP(this_instr, _NOP, 0, 0);
         }
-        sym_set_type(tos, &PyList_Type);
     }
 
-    op(_GUARD_NOS_LIST, (nos, unused -- nos, unused)) {
+    op(_GUARD_NOS_LIST, (nos, unused -- type(&PyList_Type) nos, unused)) {
         if (sym_matches_type(nos, &PyList_Type)) {
             REPLACE_OP(this_instr, _NOP, 0, 0);
         }
-        sym_set_type(nos, &PyList_Type);
     }
 
-    op(_GUARD_TOS_TUPLE, (tos -- tos)) {
+    op(_GUARD_TOS_TUPLE, (tos -- type(&PyTuple_Type) tos)) {
         if (sym_matches_type(tos, &PyTuple_Type)) {
             REPLACE_OP(this_instr, _NOP, 0, 0);
         }
-        sym_set_type(tos, &PyTuple_Type);
     }
 
-    op(_GUARD_NOS_TUPLE, (nos, unused -- nos, unused)) {
+    op(_GUARD_NOS_TUPLE, (nos, unused -- type(&PyTuple_Type) nos, unused)) {
         if (sym_matches_type(nos, &PyTuple_Type)) {
             REPLACE_OP(this_instr, _NOP, 0, 0);
         }
-        sym_set_type(nos, &PyTuple_Type);
     }
 
-    op(_GUARD_TOS_DICT, (tos -- tos)) {
+    op(_GUARD_TOS_DICT, (tos -- type(&PyDict_Type) tos)) {
         if (sym_matches_type(tos, &PyDict_Type)) {
             REPLACE_OP(this_instr, _NOP, 0, 0);
         }
-        sym_set_type(tos, &PyDict_Type);
     }
 
-    op(_GUARD_NOS_DICT, (nos, unused -- nos, unused)) {
+    op(_GUARD_NOS_DICT, (nos, unused -- type(&PyDict_Type) nos, unused)) {
         if (sym_matches_type(nos, &PyDict_Type)) {
             REPLACE_OP(this_instr, _NOP, 0, 0);
         }
-        sym_set_type(nos, &PyDict_Type);
     }
 
     op(_GUARD_TOS_ANY_SET, (tos -- tos)) {
@@ -1055,7 +1041,7 @@ dummy_func(void) {
         sym_set_const(callable, (PyObject *)&PyUnicode_Type);
     }
 
-    op(_CALL_LEN, (callable[1], self_or_null[1], args[oparg] -- res)) {
+    op(_CALL_LEN, (callable[1], self_or_null[1], args[oparg] -- type(&PyLong_Type) res)) {
         res = sym_new_type(ctx, &PyLong_Type);
     }
 
diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h
index 9a5a362ec199a9..2a931cd12e42a1 100644
--- a/Python/optimizer_cases.c.h
+++ b/Python/optimizer_cases.c.h
@@ -151,8 +151,8 @@
             JitOptSymbol *value;
             JitOptSymbol *res;
             value = stack_pointer[-1];
-            sym_set_type(value, &PyBool_Type);
             res = sym_new_truthiness(ctx, value, false);
+            sym_set_type(value, &PyBool_Type);
             stack_pointer[-1] = res;
             break;
         }
@@ -1746,6 +1746,7 @@
             null = stack_pointer[-1 - oparg];
             callable = stack_pointer[-2 - oparg];
             sym_set_null(null);
+            (void)callable;
             sym_set_type(callable, &PyMethod_Type);
             break;
         }
diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py
index a217d7136a5401..ad496af20845c4 100644
--- a/Tools/cases_generator/analyzer.py
+++ b/Tools/cases_generator/analyzer.py
@@ -5,7 +5,7 @@
 import re
 from typing import Optional, Callable
 
-from parser import Stmt, SimpleStmt, BlockStmt, IfStmt, WhileStmt
+from parser import Stmt, SimpleStmt, BlockStmt, IfStmt, WhileStmt, StackAttribute
 
 @dataclass
 class EscapingCall:
@@ -137,13 +137,14 @@ class StackItem:
     name: str
     type: str | None
     size: str
+    attributes: list[StackAttribute]
     peek: bool = False
     used: bool = False
 
     def __str__(self) -> str:
         size = f"[{self.size}]" if self.size else ""
         type = "" if self.type is None else f"{self.type} "
-        return f"{type}{self.name}{size} {self.peek}"
return f"{self.attributes} {type}{self.name}{size} {self.peek}" def is_array(self) -> bool: return self.size != "" @@ -345,7 +346,7 @@ def override_error( def convert_stack_item( item: parser.StackEffect, replace_op_arg_1: str | None ) -> StackItem: - return StackItem(item.name, item.type, item.size) + return StackItem(item.name, item.type, item.size, item.attributes) def check_unused(stack: list[StackItem], input_names: dict[str, lexer.Token]) -> None: "Unused items cannot be on the stack above used, non-peek items" diff --git a/Tools/cases_generator/optimizer_generator.py b/Tools/cases_generator/optimizer_generator.py index 7a32275347e896..ae5498b9cf5f21 100644 --- a/Tools/cases_generator/optimizer_generator.py +++ b/Tools/cases_generator/optimizer_generator.py @@ -24,6 +24,7 @@ from typing import TextIO from lexer import Token from stack import Local, Stack, StackError, Storage +from parser import TYPE DEFAULT_OUTPUT = ROOT / "Python/optimizer_cases.c.h" DEFAULT_ABSTRACT_INPUT = (ROOT / "Python/optimizer_bytecodes.c").absolute().as_posix() @@ -111,6 +112,19 @@ def goto_label(self, goto: Token, label: Token, storage: Storage) -> None: self.out.emit(goto) self.out.emit(label) +def get_type(item: StackItem) -> str | None: + for attribute in item.attributes: + if attribute.ident == TYPE: + return attribute.expr + return None + +def emit_sym_set_type_for_stack_effect(emitter: Emitter, items: list[StackItem]) -> None: + for var in items: + typ = get_type(var) + if typ is not None: + emitter.emit(f"sym_set_type({var.name}, {typ});\n") + + def write_uop( override: Uop | None, uop: Uop, @@ -146,6 +160,10 @@ def write_uop( for var in storage.inputs: # type: ignore[possibly-undefined] var.in_local = False _, storage = emitter.emit_tokens(override, storage, None, False) + # Emit type effects. + out.start_line() + emit_sym_set_type_for_stack_effect(emitter, override.stack.inputs) + emit_sym_set_type_for_stack_effect(emitter, override.stack.outputs) out.start_line() storage.flush(out) else: diff --git a/Tools/cases_generator/parser.py b/Tools/cases_generator/parser.py index 4ec46d8cac6e4b..38853f6c592166 100644 --- a/Tools/cases_generator/parser.py +++ b/Tools/cases_generator/parser.py @@ -18,6 +18,8 @@ WhileStmt, BlockStmt, MacroIfStmt, + StackAttribute, + TYPE, ) import pprint diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py index 9c9b0053a5928b..411ed17f7e5715 100644 --- a/Tools/cases_generator/parsing.py +++ b/Tools/cases_generator/parsing.py @@ -244,15 +244,24 @@ def accept(self, visitor: Visitor) -> None: __hash__ = object.__hash__ +@dataclass +class StackAttribute(Node): + ident: str + expr: str + + def __str__(self): + return f"{self.ident}({self.expr})" + @dataclass class StackEffect(Node): name: str = field(compare=False) # __eq__ only uses type, cond, size + attributes: list[StackAttribute] = field(compare=False) type: str = "" # Optional `:type` size: str = "" # Optional `[size]` # Note: size cannot be combined with type or cond def __repr__(self) -> str: - items = [self.name, self.type, self.size] + items = [self.attributes, self.name, self.type, self.size] while items and items[-1] == "": del items[-1] return f"StackEffect({', '.join(repr(item) for item in items)})" @@ -274,6 +283,14 @@ class OpName(Node): name: str +TYPE = "type" +# We have to do this at the parsing stage and not +# lexing stage as we want to allow this to be used as +# a normal identifier in C code. 
+STACK_ATTRIBUTES = {
+    TYPE,
+}
+
 InputEffect = StackEffect | CacheEffect
 OutputEffect = StackEffect
 UOp = OpName | CacheEffect
@@ -458,10 +475,26 @@ def cache_effect(self) -> CacheEffect | None:
                     return CacheEffect(tkn.text, size)
         return None
 
+    def stack_attributes(self) -> list[StackAttribute]:
+        # IDENTIFIER '(' expression ')'
+        res: list[StackAttribute] = []
+        while tkn := self.expect(lx.IDENTIFIER):
+            if self.expect(lx.LPAREN):
+                if tkn.text not in STACK_ATTRIBUTES:
+                    raise self.make_syntax_error(f"Stack attribute {tkn.text} is not recognized.")
+                expr = self.expression()
+                self.require(lx.RPAREN)
+                res.append(StackAttribute(tkn.text.strip(), expr.text.strip()))
+            else:
+                self.backup()
+                break
+        return res
+
     @contextual
     def stack_effect(self) -> StackEffect | None:
-        # IDENTIFIER [':' IDENTIFIER [TIMES]] ['if' '(' expression ')']
-        # | IDENTIFIER '[' expression ']'
+        # stack_attributes IDENTIFIER [':' IDENTIFIER [TIMES]] ['if' '(' expression ')']
+        # | stack_attributes IDENTIFIER '[' expression ']'
+        stack_attributes = self.stack_attributes()
         if tkn := self.expect(lx.IDENTIFIER):
             type_text = ""
             if self.expect(lx.COLON):
@@ -476,7 +509,7 @@ def stack_effect(self) -> StackEffect | None:
                 raise self.make_syntax_error("Expected expression")
             self.require(lx.RBRACKET)
             size_text = size.text.strip()
-            return StackEffect(tkn.text, type_text, size_text)
+            return StackEffect(tkn.text, stack_attributes, type_text, size_text)
         return None
 
     @contextual
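
Usage sketch (illustrative, not part of the diff to apply): with this change, a type(...) attribute on a stack item in Python/optimizer_bytecodes.c replaces the trailing sym_set_type() call that previously sat in the op body, and the generator emits that call after the body instead. The DSL half below is taken from this patch; the generated half is only the expected shape of Python/optimizer_cases.c.h, not regenerated output.

    /* DSL (Python/optimizer_bytecodes.c): the attribute records that `tos`
     * is known to be a PyLong once this guard has run. */
    op(_GUARD_TOS_INT, (tos -- type(&PyLong_Type) tos)) {
        if (sym_matches_type(tos, &PyLong_Type)) {
            REPLACE_OP(this_instr, _NOP, 0, 0);
        }
    }

    /* Expected generated shape (Python/optimizer_cases.c.h): the generator
     * appends the sym_set_type() call after the hand-written body, so the
     * output is unchanged for guards that previously ended with that call. */
    case _GUARD_TOS_INT: {
        JitOptSymbol *tos;
        tos = stack_pointer[-1];
        if (sym_matches_type(tos, &PyLong_Type)) {
            REPLACE_OP(this_instr, _NOP, 0, 0);
        }
        sym_set_type(tos, &PyLong_Type);
        break;
    }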