From 605f930346309cb5a1ef2d46439fbb6122a26f01 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 27 Jun 2023 16:19:53 -0700 Subject: [PATCH 1/5] Write macro expansions to opcode_metadata.h --- Python/opcode_metadata.h | 11 +++++ Python/optimizer.c | 4 +- Tools/cases_generator/generate_cases.py | 54 +++++++++++++++++++++++-- 3 files changed, 64 insertions(+), 5 deletions(-) diff --git a/Python/opcode_metadata.h b/Python/opcode_metadata.h index 4e31a5e69614bb..b44285599f3766 100644 --- a/Python/opcode_metadata.h +++ b/Python/opcode_metadata.h @@ -1131,10 +1131,18 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[256] = { [STORE_FAST] = { .nuops = 1, .uops = { { STORE_FAST, 0, 0 } } }, [POP_TOP] = { .nuops = 1, .uops = { { POP_TOP, 0, 0 } } }, [PUSH_NULL] = { .nuops = 1, .uops = { { PUSH_NULL, 0, 0 } } }, + [END_FOR] = { .nuops = 2, .uops = { { POP_TOP, 0, 0 }, { POP_TOP, 0, 0 } } }, [END_SEND] = { .nuops = 1, .uops = { { END_SEND, 0, 0 } } }, [UNARY_NEGATIVE] = { .nuops = 1, .uops = { { UNARY_NEGATIVE, 0, 0 } } }, [UNARY_NOT] = { .nuops = 1, .uops = { { UNARY_NOT, 0, 0 } } }, [UNARY_INVERT] = { .nuops = 1, .uops = { { UNARY_INVERT, 0, 0 } } }, + [BINARY_OP_MULTIPLY_INT] = { .nuops = 2, .uops = { { _GUARD_BOTH_INT, 0, 0 }, { _BINARY_OP_MULTIPLY_INT, 0, 0 } } }, + [BINARY_OP_ADD_INT] = { .nuops = 2, .uops = { { _GUARD_BOTH_INT, 0, 0 }, { _BINARY_OP_ADD_INT, 0, 0 } } }, + [BINARY_OP_SUBTRACT_INT] = { .nuops = 2, .uops = { { _GUARD_BOTH_INT, 0, 0 }, { _BINARY_OP_SUBTRACT_INT, 0, 0 } } }, + [BINARY_OP_MULTIPLY_FLOAT] = { .nuops = 2, .uops = { { _GUARD_BOTH_FLOAT, 0, 0 }, { _BINARY_OP_MULTIPLY_FLOAT, 0, 0 } } }, + [BINARY_OP_ADD_FLOAT] = { .nuops = 2, .uops = { { _GUARD_BOTH_FLOAT, 0, 0 }, { _BINARY_OP_ADD_FLOAT, 0, 0 } } }, + [BINARY_OP_SUBTRACT_FLOAT] = { .nuops = 2, .uops = { { _GUARD_BOTH_FLOAT, 0, 0 }, { _BINARY_OP_SUBTRACT_FLOAT, 0, 0 } } }, + [BINARY_OP_ADD_UNICODE] = { .nuops = 2, .uops = { { _GUARD_BOTH_UNICODE, 0, 0 }, { 
_BINARY_OP_ADD_UNICODE, 0, 0 } } }, [BINARY_SLICE] = { .nuops = 1, .uops = { { BINARY_SLICE, 0, 0 } } }, [STORE_SLICE] = { .nuops = 1, .uops = { { STORE_SLICE, 0, 0 } } }, [BINARY_SUBSCR_LIST_INT] = { .nuops = 1, .uops = { { BINARY_SUBSCR_LIST_INT, 0, 0 } } }, @@ -1162,6 +1170,9 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[256] = { [DELETE_ATTR] = { .nuops = 1, .uops = { { DELETE_ATTR, 0, 0 } } }, [STORE_GLOBAL] = { .nuops = 1, .uops = { { STORE_GLOBAL, 0, 0 } } }, [DELETE_GLOBAL] = { .nuops = 1, .uops = { { DELETE_GLOBAL, 0, 0 } } }, + [LOAD_LOCALS] = { .nuops = 1, .uops = { { _LOAD_LOCALS, 0, 0 } } }, + [LOAD_NAME] = { .nuops = 2, .uops = { { _LOAD_LOCALS, 0, 0 }, { _LOAD_FROM_DICT_OR_GLOBALS, 0, 0 } } }, + [LOAD_FROM_DICT_OR_GLOBALS] = { .nuops = 1, .uops = { { _LOAD_FROM_DICT_OR_GLOBALS, 0, 0 } } }, [DELETE_DEREF] = { .nuops = 1, .uops = { { DELETE_DEREF, 0, 0 } } }, [LOAD_FROM_DICT_OR_DEREF] = { .nuops = 1, .uops = { { LOAD_FROM_DICT_OR_DEREF, 0, 0 } } }, [LOAD_DEREF] = { .nuops = 1, .uops = { { LOAD_DEREF, 0, 0 } } }, diff --git a/Python/optimizer.c b/Python/optimizer.c index 0a6cc5c92f04f5..52880a503abf35 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -474,6 +474,8 @@ PyUnstable_Optimizer_NewUOpOptimizer(void) } opt->optimize = uop_optimize; opt->resume_threshold = UINT16_MAX; - opt->backedge_threshold = 0; + // Need at least 3 iterations to settle specializations. + // A few lower bits of the counter are reserved for other flags. 
+ opt->backedge_threshold = 3 << OPTIMIZER_BITS_IN_COUNTER; return (PyObject *)opt; } diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index d66b28139e43fb..1adbd215377a2f 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -1246,14 +1246,18 @@ def write_metadata(self) -> None: pass case parser.InstDef(name=name): instr = self.instrs[name] + # Since an 'op' is not a bytecode, it has no expansion if instr.kind != "op" and instr.is_viable_uop(): + # Double check there aren't any used cache effects. + # If this fails, see write_macro_expansions(). + assert not [e for e in instr.cache_effects if e.name != UNUSED], \ + (instr.name, instr.cache_effects) self.out.emit( f"[{name}] = " f"{{ .nuops = 1, .uops = {{ {{ {name}, 0, 0 }} }} }}," ) case parser.Macro(): - # TODO: emit expansion if all parts are viable uops - pass + self.write_macro_expansions(self.macro_instrs[thing.name]) case parser.Pseudo(): pass case _: @@ -1303,7 +1307,7 @@ def write_pseudo_instrs(self) -> None: def write_uop_defines(self) -> None: """Write '#define XXX NNN' for each uop""" self.out.emit("") - counter = 300 + counter = 300 # TODO: Avoid collision with pseudo instructions def add(name: str) -> None: nonlocal counter self.out.emit(f"#define {name} {counter}") @@ -1314,6 +1318,47 @@ def add(name: str) -> None: if instr.kind == "op" and instr.is_viable_uop(): add(instr.name) + def write_macro_expansions(self, mac: MacroInstruction) -> None: + """Write the macro expansions for a macro-instruction.""" + # TODO: Refactor to share code with write_body(), is_viable_uop(), etc. + offset = 0 # Cache effect offset + expansions: list[tuple[str, int, int]] = [] # [(name, size, offset), ...] 
+ for part in mac.parts: + match part: + case parser.CacheEffect(): + offset += part.size + case Component(): + # All component instructions must be viable uops + if not part.instr.is_viable_uop(): + print(f"Part {part.instr.name} of {mac.name} is not a viable uop") + return + effect = None + effect_offset = 0 + for eff in part.instr.cache_effects: + if eff.name != UNUSED: + assert effect is None, \ + f"{mac.name} has multiple cache effects {eff.name}" + effect = eff + effect_offset = offset + offset += eff.size + if effect is not None: + # We don't seem to have examples of this yet; remove the print once we do + print(f"Part {part.instr.name} of {mac.name} has cache effect {effect.name}") + assert not part.instr.instr_flags.HAS_ARG_FLAG, \ + f"{mac.name} has both cache effect and arg flag" + exp = (part.instr.name, effect.size, effect_offset) + else: + exp = (part.instr.name, 0, 0) + expansions.append(exp) + case _: + typing.assert_never(part) + assert len(expansions) > 0, f"Macro {mac.name} has empty expansion?!" + pieces = [f"{{ {name}, {size}, {offset} }}" for name, size, offset in expansions] + self.out.emit( + f"[{mac.name}] = " + f"{{ .nuops = {len(expansions)}, .uops = {{ {', '.join(pieces)} }} }}," + ) + def emit_metadata_entry( self, name: str, fmt: str, flags: InstructionFlags ) -> None: @@ -1379,6 +1424,7 @@ def write_executor_instructions(self) -> None: for thing in self.everything: match thing: case OverriddenInstructionPlaceHolder(): + # TODO: Is this helpful? 
self.write_overridden_instr_place_holder(thing) case parser.InstDef(): instr = self.instrs[thing.name] @@ -1388,7 +1434,7 @@ def write_executor_instructions(self) -> None: instr.write(self.out, tier=TIER_TWO) self.out.emit("break;") case parser.Macro(): - pass # TODO + pass case parser.Pseudo(): pass case _: From bbc59518f525bb8c78fa20f1c5d5ca0b3ce3fc93 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 27 Jun 2023 16:45:10 -0700 Subject: [PATCH 2/5] Print uop names in uops debug output This would be simpler if the numeric opcode values were generated by generate_cases.py. --- Python/ceval.c | 6 +++--- Python/opcode_metadata.h | 17 +++++++++++++++++ Python/optimizer.c | 4 ++-- Tools/cases_generator/generate_cases.py | 12 ++++++++---- 4 files changed, 30 insertions(+), 9 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index e19860d04a7914..65b6f2481665de 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2817,10 +2817,10 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject oparg = (int)operand; #ifdef LLTRACE if (lltrace >= 3) { - const char *opname = opcode < 256 ? _PyOpcode_OpName[opcode] : ""; + const char *opname = opcode < 256 ? 
_PyOpcode_OpName[opcode] : _PyOpcode_uop_name[opcode]; int stack_level = (int)(stack_pointer - _PyFrame_Stackbase(frame)); - fprintf(stderr, " uop %s %d, operand %" PRIu64 ", stack_level %d\n", - opname, opcode, operand, stack_level); + fprintf(stderr, " uop %s, operand %" PRIu64 ", stack_level %d\n", + opname, operand, stack_level); } #endif pc++; diff --git a/Python/opcode_metadata.h b/Python/opcode_metadata.h index b44285599f3766..614192a1a59adc 100644 --- a/Python/opcode_metadata.h +++ b/Python/opcode_metadata.h @@ -913,6 +913,7 @@ struct opcode_macro_expansion { #ifndef NEED_OPCODE_METADATA extern const struct opcode_metadata _PyOpcode_opcode_metadata[512]; extern const struct opcode_macro_expansion _PyOpcode_macro_expansion[256]; +extern const char *_PyOpcode_uop_name[512]; #else const struct opcode_metadata _PyOpcode_opcode_metadata[512] = { [NOP] = { true, INSTR_FMT_IX, 0 }, @@ -1218,4 +1219,20 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[256] = { [COPY] = { .nuops = 1, .uops = { { COPY, 0, 0 } } }, [SWAP] = { .nuops = 1, .uops = { { SWAP, 0, 0 } } }, }; +const char *_PyOpcode_uop_name[512] = { + [300] = "EXIT_TRACE", + [301] = "SET_IP", + [302] = "_GUARD_BOTH_INT", + [303] = "_BINARY_OP_MULTIPLY_INT", + [304] = "_BINARY_OP_ADD_INT", + [305] = "_BINARY_OP_SUBTRACT_INT", + [306] = "_GUARD_BOTH_FLOAT", + [307] = "_BINARY_OP_MULTIPLY_FLOAT", + [308] = "_BINARY_OP_ADD_FLOAT", + [309] = "_BINARY_OP_SUBTRACT_FLOAT", + [310] = "_GUARD_BOTH_UNICODE", + [311] = "_BINARY_OP_ADD_UNICODE", + [312] = "_LOAD_LOCALS", + [313] = "_LOAD_FROM_DICT_OR_GLOBALS", +}; #endif diff --git a/Python/optimizer.c b/Python/optimizer.c index 52880a503abf35..9d77ab4ff879bb 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -325,8 +325,8 @@ translate_bytecode_to_trace( } #define ADD_TO_TRACE(OPCODE, OPERAND) \ if (lltrace >= 2) { \ - const char *opname = (OPCODE) < 256 ? 
_PyOpcode_OpName[(OPCODE)] : ""; \ - fprintf(stderr, " ADD_TO_TRACE(%s %d, %" PRIu64 ")\n", opname, (OPCODE), (uint64_t)(OPERAND)); \ + const char *opname = (OPCODE) < 256 ? _PyOpcode_OpName[(OPCODE)] : _PyOpcode_uop_name[(OPCODE)]; \ + fprintf(stderr, " ADD_TO_TRACE(%s, %" PRIu64 ")\n", opname, (uint64_t)(OPERAND)); \ } \ trace[trace_length].opcode = (OPCODE); \ trace[trace_length].operand = (OPERAND); \ diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 1adbd215377a2f..957917bea34906 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -1182,7 +1182,8 @@ def write_metadata(self) -> None: self.write_pseudo_instrs() - self.write_uop_defines() + self.out.emit("") + self.write_uop_items(lambda name, counter: f"#define {name} {counter}") self.write_stack_effect_functions() @@ -1213,6 +1214,7 @@ def write_metadata(self) -> None: self.out.emit("#ifndef NEED_OPCODE_METADATA") self.out.emit("extern const struct opcode_metadata _PyOpcode_opcode_metadata[512];") self.out.emit("extern const struct opcode_macro_expansion _PyOpcode_macro_expansion[256];") + self.out.emit("extern const char *_PyOpcode_uop_name[512];") self.out.emit("#else") self.out.emit("const struct opcode_metadata _PyOpcode_opcode_metadata[512] = {") @@ -1263,6 +1265,9 @@ def write_metadata(self) -> None: case _: typing.assert_never(thing) + with self.out.block("const char *_PyOpcode_uop_name[512] =", ";"): + self.write_uop_items(lambda name, counter: f"[{counter}] = \"{name}\",") + self.out.emit("#endif") with open(self.pymetadata_filename, "w") as f: @@ -1304,13 +1309,12 @@ def write_pseudo_instrs(self) -> None: self.out.emit(f" ((OP) == {op}) || \\") self.out.emit(f" 0") - def write_uop_defines(self) -> None: + def write_uop_items(self, make_text: typing.Callable[[str, int], str]) -> None: """Write '#define XXX NNN' for each uop""" - self.out.emit("") counter = 300 # TODO: Avoid collision with pseudo 
instructions def add(name: str) -> None: nonlocal counter - self.out.emit(f"#define {name} {counter}") + self.out.emit(make_text(name, counter)) counter += 1 add("EXIT_TRACE") add("SET_IP") From 60403c9dc88afdb33b234cbf9cddc51862ab1c0f Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 27 Jun 2023 16:50:47 -0700 Subject: [PATCH 3/5] Only output uop names in debug mode --- Python/opcode_metadata.h | 4 ++++ Tools/cases_generator/generate_cases.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/Python/opcode_metadata.h b/Python/opcode_metadata.h index 614192a1a59adc..1bd3848a1dc633 100644 --- a/Python/opcode_metadata.h +++ b/Python/opcode_metadata.h @@ -913,7 +913,9 @@ struct opcode_macro_expansion { #ifndef NEED_OPCODE_METADATA extern const struct opcode_metadata _PyOpcode_opcode_metadata[512]; extern const struct opcode_macro_expansion _PyOpcode_macro_expansion[256]; +#ifdef Py_DEBUG extern const char *_PyOpcode_uop_name[512]; +#endif #else const struct opcode_metadata _PyOpcode_opcode_metadata[512] = { [NOP] = { true, INSTR_FMT_IX, 0 }, @@ -1219,6 +1221,7 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[256] = { [COPY] = { .nuops = 1, .uops = { { COPY, 0, 0 } } }, [SWAP] = { .nuops = 1, .uops = { { SWAP, 0, 0 } } }, }; +#ifdef Py_DEBUG const char *_PyOpcode_uop_name[512] = { [300] = "EXIT_TRACE", [301] = "SET_IP", @@ -1236,3 +1239,4 @@ const char *_PyOpcode_uop_name[512] = { [313] = "_LOAD_FROM_DICT_OR_GLOBALS", }; #endif +#endif diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 957917bea34906..0b6ad839fb48c2 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -1214,7 +1214,9 @@ def write_metadata(self) -> None: self.out.emit("#ifndef NEED_OPCODE_METADATA") self.out.emit("extern const struct opcode_metadata _PyOpcode_opcode_metadata[512];") self.out.emit("extern const struct opcode_macro_expansion _PyOpcode_macro_expansion[256];") + 
self.out.emit("#ifdef Py_DEBUG") self.out.emit("extern const char *_PyOpcode_uop_name[512];") + self.out.emit("#endif") self.out.emit("#else") self.out.emit("const struct opcode_metadata _PyOpcode_opcode_metadata[512] = {") @@ -1265,8 +1267,10 @@ def write_metadata(self) -> None: case _: typing.assert_never(thing) + self.out.emit("#ifdef Py_DEBUG") with self.out.block("const char *_PyOpcode_uop_name[512] =", ";"): self.write_uop_items(lambda name, counter: f"[{counter}] = \"{name}\",") + self.out.emit("#endif") self.out.emit("#endif") From 9e35908e77512338843821ab7b1db544835b8c92 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 27 Jun 2023 20:33:58 -0700 Subject: [PATCH 4/5] Attempt to silence c-analyzer (Why can't I run this locally on macOS?) --- Python/opcode_metadata.h | 4 ++-- Tools/cases_generator/generate_cases.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Python/opcode_metadata.h b/Python/opcode_metadata.h index 1bd3848a1dc633..cfdc09294e9dee 100644 --- a/Python/opcode_metadata.h +++ b/Python/opcode_metadata.h @@ -914,7 +914,7 @@ struct opcode_macro_expansion { extern const struct opcode_metadata _PyOpcode_opcode_metadata[512]; extern const struct opcode_macro_expansion _PyOpcode_macro_expansion[256]; #ifdef Py_DEBUG -extern const char *_PyOpcode_uop_name[512]; +extern const char * const _PyOpcode_uop_name[512]; #endif #else const struct opcode_metadata _PyOpcode_opcode_metadata[512] = { @@ -1222,7 +1222,7 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[256] = { [SWAP] = { .nuops = 1, .uops = { { SWAP, 0, 0 } } }, }; #ifdef Py_DEBUG -const char *_PyOpcode_uop_name[512] = { +const char * const _PyOpcode_uop_name[512] = { [300] = "EXIT_TRACE", [301] = "SET_IP", [302] = "_GUARD_BOTH_INT", diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 0b6ad839fb48c2..9fcdcbef96a319 100644 --- a/Tools/cases_generator/generate_cases.py +++ 
b/Tools/cases_generator/generate_cases.py @@ -1215,7 +1215,7 @@ def write_metadata(self) -> None: self.out.emit("extern const struct opcode_metadata _PyOpcode_opcode_metadata[512];") self.out.emit("extern const struct opcode_macro_expansion _PyOpcode_macro_expansion[256];") self.out.emit("#ifdef Py_DEBUG") - self.out.emit("extern const char *_PyOpcode_uop_name[512];") + self.out.emit("extern const char * const _PyOpcode_uop_name[512];") self.out.emit("#endif") self.out.emit("#else") @@ -1268,7 +1268,7 @@ def write_metadata(self) -> None: typing.assert_never(thing) self.out.emit("#ifdef Py_DEBUG") - with self.out.block("const char *_PyOpcode_uop_name[512] =", ";"): + with self.out.block("const char * const _PyOpcode_uop_name[512] =", ";"): self.write_uop_items(lambda name, counter: f"[{counter}] = \"{name}\",") self.out.emit("#endif") From ea90c43d9d511847f2c591ada61aa967144cdce5 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 28 Jun 2023 10:57:35 -0700 Subject: [PATCH 5/5] Make Instruction and MacroInstruction pre-compute active cache effects --- Tools/cases_generator/generate_cases.py | 175 +++++++++++------------- 1 file changed, 81 insertions(+), 94 deletions(-) diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 9fcdcbef96a319..7c99e3b929b463 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -300,6 +300,13 @@ def emit_macros(cls, out: Formatter): f"(_PyOpcode_opcode_metadata[(OP)].flags & ({name}))") +@dataclasses.dataclass +class ActiveCacheEffect: + """Wraps a CacheEffect that is actually used, in context.""" + effect: parser.CacheEffect + offset: int + + FORBIDDEN_NAMES_IN_UOPS = ( "resume_with_error", # Proxy for "goto", which isn't an IDENTIFIER "unbound_local_error", @@ -344,6 +351,7 @@ class Instruction: unmoved_names: frozenset[str] instr_fmt: str instr_flags: InstructionFlags + active_caches: list[ActiveCacheEffect] # Set later family: 
parser.Family | None = None @@ -375,15 +383,19 @@ def __init__(self, inst: parser.InstDef): self.instr_flags = InstructionFlags.fromInstruction(inst) + self.active_caches = [] + offset = 0 + for effect in self.cache_effects: + if effect.name != UNUSED: + self.active_caches.append(ActiveCacheEffect(effect, offset)) + offset += effect.size + if self.instr_flags.HAS_ARG_FLAG: fmt = "IB" else: fmt = "IX" - cache = "C" - for ce in self.cache_effects: - for _ in range(ce.size): - fmt += cache - cache = "0" + if offset: + fmt += "C" + "0"*(offset-1) self.instr_fmt = fmt def is_viable_uop(self) -> bool: @@ -392,18 +404,11 @@ def is_viable_uop(self) -> bool: return False if self.instr_flags.HAS_ARG_FLAG: # If the instruction uses oparg, it cannot use any caches - for c in self.cache_effects: - if c.name != UNUSED: - return False + if self.active_caches: + return False else: # If it doesn't use oparg, it can have one cache entry - caches: list[parser.CacheEffect] = [] - cache_offset = 0 - for c in self.cache_effects: - if c.name != UNUSED: - caches.append(c) - cache_offset += c.size - if len(caches) > 1: + if len(self.active_caches) > 1: return False for forbidden in FORBIDDEN_NAMES_IN_UOPS: # TODO: Don't check in '#ifdef ENABLE_SPECIALIZATION' regions @@ -458,7 +463,7 @@ def write(self, out: Formatter, tier: Tiers = TIER_ONE) -> None: # out.emit(f"next_instr += OPSIZE({self.inst.name}) - 1;") - self.write_body(out, 0, tier=tier) + self.write_body(out, 0, self.active_caches, tier=tier) # Skip the rest if the block always exits if self.always_exits: @@ -492,33 +497,30 @@ def write_body( self, out: Formatter, dedent: int, - cache_adjust: int = 0, + active_caches: list[ActiveCacheEffect], tier: Tiers = TIER_ONE, ) -> None: """Write the instruction body.""" # Write cache effect variable declarations and initializations - cache_offset = cache_adjust - for ceffect in self.cache_effects: - if ceffect.name != UNUSED: - bits = ceffect.size * BITS_PER_CODE_UNIT - if bits == 64: - # 
NOTE: We assume that 64-bit data in the cache - # is always an object pointer. - # If this becomes false, we need a way to specify - # syntactically what type the cache data is. - typ = "PyObject *" - func = "read_obj" - else: - typ = f"uint{bits}_t " - func = f"read_u{bits}" - if tier == TIER_ONE: - out.emit( - f"{typ}{ceffect.name} = {func}(&next_instr[{cache_offset}].cache);" - ) - else: - out.emit(f"{typ}{ceffect.name} = operand;") - cache_offset += ceffect.size - assert cache_offset == self.cache_offset + cache_adjust + for active in active_caches: + ceffect = active.effect + bits = ceffect.size * BITS_PER_CODE_UNIT + if bits == 64: + # NOTE: We assume that 64-bit data in the cache + # is always an object pointer. + # If this becomes false, we need a way to specify + # syntactically what type the cache data is. + typ = "PyObject *" + func = "read_obj" + else: + typ = f"uint{bits}_t " + func = f"read_u{bits}" + if tier == TIER_ONE: + out.emit( + f"{typ}{ceffect.name} = {func}(&next_instr[{active.offset}].cache);" + ) + else: + out.emit(f"{typ}{ceffect.name} = operand;") # Write the body, substituting a goto for ERROR_IF() and other stuff assert dedent <= 0 @@ -583,8 +585,9 @@ class Component: instr: Instruction input_mapping: StackEffectMapping output_mapping: StackEffectMapping + active_caches: list[ActiveCacheEffect] - def write_body(self, out: Formatter, cache_adjust: int) -> None: + def write_body(self, out: Formatter) -> None: with out.block(""): input_names = {ieffect.name for _, ieffect in self.input_mapping} for var, ieffect in self.input_mapping: @@ -593,7 +596,7 @@ def write_body(self, out: Formatter, cache_adjust: int) -> None: if oeffect.name not in input_names: out.declare(oeffect, None) - self.instr.write_body(out, dedent=-4, cache_adjust=cache_adjust) + self.instr.write_body(out, -4, self.active_caches) for var, oeffect in self.output_mapping: out.assign(var, oeffect) @@ -611,6 +614,7 @@ class MacroInstruction: instr_flags: InstructionFlags 
macro: parser.Macro parts: list[Component | parser.CacheEffect] + cache_offset: int predicted: bool = False @@ -873,11 +877,11 @@ def effect_counts(self, name: str) -> tuple[int, int, int]: cache = instr.cache_offset input = len(instr.input_effects) output = len(instr.output_effects) - elif macro := self.macro_instrs.get(name): - cache, input, output = 0, 0, 0 - for part in macro.parts: + elif mac := self.macro_instrs.get(name): + cache = mac.cache_offset + input, output = 0, 0 + for part in mac.parts: if isinstance(part, Component): - cache += part.instr.cache_offset # A component may pop what the previous component pushed, # so we offset the input/output counts by that. delta_i = len(part.instr.input_effects) @@ -885,9 +889,6 @@ def effect_counts(self, name: str) -> tuple[int, int, int]: offset = min(delta_i, output) input += delta_i - offset output += delta_o - offset - else: - assert isinstance(part, parser.CacheEffect), part - cache += part.size else: assert False, f"Unknown instruction {name!r}" return cache, input, output @@ -906,29 +907,25 @@ def analyze_macro(self, macro: parser.Macro) -> MacroInstruction: stack, initial_sp = self.stack_analysis(components) sp = initial_sp parts: list[Component | parser.CacheEffect] = [] - format = "IB" flags = InstructionFlags.newEmpty() - cache = "C" + offset = 0 for component in components: match component: case parser.CacheEffect() as ceffect: parts.append(ceffect) - for _ in range(ceffect.size): - format += cache - cache = "0" + offset += ceffect.size case Instruction() as instr: - part, sp = self.analyze_instruction(instr, stack, sp) + part, sp, offset = self.analyze_instruction(instr, stack, sp, offset) parts.append(part) - for ce in instr.cache_effects: - for _ in range(ce.size): - format += cache - cache = "0" flags.add(instr.instr_flags) case _: typing.assert_never(component) final_sp = sp + format = "IB" + if offset: + format += "C" + "0"*(offset-1) return MacroInstruction( - macro.name, stack, initial_sp, 
final_sp, format, flags, macro, parts + macro.name, stack, initial_sp, final_sp, format, flags, macro, parts, offset ) def analyze_pseudo(self, pseudo: parser.Pseudo) -> PseudoInstruction: @@ -941,8 +938,8 @@ def analyze_pseudo(self, pseudo: parser.Pseudo) -> PseudoInstruction: return PseudoInstruction(pseudo.name, targets, fmts[0], targets[0].instr_flags) def analyze_instruction( - self, instr: Instruction, stack: list[StackEffect], sp: int - ) -> tuple[Component, int]: + self, instr: Instruction, stack: list[StackEffect], sp: int, offset: int + ) -> tuple[Component, int, int]: input_mapping: StackEffectMapping = [] for ieffect in reversed(instr.input_effects): sp -= 1 @@ -951,7 +948,12 @@ def analyze_instruction( for oeffect in instr.output_effects: output_mapping.append((stack[sp], oeffect)) sp += 1 - return Component(instr, input_mapping, output_mapping), sp + active_effects: list[ActiveCacheEffect] = [] + for ceffect in instr.cache_effects: + if ceffect.name != UNUSED: + active_effects.append(ActiveCacheEffect(ceffect, offset)) + offset += ceffect.size + return Component(instr, input_mapping, output_mapping, active_effects), sp, offset def check_macro_components( self, macro: parser.Macro @@ -1030,7 +1032,7 @@ def stack_analysis( def get_stack_effect_info( self, thing: parser.InstDef | parser.Macro | parser.Pseudo - ) -> tuple[AnyInstruction | None, str, str]: + ) -> tuple[AnyInstruction | None, str | None, str | None]: def effect_str(effects: list[StackEffect]) -> str: n_effect, sym_effect = list_effect_size(effects) if sym_effect: @@ -1108,6 +1110,7 @@ def write_stack_effect_functions(self) -> None: continue instr, popped, pushed = self.get_stack_effect_info(thing) if instr is not None: + assert popped is not None and pushed is not None popped_data.append((instr, popped)) pushed_data.append((instr, pushed)) @@ -1254,8 +1257,7 @@ def write_metadata(self) -> None: if instr.kind != "op" and instr.is_viable_uop(): # Double check there aren't any used cache 
effects. # If this fails, see write_macro_expansions(). - assert not [e for e in instr.cache_effects if e.name != UNUSED], \ - (instr.name, instr.cache_effects) + assert not instr.active_caches, (instr.name, instr.cache_effects) self.out.emit( f"[{name}] = " f"{{ .nuops = 1, .uops = {{ {{ {name}, 0, 0 }} }} }}," @@ -1332,34 +1334,19 @@ def write_macro_expansions(self, mac: MacroInstruction) -> None: offset = 0 # Cache effect offset expansions: list[tuple[str, int, int]] = [] # [(name, size, offset), ...] for part in mac.parts: - match part: - case parser.CacheEffect(): - offset += part.size - case Component(): - # All component instructions must be viable uops - if not part.instr.is_viable_uop(): - print(f"Part {part.instr.name} of {mac.name} is not a viable uop") - return - effect = None - effect_offset = 0 - for eff in part.instr.cache_effects: - if eff.name != UNUSED: - assert effect is None, \ - f"{mac.name} has multiple cache effects {eff.name}" - effect = eff - effect_offset = offset - offset += eff.size - if effect is not None: - # We don't seem to have examples of this yet; remove the print once we do - print(f"Part {part.instr.name} of {mac.name} has cache effect {effect.name}") - assert not part.instr.instr_flags.HAS_ARG_FLAG, \ - f"{mac.name} has both cache effect and arg flag" - exp = (part.instr.name, effect.size, effect_offset) - else: - exp = (part.instr.name, 0, 0) - expansions.append(exp) - case _: - typing.assert_never(part) + if isinstance(part, Component): + # All component instructions must be viable uops + if not part.instr.is_viable_uop(): + print(f"NOTE: Part {part.instr.name} of {mac.name} is not a viable uop") + return + if part.instr.instr_flags.HAS_ARG_FLAG or not part.active_caches: + size, offset = 0, 0 + else: + # If this assert triggers, is_viable_uop() lied + assert len(part.active_caches) == 1, (mac.name, part.instr.name) + cache = part.active_caches[0] + size, offset = cache.effect.size, cache.offset + 
expansions.append((part.instr.name, size, offset)) assert len(expansions) > 0, f"Macro {mac.name} has empty expansion?!" pieces = [f"{{ {name}, {size}, {offset} }}" for name, size, offset in expansions] self.out.emit( @@ -1483,7 +1470,7 @@ def write_macro(self, mac: MacroInstruction) -> None: cache_adjust += size case Component() as comp: last_instr = comp.instr - comp.write_body(self.out, cache_adjust) + comp.write_body(self.out) cache_adjust += comp.instr.cache_offset if cache_adjust: