From 1abe63601ab66eee7e0358f72b432e22c5e40cb4 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Fri, 11 Jul 2025 18:23:31 +0900 Subject: [PATCH 1/5] SymbolTable::varnames --- compiler/codegen/src/symboltable.rs | 38 ++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/compiler/codegen/src/symboltable.rs b/compiler/codegen/src/symboltable.rs index 66dbff326a..0b13522ab8 100644 --- a/compiler/codegen/src/symboltable.rs +++ b/compiler/codegen/src/symboltable.rs @@ -45,6 +45,9 @@ pub struct SymbolTable { /// A list of sub-scopes in the order as found in the /// AST nodes. pub sub_tables: Vec, + + /// Variable names in definition order (parameters first, then locals) + pub varnames: Vec, } impl SymbolTable { @@ -56,6 +59,7 @@ impl SymbolTable { is_nested, symbols: IndexMap::default(), sub_tables: vec![], + varnames: Vec::new(), } } @@ -573,6 +577,8 @@ struct SymbolTableBuilder<'src> { tables: Vec, future_annotations: bool, source_code: SourceCode<'src>, + // Current scope's varnames being collected (temporary storage) + current_varnames: Vec, } /// Enum to indicate in what mode an expression @@ -595,6 +601,7 @@ impl<'src> SymbolTableBuilder<'src> { tables: vec![], future_annotations: false, source_code, + current_varnames: Vec::new(), }; this.enter_scope("top", SymbolTableType::Module, 0); this @@ -605,6 +612,8 @@ impl SymbolTableBuilder<'_> { fn finish(mut self) -> Result { assert_eq!(self.tables.len(), 1); let mut symbol_table = self.tables.pop().unwrap(); + // Save varnames for the top-level module scope + symbol_table.varnames = self.current_varnames; analyze_symbol_table(&mut symbol_table)?; Ok(symbol_table) } @@ -621,7 +630,9 @@ impl SymbolTableBuilder<'_> { /// Pop symbol table and add to sub table of parent table. fn leave_scope(&mut self) { - let table = self.tables.pop().unwrap(); + let mut table = self.tables.pop().unwrap(); + // Save the collected varnames to the symbol table + table.varnames = std::mem::take(&mut self.current_varnames); self.tables.last_mut().unwrap().sub_tables.push(table); } @@ -1533,18 +1544,43 @@ impl SymbolTableBuilder<'_> { } SymbolUsage::Parameter => { flags.insert(SymbolFlags::PARAMETER); + // Parameters are always added to varnames first + let name_str = symbol.name.clone(); + if !self.current_varnames.contains(&name_str) { + self.current_varnames.push(name_str); + } } SymbolUsage::AnnotationParameter => { flags.insert(SymbolFlags::PARAMETER | SymbolFlags::ANNOTATED); + // Annotated parameters are also added to varnames + let name_str = symbol.name.clone(); + if !self.current_varnames.contains(&name_str) { + self.current_varnames.push(name_str); + } } SymbolUsage::AnnotationAssigned => { flags.insert(SymbolFlags::ASSIGNED | SymbolFlags::ANNOTATED); } SymbolUsage::Assigned => { flags.insert(SymbolFlags::ASSIGNED); + // Local variables (assigned) are added to varnames if they are local scope + // and not already in varnames + if symbol.scope == SymbolScope::Local { + let name_str = symbol.name.clone(); + if !self.current_varnames.contains(&name_str) { + self.current_varnames.push(name_str); + } + } } SymbolUsage::AssignedNamedExprInComprehension => { flags.insert(SymbolFlags::ASSIGNED | SymbolFlags::ASSIGNED_IN_COMPREHENSION); + // Named expressions in comprehensions might also be locals + if symbol.scope == SymbolScope::Local { + let name_str = symbol.name.clone(); + if !self.current_varnames.contains(&name_str) { + self.current_varnames.push(name_str); + } + } } SymbolUsage::Global => { symbol.scope = SymbolScope::GlobalExplicit; From db374ae49789e0181c29d6cb44baaaa03ca8ca9a Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Fri, 11 Jul 2025 19:23:30 +0900 Subject: [PATCH 2/5] varname_cache copies it --- compiler/codegen/src/compile.rs | 9 ++++++++- compiler/codegen/src/symboltable.rs | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/compiler/codegen/src/compile.rs b/compiler/codegen/src/compile.rs index 5aef2020d0..75bf4c74c9 100644 --- a/compiler/codegen/src/compile.rs +++ b/compiler/codegen/src/compile.rs @@ -399,6 +399,13 @@ impl Compiler<'_> { .map(|(var, _)| var.clone()) .collect(); + // Initialize varname_cache from SymbolTable::varnames + let varname_cache: IndexSet = table + .varnames + .iter() + .cloned() + .collect(); + // Qualname will be set later by set_qualname let qualname = None; @@ -420,7 +427,7 @@ impl Compiler<'_> { current_block: ir::BlockIdx(0), constants: IndexSet::default(), name_cache: IndexSet::default(), - varname_cache: IndexSet::default(), + varname_cache, cellvar_cache, freevar_cache, }; diff --git a/compiler/codegen/src/symboltable.rs b/compiler/codegen/src/symboltable.rs index 0b13522ab8..52b6bae644 100644 --- a/compiler/codegen/src/symboltable.rs +++ b/compiler/codegen/src/symboltable.rs @@ -626,6 +626,8 @@ impl SymbolTableBuilder<'_> { .unwrap_or(false); let table = SymbolTable::new(name.to_owned(), typ, line_number, is_nested); self.tables.push(table); + // Clear current_varnames for the new scope + self.current_varnames.clear(); } /// Pop symbol table and add to sub table of parent table. From 2725d42acce41a7c45d9025a9e808f78a131ee2f Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Fri, 11 Jul 2025 19:51:50 +0900 Subject: [PATCH 3/5] fasthidden & static attributes --- compiler/codegen/src/compile.rs | 25 ++++++++++++++++--------- compiler/codegen/src/ir.rs | 11 ++++++++++- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/compiler/codegen/src/compile.rs b/compiler/codegen/src/compile.rs index 75bf4c74c9..e473004e7f 100644 --- a/compiler/codegen/src/compile.rs +++ b/compiler/codegen/src/compile.rs @@ -8,10 +8,10 @@ #![deny(clippy::cast_possible_truncation)] use crate::{ - IndexSet, ToPythonName, + IndexMap, IndexSet, ToPythonName, error::{CodegenError, CodegenErrorType, PatternUnreachableReason}, ir::{self, BlockIdx}, - symboltable::{self, SymbolFlags, SymbolScope, SymbolTable}, + symboltable::{self, SymbolFlags, SymbolScope, SymbolTable, SymbolTableType}, unparse::unparse_expr, }; use itertools::Itertools; @@ -318,6 +318,8 @@ impl<'src> Compiler<'src> { varname_cache: IndexSet::default(), cellvar_cache: IndexSet::default(), freevar_cache: IndexSet::default(), + fasthidden_cache: IndexMap::default(), + static_attributes: None, }; Compiler { code_stack: vec![module_code], @@ -382,6 +384,9 @@ impl Compiler<'_> { let source_path = self.source_code.path.to_owned(); let first_line_number = self.get_source_line_number(); + // Get the private name from current scope if exists + let private = self.code_stack.last().and_then(|info| info.private.clone()); + let table = self.push_symbol_table(); let cellvar_cache = table @@ -400,17 +405,13 @@ impl Compiler<'_> { .collect(); // Initialize varname_cache from SymbolTable::varnames - let varname_cache: IndexSet = table - .varnames - .iter() - .cloned() - .collect(); + let varname_cache: IndexSet = table.varnames.iter().cloned().collect(); // Qualname will be set later by set_qualname let qualname = None; - // Get the private name from current scope if exists - let private = self.code_stack.last().and_then(|info| info.private.clone()); + // Check if this is a class scope + let is_class_scope = table.typ == SymbolTableType::Class; let info = ir::CodeInfo { flags, @@ -430,6 +431,12 @@ impl Compiler<'_> { varname_cache, cellvar_cache, freevar_cache, + fasthidden_cache: IndexMap::default(), + static_attributes: if is_class_scope { + Some(IndexSet::default()) + } else { + None + }, }; self.code_stack.push(info); } diff --git a/compiler/codegen/src/ir.rs b/compiler/codegen/src/ir.rs index 852051777e..d9d017876f 100644 --- a/compiler/codegen/src/ir.rs +++ b/compiler/codegen/src/ir.rs @@ -1,7 +1,7 @@ use std::ops; -use crate::IndexSet; use crate::error::InternalError; +use crate::{IndexMap, IndexSet}; use ruff_source_file::{OneIndexed, SourceLocation}; use rustpython_compiler_core::bytecode::{ CodeFlags, CodeObject, CodeUnit, ConstantData, InstrDisplayContext, Instruction, Label, OpArg, @@ -83,6 +83,13 @@ pub struct CodeInfo { pub varname_cache: IndexSet, pub cellvar_cache: IndexSet, pub freevar_cache: IndexSet, + + // For tracking hidden fast locals (used in comprehensions) + // Maps variable name to bool (true if currently fast, false if was fast) + pub fast_hidden_cache: IndexMap, + + // For class scopes: attributes accessed via self.X + pub static_attributes: Option>, } impl CodeInfo { pub fn finalize_code(mut self, optimize: u8) -> crate::InternalResult { @@ -111,6 +118,8 @@ impl CodeInfo { varname_cache, cellvar_cache, freevar_cache, + fast_hidden_cache: _, + static_attributes: _, } = self; let mut instructions = Vec::new(); From 3503b768e0bd9cf77a78281c483087c9fa80b425 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Fri, 11 Jul 2025 20:22:33 +0900 Subject: [PATCH 4/5] metadata --- compiler/codegen/src/compile.rs | 106 +++++++++++++++++--------------- compiler/codegen/src/ir.rs | 88 +++++++++++++++----------- 2 files changed, 110 insertions(+), 84 deletions(-) diff --git a/compiler/codegen/src/compile.rs b/compiler/codegen/src/compile.rs index e473004e7f..e1d76e65f0 100644 --- a/compiler/codegen/src/compile.rs +++ b/compiler/codegen/src/compile.rs @@ -303,23 +303,26 @@ impl<'src> Compiler<'src> { fn new(opts: CompileOpts, source_code: SourceCode<'src>, code_name: String) -> Self { let module_code = ir::CodeInfo { flags: bytecode::CodeFlags::NEW_LOCALS, - posonlyarg_count: 0, - arg_count: 0, - kwonlyarg_count: 0, source_path: source_code.path.to_owned(), - first_line_number: OneIndexed::MIN, - obj_name: code_name.clone(), - qualname: Some(code_name), private: None, blocks: vec![ir::Block::default()], current_block: ir::BlockIdx(0), - constants: IndexSet::default(), - name_cache: IndexSet::default(), - varname_cache: IndexSet::default(), - cellvar_cache: IndexSet::default(), - freevar_cache: IndexSet::default(), - fasthidden_cache: IndexMap::default(), + metadata: ir::CodeUnitMetadata { + name: code_name.clone(), + qualname: Some(code_name), + consts: IndexSet::default(), + names: IndexSet::default(), + varnames: IndexSet::default(), + cellvars: IndexSet::default(), + freevars: IndexSet::default(), + fast_hidden: IndexMap::default(), + argcount: 0, + posonlyargcount: 0, + kwonlyargcount: 0, + firstlineno: OneIndexed::MIN, + }, static_attributes: None, + in_inlined_comp: false, }; Compiler { code_stack: vec![module_code], @@ -415,28 +418,30 @@ impl Compiler<'_> { let info = ir::CodeInfo { flags, - posonlyarg_count, - arg_count, - kwonlyarg_count, source_path, - first_line_number, - obj_name, - qualname, private, - blocks: vec![ir::Block::default()], current_block: ir::BlockIdx(0), - constants: IndexSet::default(), - name_cache: IndexSet::default(), - varname_cache, - cellvar_cache, - freevar_cache, - fasthidden_cache: IndexMap::default(), + metadata: ir::CodeUnitMetadata { + name: obj_name, + qualname, + consts: IndexSet::default(), + names: IndexSet::default(), + varnames: varname_cache, + cellvars: cellvar_cache, + freevars: freevar_cache, + fast_hidden: IndexMap::default(), + argcount: arg_count, + posonlyargcount: posonlyarg_count, + kwonlyargcount: kwonlyarg_count, + firstlineno: first_line_number, + }, static_attributes: if is_class_scope { Some(IndexSet::default()) } else { None }, + in_inlined_comp: false, }; self.code_stack.push(info); } @@ -452,7 +457,7 @@ impl Compiler<'_> { // could take impl Into>, but everything is borrowed from ast structs; we never // actually have a `String` to pass fn name(&mut self, name: &str) -> bytecode::NameIdx { - self._name_inner(name, |i| &mut i.name_cache) + self._name_inner(name, |i| &mut i.metadata.names) } fn varname(&mut self, name: &str) -> CompileResult { if Compiler::is_forbidden_arg_name(name) { @@ -460,7 +465,7 @@ impl Compiler<'_> { "cannot assign to {name}", )))); } - Ok(self._name_inner(name, |i| &mut i.varname_cache)) + Ok(self._name_inner(name, |i| &mut i.metadata.varnames)) } fn _name_inner( &mut self, @@ -478,14 +483,14 @@ impl Compiler<'_> { /// Set the qualified name for the current code object, based on CPython's compiler_set_qualname fn set_qualname(&mut self) -> String { let qualname = self.make_qualname(); - self.current_code_info().qualname = Some(qualname.clone()); + self.current_code_info().metadata.qualname = Some(qualname.clone()); qualname } fn make_qualname(&mut self) -> String { let stack_size = self.code_stack.len(); assert!(stack_size >= 1); - let current_obj_name = self.current_code_info().obj_name.clone(); + let current_obj_name = self.current_code_info().metadata.name.clone(); // If we're at the module level (stack_size == 1), qualname is just the name if stack_size <= 1 { @@ -497,7 +502,7 @@ impl Compiler<'_> { let mut parent = &self.code_stack[parent_idx]; // If parent is a type parameter scope, look at grandparent - if parent.obj_name.starts_with(" { current_obj_name } else { // Check parent scope type - let parent_obj_name = &parent.obj_name; + let parent_obj_name = &parent.metadata.name; // Determine if parent is a function-like scope let is_function_parent = parent.flags.contains(bytecode::CodeFlags::IS_OPTIMIZED) @@ -550,12 +555,12 @@ impl Compiler<'_> { if is_function_parent { // For functions, append . to parent qualname // Use parent's qualname if available, otherwise use parent_obj_name - let parent_qualname = parent.qualname.as_ref().unwrap_or(parent_obj_name); + let parent_qualname = parent.metadata.qualname.as_ref().unwrap_or(parent_obj_name); format!("{parent_qualname}..{current_obj_name}") } else { // For classes and other scopes, use parent's qualname directly // Use parent's qualname if available, otherwise use parent_obj_name - let parent_qualname = parent.qualname.as_ref().unwrap_or(parent_obj_name); + let parent_qualname = parent.metadata.qualname.as_ref().unwrap_or(parent_obj_name); if parent_qualname == "" { // Module level, just use the name current_obj_name @@ -720,7 +725,7 @@ impl Compiler<'_> { .ok_or_else(|| InternalError::MissingSymbol(name.to_string())), ); let info = self.code_stack.last_mut().unwrap(); - let mut cache = &mut info.name_cache; + let mut cache = &mut info.metadata.names; enum NameOpType { Fast, Global, @@ -729,7 +734,7 @@ impl Compiler<'_> { } let op_typ = match symbol.scope { SymbolScope::Local if self.ctx.in_func() => { - cache = &mut info.varname_cache; + cache = &mut info.metadata.varnames; NameOpType::Fast } SymbolScope::GlobalExplicit => NameOpType::Global, @@ -739,16 +744,16 @@ impl Compiler<'_> { SymbolScope::GlobalImplicit | SymbolScope::Unknown => NameOpType::Local, SymbolScope::Local => NameOpType::Local, SymbolScope::Free => { - cache = &mut info.freevar_cache; + cache = &mut info.metadata.freevars; NameOpType::Deref } SymbolScope::Cell => { - cache = &mut info.cellvar_cache; + cache = &mut info.metadata.cellvars; NameOpType::Deref } // TODO: is this right? SymbolScope::TypeParams => { // Type parameters are always cell variables - cache = &mut info.cellvar_cache; + cache = &mut info.metadata.cellvars; NameOpType::Deref } // SymbolScope::Unknown => NameOpType::Global, }; @@ -764,7 +769,7 @@ impl Compiler<'_> { .get_index_of(name.as_ref()) .unwrap_or_else(|| cache.insert_full(name.into_owned()).0); if let SymbolScope::Free = symbol.scope { - idx += info.cellvar_cache.len(); + idx += info.metadata.cellvars.len(); } let op = match op_typ { NameOpType::Fast => match usage { @@ -1653,7 +1658,8 @@ impl Compiler<'_> { let (doc_str, body) = split_doc(body, &self.opts); self.current_code_info() - .constants + .metadata + .consts .insert_full(ConstantData::None); // Emit RESUME instruction at function start @@ -1774,10 +1780,12 @@ impl Compiler<'_> { ); let parent_code = self.code_stack.last().unwrap(); let vars = match symbol.scope { - SymbolScope::Free => &parent_code.freevar_cache, - SymbolScope::Cell => &parent_code.cellvar_cache, - SymbolScope::TypeParams => &parent_code.cellvar_cache, - _ if symbol.flags.contains(SymbolFlags::FREE_CLASS) => &parent_code.freevar_cache, + SymbolScope::Free => &parent_code.metadata.freevars, + SymbolScope::Cell => &parent_code.metadata.cellvars, + SymbolScope::TypeParams => &parent_code.metadata.cellvars, + _ if symbol.flags.contains(SymbolFlags::FREE_CLASS) => { + &parent_code.metadata.freevars + } x => unreachable!( "var {} in a {:?} should be free or cell but it's {:?}", var, table.typ, x @@ -1785,7 +1793,7 @@ impl Compiler<'_> { }; let mut idx = vars.get_index_of(var).unwrap(); if let SymbolScope::Free = symbol.scope { - idx += parent_code.cellvar_cache.len(); + idx += parent_code.metadata.cellvars.len(); } emit!(self, Instruction::LoadClosure(idx.to_u32())) } @@ -1910,7 +1918,8 @@ impl Compiler<'_> { .code_stack .last_mut() .unwrap() - .cellvar_cache + .metadata + .cellvars .iter() .position(|var| *var == "__class__"); @@ -3691,7 +3700,8 @@ impl Compiler<'_> { }; self.current_code_info() - .constants + .metadata + .consts .insert_full(ConstantData::None); self.compile_expression(body)?; @@ -4344,7 +4354,7 @@ impl Compiler<'_> { fn arg_constant(&mut self, constant: ConstantData) -> u32 { let info = self.current_code_info(); - info.constants.insert_full(constant).0.to_u32() + info.metadata.consts.insert_full(constant).0.to_u32() } fn emit_load_const(&mut self, constant: ConstantData) { diff --git a/compiler/codegen/src/ir.rs b/compiler/codegen/src/ir.rs index d9d017876f..905ad83553 100644 --- a/compiler/codegen/src/ir.rs +++ b/compiler/codegen/src/ir.rs @@ -6,6 +6,24 @@ use ruff_source_file::{OneIndexed, SourceLocation}; use rustpython_compiler_core::bytecode::{ CodeFlags, CodeObject, CodeUnit, ConstantData, InstrDisplayContext, Instruction, Label, OpArg, }; + +/// Metadata for a code unit +// = _PyCompile_CodeUnitMetadata +#[derive(Clone, Debug)] +pub struct CodeUnitMetadata { + pub name: String, // u_name (obj_name) + pub qualname: Option, // u_qualname + pub consts: IndexSet, // u_consts + pub names: IndexSet, // u_names + pub varnames: IndexSet, // u_varnames + pub cellvars: IndexSet, // u_cellvars + pub freevars: IndexSet, // u_freevars + pub fast_hidden: IndexMap, // u_fast_hidden + pub argcount: u32, // u_argcount + pub posonlyargcount: u32, // u_posonlyargcount + pub kwonlyargcount: u32, // u_kwonlyargcount + pub firstlineno: OneIndexed, // u_firstlineno +} // use rustpython_parser_core::source_code::{LineNumber, SourceLocation}; #[derive(Copy, Clone, PartialEq, Eq, Debug)] @@ -67,29 +85,19 @@ impl Default for Block { pub struct CodeInfo { pub flags: CodeFlags, - pub posonlyarg_count: u32, // Number of positional-only arguments - pub arg_count: u32, - pub kwonlyarg_count: u32, pub source_path: String, - pub first_line_number: OneIndexed, - pub obj_name: String, // Name of the object that created this code object - pub qualname: Option, // Qualified name of the object pub private: Option, // For private name mangling, mostly for class pub blocks: Vec, pub current_block: BlockIdx, - pub constants: IndexSet, - pub name_cache: IndexSet, - pub varname_cache: IndexSet, - pub cellvar_cache: IndexSet, - pub freevar_cache: IndexSet, - // For tracking hidden fast locals (used in comprehensions) - // Maps variable name to bool (true if currently fast, false if was fast) - pub fast_hidden_cache: IndexMap, + pub metadata: CodeUnitMetadata, // For class scopes: attributes accessed via self.X pub static_attributes: Option>, + + // True if compiling an inlined comprehension + pub in_inlined_comp: bool, } impl CodeInfo { pub fn finalize_code(mut self, optimize: u8) -> crate::InternalResult { @@ -102,26 +110,31 @@ impl CodeInfo { let Self { flags, - posonlyarg_count, - arg_count, - kwonlyarg_count, source_path, - first_line_number, - obj_name, - qualname, private: _, // private is only used during compilation mut blocks, current_block: _, - constants, - name_cache, - varname_cache, - cellvar_cache, - freevar_cache, - fast_hidden_cache: _, + metadata, static_attributes: _, + in_inlined_comp: _, } = self; + let CodeUnitMetadata { + name: obj_name, + qualname, + consts: constants, + names: name_cache, + varnames: varname_cache, + cellvars: cellvar_cache, + freevars: freevar_cache, + fast_hidden: _, + argcount: arg_count, + posonlyargcount: posonlyarg_count, + kwonlyargcount: kwonlyarg_count, + firstlineno: first_line_number, + } = metadata; + let mut instructions = Vec::new(); let mut locations = Vec::new(); @@ -191,21 +204,23 @@ impl CodeInfo { } fn cell2arg(&self) -> Option> { - if self.cellvar_cache.is_empty() { + if self.metadata.cellvars.is_empty() { return None; } - let total_args = self.arg_count - + self.kwonlyarg_count + let total_args = self.metadata.argcount + + self.metadata.kwonlyargcount + self.flags.contains(CodeFlags::HAS_VARARGS) as u32 + self.flags.contains(CodeFlags::HAS_VARKEYWORDS) as u32; let mut found_cellarg = false; let cell2arg = self - .cellvar_cache + .metadata + .cellvars .iter() .map(|var| { - self.varname_cache + self.metadata + .varnames .get_index_of(var) // check that it's actually an arg .filter(|i| *i < total_args as usize) @@ -311,18 +326,19 @@ impl CodeInfo { impl InstrDisplayContext for CodeInfo { type Constant = ConstantData; fn get_constant(&self, i: usize) -> &ConstantData { - &self.constants[i] + &self.metadata.consts[i] } fn get_name(&self, i: usize) -> &str { - self.name_cache[i].as_ref() + self.metadata.names[i].as_ref() } fn get_varname(&self, i: usize) -> &str { - self.varname_cache[i].as_ref() + self.metadata.varnames[i].as_ref() } fn get_cell_name(&self, i: usize) -> &str { - self.cellvar_cache + self.metadata + .cellvars .get_index(i) - .unwrap_or_else(|| &self.freevar_cache[i - self.cellvar_cache.len()]) + .unwrap_or_else(|| &self.metadata.freevars[i - self.metadata.cellvars.len()]) .as_ref() } } From ab14160761b662c664b00f539ac3af8f9f852465 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Fri, 11 Jul 2025 22:41:52 +0900 Subject: [PATCH 5/5] fblock --- compiler/codegen/src/compile.rs | 153 ++++++++++++++++++++++++++++---- compiler/codegen/src/ir.rs | 4 + 2 files changed, 139 insertions(+), 18 deletions(-) diff --git a/compiler/codegen/src/compile.rs b/compiler/codegen/src/compile.rs index e1d76e65f0..6b8007c151 100644 --- a/compiler/codegen/src/compile.rs +++ b/compiler/codegen/src/compile.rs @@ -14,6 +14,33 @@ use crate::{ symboltable::{self, SymbolFlags, SymbolScope, SymbolTable, SymbolTableType}, unparse::unparse_expr, }; + +const MAXBLOCKS: usize = 20; + +#[derive(Debug, Clone, Copy)] +pub enum FBlockType { + WhileLoop, + ForLoop, + TryExcept, + FinallyTry, + FinallyEnd, + With, + AsyncWith, + HandlerCleanup, + PopValue, + ExceptionHandler, + ExceptionGroupHandler, + AsyncComprehensionGenerator, + StopIteration, +} + +#[derive(Debug, Clone)] +pub struct FBlockInfo { + pub fb_type: FBlockType, + pub fb_block: BlockIdx, + pub fb_exit: BlockIdx, + // fb_datum is not needed in RustPython +} use itertools::Itertools; use malachite_bigint::BigInt; use num_complex::Complex; @@ -323,6 +350,7 @@ impl<'src> Compiler<'src> { }, static_attributes: None, in_inlined_comp: false, + fblock: Vec::with_capacity(MAXBLOCKS), }; Compiler { code_stack: vec![module_code], @@ -442,6 +470,7 @@ impl Compiler<'_> { None }, in_inlined_comp: false, + fblock: Vec::with_capacity(MAXBLOCKS), }; self.code_stack.push(info); } @@ -454,6 +483,37 @@ impl Compiler<'_> { unwrap_internal(self, stack_top.finalize_code(self.opts.optimize)) } + /// Push a new fblock + // = compiler_push_fblock + fn push_fblock( + &mut self, + fb_type: FBlockType, + fb_block: BlockIdx, + fb_exit: BlockIdx, + ) -> CompileResult<()> { + let code = self.current_code_info(); + if code.fblock.len() >= MAXBLOCKS { + return Err(self.error(CodegenErrorType::SyntaxError( + "too many statically nested blocks".to_owned(), + ))); + } + code.fblock.push(FBlockInfo { + fb_type, + fb_block, + fb_exit, + }); + Ok(()) + } + + /// Pop an fblock + // = compiler_pop_fblock + fn pop_fblock(&mut self, _expected_type: FBlockType) -> FBlockInfo { + let code = self.current_code_info(); + // TODO: Add assertion to check expected type matches + // assert!(matches!(fblock.fb_type, expected_type)); + code.fblock.pop().expect("fblock stack underflow") + } + // could take impl Into>, but everything is borrowed from ast structs; we never // actually have a `String` to pass fn name(&mut self, name: &str) -> bytecode::NameIdx { @@ -1086,26 +1146,62 @@ impl Compiler<'_> { self.switch_to_block(after_block); } } - Stmt::Break(_) => match self.ctx.loop_data { - Some((_, end)) => { - emit!(self, Instruction::Break { target: end }); - } - None => { - return Err( - self.error_ranged(CodegenErrorType::InvalidBreak, statement.range()) - ); - } - }, - Stmt::Continue(_) => match self.ctx.loop_data { - Some((start, _)) => { - emit!(self, Instruction::Continue { target: start }); + Stmt::Break(_) => { + // Find the innermost loop in fblock stack + let found_loop = { + let code = self.current_code_info(); + let mut result = None; + for i in (0..code.fblock.len()).rev() { + match code.fblock[i].fb_type { + FBlockType::WhileLoop | FBlockType::ForLoop => { + result = Some(code.fblock[i].fb_exit); + break; + } + _ => continue, + } + } + result + }; + + match found_loop { + Some(exit_block) => { + emit!(self, Instruction::Break { target: exit_block }); + } + None => { + return Err( + self.error_ranged(CodegenErrorType::InvalidBreak, statement.range()) + ); + } } - None => { - return Err( - self.error_ranged(CodegenErrorType::InvalidContinue, statement.range()) - ); + } + Stmt::Continue(_) => { + // Find the innermost loop in fblock stack + let found_loop = { + let code = self.current_code_info(); + let mut result = None; + for i in (0..code.fblock.len()).rev() { + match code.fblock[i].fb_type { + FBlockType::WhileLoop | FBlockType::ForLoop => { + result = Some(code.fblock[i].fb_block); + break; + } + _ => continue, + } + } + result + }; + + match found_loop { + Some(loop_block) => { + emit!(self, Instruction::Continue { target: loop_block }); + } + None => { + return Err( + self.error_ranged(CodegenErrorType::InvalidContinue, statement.range()) + ); + } } - }, + } Stmt::Return(StmtReturn { value, .. }) => { if !self.ctx.in_func() { return Err( @@ -2028,6 +2124,9 @@ impl Compiler<'_> { emit!(self, Instruction::SetupLoop); self.switch_to_block(while_block); + // Push fblock for while loop + self.push_fblock(FBlockType::WhileLoop, while_block, after_block)?; + self.compile_jump_if(test, false, else_block)?; let was_in_loop = self.ctx.loop_data.replace((while_block, after_block)); @@ -2040,6 +2139,9 @@ impl Compiler<'_> { } ); self.switch_to_block(else_block); + + // Pop fblock + self.pop_fblock(FBlockType::WhileLoop); emit!(self, Instruction::PopBlock); self.compile_statements(orelse)?; self.switch_to_block(after_block); @@ -2150,6 +2252,10 @@ impl Compiler<'_> { emit!(self, Instruction::GetAIter); self.switch_to_block(for_block); + + // Push fblock for async for loop + self.push_fblock(FBlockType::ForLoop, for_block, after_block)?; + emit!( self, Instruction::SetupExcept { @@ -2172,6 +2278,10 @@ impl Compiler<'_> { emit!(self, Instruction::GetIter); self.switch_to_block(for_block); + + // Push fblock for for loop + self.push_fblock(FBlockType::ForLoop, for_block, after_block)?; + emit!(self, Instruction::ForIter { target: else_block }); // Start of loop iteration, set targets: @@ -2184,6 +2294,10 @@ impl Compiler<'_> { emit!(self, Instruction::Jump { target: for_block }); self.switch_to_block(else_block); + + // Pop fblock + self.pop_fblock(FBlockType::ForLoop); + if is_async { emit!(self, Instruction::EndAsyncFor); } @@ -4162,6 +4276,9 @@ impl Compiler<'_> { // Create magnificent function : self.push_output(flags, 1, 1, 0, name.to_owned()); + // Mark that we're in an inlined comprehension + self.current_code_info().in_inlined_comp = true; + // Set qualname for comprehension self.set_qualname(); diff --git a/compiler/codegen/src/ir.rs b/compiler/codegen/src/ir.rs index 905ad83553..f2299892b3 100644 --- a/compiler/codegen/src/ir.rs +++ b/compiler/codegen/src/ir.rs @@ -98,6 +98,9 @@ pub struct CodeInfo { // True if compiling an inlined comprehension pub in_inlined_comp: bool, + + // Block stack for tracking nested control structures + pub fblock: Vec, } impl CodeInfo { pub fn finalize_code(mut self, optimize: u8) -> crate::InternalResult { @@ -118,6 +121,7 @@ impl CodeInfo { metadata, static_attributes: _, in_inlined_comp: _, + fblock: _, } = self; let CodeUnitMetadata {