diff --git a/compiler/codegen/src/compile.rs b/compiler/codegen/src/compile.rs index 5aef2020d0..6b8007c151 100644 --- a/compiler/codegen/src/compile.rs +++ b/compiler/codegen/src/compile.rs @@ -8,12 +8,39 @@ #![deny(clippy::cast_possible_truncation)] use crate::{ - IndexSet, ToPythonName, + IndexMap, IndexSet, ToPythonName, error::{CodegenError, CodegenErrorType, PatternUnreachableReason}, ir::{self, BlockIdx}, - symboltable::{self, SymbolFlags, SymbolScope, SymbolTable}, + symboltable::{self, SymbolFlags, SymbolScope, SymbolTable, SymbolTableType}, unparse::unparse_expr, }; + +const MAXBLOCKS: usize = 20; + +#[derive(Debug, Clone, Copy)] +pub enum FBlockType { + WhileLoop, + ForLoop, + TryExcept, + FinallyTry, + FinallyEnd, + With, + AsyncWith, + HandlerCleanup, + PopValue, + ExceptionHandler, + ExceptionGroupHandler, + AsyncComprehensionGenerator, + StopIteration, +} + +#[derive(Debug, Clone)] +pub struct FBlockInfo { + pub fb_type: FBlockType, + pub fb_block: BlockIdx, + pub fb_exit: BlockIdx, + // fb_datum is not needed in RustPython +} use itertools::Itertools; use malachite_bigint::BigInt; use num_complex::Complex; @@ -303,21 +330,27 @@ impl<'src> Compiler<'src> { fn new(opts: CompileOpts, source_code: SourceCode<'src>, code_name: String) -> Self { let module_code = ir::CodeInfo { flags: bytecode::CodeFlags::NEW_LOCALS, - posonlyarg_count: 0, - arg_count: 0, - kwonlyarg_count: 0, source_path: source_code.path.to_owned(), - first_line_number: OneIndexed::MIN, - obj_name: code_name.clone(), - qualname: Some(code_name), private: None, blocks: vec![ir::Block::default()], current_block: ir::BlockIdx(0), - constants: IndexSet::default(), - name_cache: IndexSet::default(), - varname_cache: IndexSet::default(), - cellvar_cache: IndexSet::default(), - freevar_cache: IndexSet::default(), + metadata: ir::CodeUnitMetadata { + name: code_name.clone(), + qualname: Some(code_name), + consts: IndexSet::default(), + names: IndexSet::default(), + varnames: IndexSet::default(), + cellvars: IndexSet::default(), + freevars: IndexSet::default(), + fast_hidden: IndexMap::default(), + argcount: 0, + posonlyargcount: 0, + kwonlyargcount: 0, + firstlineno: OneIndexed::MIN, + }, + static_attributes: None, + in_inlined_comp: false, + fblock: Vec::with_capacity(MAXBLOCKS), }; Compiler { code_stack: vec![module_code], @@ -382,6 +415,9 @@ impl Compiler<'_> { let source_path = self.source_code.path.to_owned(); let first_line_number = self.get_source_line_number(); + // Get the private name from current scope if exists + let private = self.code_stack.last().and_then(|info| info.private.clone()); + let table = self.push_symbol_table(); let cellvar_cache = table @@ -399,30 +435,42 @@ impl Compiler<'_> { .map(|(var, _)| var.clone()) .collect(); + // Initialize varname_cache from SymbolTable::varnames + let varname_cache: IndexSet = table.varnames.iter().cloned().collect(); + // Qualname will be set later by set_qualname let qualname = None; - // Get the private name from current scope if exists - let private = self.code_stack.last().and_then(|info| info.private.clone()); + // Check if this is a class scope + let is_class_scope = table.typ == SymbolTableType::Class; let info = ir::CodeInfo { flags, - posonlyarg_count, - arg_count, - kwonlyarg_count, source_path, - first_line_number, - obj_name, - qualname, private, - blocks: vec![ir::Block::default()], current_block: ir::BlockIdx(0), - constants: IndexSet::default(), - name_cache: IndexSet::default(), - varname_cache: IndexSet::default(), - cellvar_cache, - freevar_cache, + metadata: ir::CodeUnitMetadata { + name: obj_name, + qualname, + consts: IndexSet::default(), + names: IndexSet::default(), + varnames: varname_cache, + cellvars: cellvar_cache, + freevars: freevar_cache, + fast_hidden: IndexMap::default(), + argcount: arg_count, + posonlyargcount: posonlyarg_count, + kwonlyargcount: kwonlyarg_count, + firstlineno: first_line_number, + }, + static_attributes: if is_class_scope { + Some(IndexSet::default()) + } else { + None + }, + in_inlined_comp: false, + fblock: Vec::with_capacity(MAXBLOCKS), }; self.code_stack.push(info); } @@ -435,10 +483,41 @@ impl Compiler<'_> { unwrap_internal(self, stack_top.finalize_code(self.opts.optimize)) } + /// Push a new fblock + // = compiler_push_fblock + fn push_fblock( + &mut self, + fb_type: FBlockType, + fb_block: BlockIdx, + fb_exit: BlockIdx, + ) -> CompileResult<()> { + let code = self.current_code_info(); + if code.fblock.len() >= MAXBLOCKS { + return Err(self.error(CodegenErrorType::SyntaxError( + "too many statically nested blocks".to_owned(), + ))); + } + code.fblock.push(FBlockInfo { + fb_type, + fb_block, + fb_exit, + }); + Ok(()) + } + + /// Pop an fblock + // = compiler_pop_fblock + fn pop_fblock(&mut self, _expected_type: FBlockType) -> FBlockInfo { + let code = self.current_code_info(); + // TODO: Add assertion to check expected type matches + // assert!(matches!(fblock.fb_type, expected_type)); + code.fblock.pop().expect("fblock stack underflow") + } + // could take impl Into>, but everything is borrowed from ast structs; we never // actually have a `String` to pass fn name(&mut self, name: &str) -> bytecode::NameIdx { - self._name_inner(name, |i| &mut i.name_cache) + self._name_inner(name, |i| &mut i.metadata.names) } fn varname(&mut self, name: &str) -> CompileResult { if Compiler::is_forbidden_arg_name(name) { @@ -446,7 +525,7 @@ impl Compiler<'_> { "cannot assign to {name}", )))); } - Ok(self._name_inner(name, |i| &mut i.varname_cache)) + Ok(self._name_inner(name, |i| &mut i.metadata.varnames)) } fn _name_inner( &mut self, @@ -464,14 +543,14 @@ impl Compiler<'_> { /// Set the qualified name for the current code object, based on CPython's compiler_set_qualname fn set_qualname(&mut self) -> String { let qualname = self.make_qualname(); - self.current_code_info().qualname = Some(qualname.clone()); + self.current_code_info().metadata.qualname = Some(qualname.clone()); qualname } fn make_qualname(&mut self) -> String { let stack_size = self.code_stack.len(); assert!(stack_size >= 1); - let current_obj_name = self.current_code_info().obj_name.clone(); + let current_obj_name = self.current_code_info().metadata.name.clone(); // If we're at the module level (stack_size == 1), qualname is just the name if stack_size <= 1 { @@ -483,7 +562,7 @@ impl Compiler<'_> { let mut parent = &self.code_stack[parent_idx]; // If parent is a type parameter scope, look at grandparent - if parent.obj_name.starts_with(" { current_obj_name } else { // Check parent scope type - let parent_obj_name = &parent.obj_name; + let parent_obj_name = &parent.metadata.name; // Determine if parent is a function-like scope let is_function_parent = parent.flags.contains(bytecode::CodeFlags::IS_OPTIMIZED) @@ -536,12 +615,12 @@ impl Compiler<'_> { if is_function_parent { // For functions, append . to parent qualname // Use parent's qualname if available, otherwise use parent_obj_name - let parent_qualname = parent.qualname.as_ref().unwrap_or(parent_obj_name); + let parent_qualname = parent.metadata.qualname.as_ref().unwrap_or(parent_obj_name); format!("{parent_qualname}..{current_obj_name}") } else { // For classes and other scopes, use parent's qualname directly // Use parent's qualname if available, otherwise use parent_obj_name - let parent_qualname = parent.qualname.as_ref().unwrap_or(parent_obj_name); + let parent_qualname = parent.metadata.qualname.as_ref().unwrap_or(parent_obj_name); if parent_qualname == "" { // Module level, just use the name current_obj_name @@ -706,7 +785,7 @@ impl Compiler<'_> { .ok_or_else(|| InternalError::MissingSymbol(name.to_string())), ); let info = self.code_stack.last_mut().unwrap(); - let mut cache = &mut info.name_cache; + let mut cache = &mut info.metadata.names; enum NameOpType { Fast, Global, @@ -715,7 +794,7 @@ impl Compiler<'_> { } let op_typ = match symbol.scope { SymbolScope::Local if self.ctx.in_func() => { - cache = &mut info.varname_cache; + cache = &mut info.metadata.varnames; NameOpType::Fast } SymbolScope::GlobalExplicit => NameOpType::Global, @@ -725,16 +804,16 @@ impl Compiler<'_> { SymbolScope::GlobalImplicit | SymbolScope::Unknown => NameOpType::Local, SymbolScope::Local => NameOpType::Local, SymbolScope::Free => { - cache = &mut info.freevar_cache; + cache = &mut info.metadata.freevars; NameOpType::Deref } SymbolScope::Cell => { - cache = &mut info.cellvar_cache; + cache = &mut info.metadata.cellvars; NameOpType::Deref } // TODO: is this right? SymbolScope::TypeParams => { // Type parameters are always cell variables - cache = &mut info.cellvar_cache; + cache = &mut info.metadata.cellvars; NameOpType::Deref } // SymbolScope::Unknown => NameOpType::Global, }; @@ -750,7 +829,7 @@ impl Compiler<'_> { .get_index_of(name.as_ref()) .unwrap_or_else(|| cache.insert_full(name.into_owned()).0); if let SymbolScope::Free = symbol.scope { - idx += info.cellvar_cache.len(); + idx += info.metadata.cellvars.len(); } let op = match op_typ { NameOpType::Fast => match usage { @@ -1067,26 +1146,62 @@ impl Compiler<'_> { self.switch_to_block(after_block); } } - Stmt::Break(_) => match self.ctx.loop_data { - Some((_, end)) => { - emit!(self, Instruction::Break { target: end }); - } - None => { - return Err( - self.error_ranged(CodegenErrorType::InvalidBreak, statement.range()) - ); - } - }, - Stmt::Continue(_) => match self.ctx.loop_data { - Some((start, _)) => { - emit!(self, Instruction::Continue { target: start }); + Stmt::Break(_) => { + // Find the innermost loop in fblock stack + let found_loop = { + let code = self.current_code_info(); + let mut result = None; + for i in (0..code.fblock.len()).rev() { + match code.fblock[i].fb_type { + FBlockType::WhileLoop | FBlockType::ForLoop => { + result = Some(code.fblock[i].fb_exit); + break; + } + _ => continue, + } + } + result + }; + + match found_loop { + Some(exit_block) => { + emit!(self, Instruction::Break { target: exit_block }); + } + None => { + return Err( + self.error_ranged(CodegenErrorType::InvalidBreak, statement.range()) + ); + } } - None => { - return Err( - self.error_ranged(CodegenErrorType::InvalidContinue, statement.range()) - ); + } + Stmt::Continue(_) => { + // Find the innermost loop in fblock stack + let found_loop = { + let code = self.current_code_info(); + let mut result = None; + for i in (0..code.fblock.len()).rev() { + match code.fblock[i].fb_type { + FBlockType::WhileLoop | FBlockType::ForLoop => { + result = Some(code.fblock[i].fb_block); + break; + } + _ => continue, + } + } + result + }; + + match found_loop { + Some(loop_block) => { + emit!(self, Instruction::Continue { target: loop_block }); + } + None => { + return Err( + self.error_ranged(CodegenErrorType::InvalidContinue, statement.range()) + ); + } } - }, + } Stmt::Return(StmtReturn { value, .. }) => { if !self.ctx.in_func() { return Err( @@ -1639,7 +1754,8 @@ impl Compiler<'_> { let (doc_str, body) = split_doc(body, &self.opts); self.current_code_info() - .constants + .metadata + .consts .insert_full(ConstantData::None); // Emit RESUME instruction at function start @@ -1760,10 +1876,12 @@ impl Compiler<'_> { ); let parent_code = self.code_stack.last().unwrap(); let vars = match symbol.scope { - SymbolScope::Free => &parent_code.freevar_cache, - SymbolScope::Cell => &parent_code.cellvar_cache, - SymbolScope::TypeParams => &parent_code.cellvar_cache, - _ if symbol.flags.contains(SymbolFlags::FREE_CLASS) => &parent_code.freevar_cache, + SymbolScope::Free => &parent_code.metadata.freevars, + SymbolScope::Cell => &parent_code.metadata.cellvars, + SymbolScope::TypeParams => &parent_code.metadata.cellvars, + _ if symbol.flags.contains(SymbolFlags::FREE_CLASS) => { + &parent_code.metadata.freevars + } x => unreachable!( "var {} in a {:?} should be free or cell but it's {:?}", var, table.typ, x @@ -1771,7 +1889,7 @@ impl Compiler<'_> { }; let mut idx = vars.get_index_of(var).unwrap(); if let SymbolScope::Free = symbol.scope { - idx += parent_code.cellvar_cache.len(); + idx += parent_code.metadata.cellvars.len(); } emit!(self, Instruction::LoadClosure(idx.to_u32())) } @@ -1896,7 +2014,8 @@ impl Compiler<'_> { .code_stack .last_mut() .unwrap() - .cellvar_cache + .metadata + .cellvars .iter() .position(|var| *var == "__class__"); @@ -2005,6 +2124,9 @@ impl Compiler<'_> { emit!(self, Instruction::SetupLoop); self.switch_to_block(while_block); + // Push fblock for while loop + self.push_fblock(FBlockType::WhileLoop, while_block, after_block)?; + self.compile_jump_if(test, false, else_block)?; let was_in_loop = self.ctx.loop_data.replace((while_block, after_block)); @@ -2017,6 +2139,9 @@ impl Compiler<'_> { } ); self.switch_to_block(else_block); + + // Pop fblock + self.pop_fblock(FBlockType::WhileLoop); emit!(self, Instruction::PopBlock); self.compile_statements(orelse)?; self.switch_to_block(after_block); @@ -2127,6 +2252,10 @@ impl Compiler<'_> { emit!(self, Instruction::GetAIter); self.switch_to_block(for_block); + + // Push fblock for async for loop + self.push_fblock(FBlockType::ForLoop, for_block, after_block)?; + emit!( self, Instruction::SetupExcept { @@ -2149,6 +2278,10 @@ impl Compiler<'_> { emit!(self, Instruction::GetIter); self.switch_to_block(for_block); + + // Push fblock for for loop + self.push_fblock(FBlockType::ForLoop, for_block, after_block)?; + emit!(self, Instruction::ForIter { target: else_block }); // Start of loop iteration, set targets: @@ -2161,6 +2294,10 @@ impl Compiler<'_> { emit!(self, Instruction::Jump { target: for_block }); self.switch_to_block(else_block); + + // Pop fblock + self.pop_fblock(FBlockType::ForLoop); + if is_async { emit!(self, Instruction::EndAsyncFor); } @@ -3677,7 +3814,8 @@ impl Compiler<'_> { }; self.current_code_info() - .constants + .metadata + .consts .insert_full(ConstantData::None); self.compile_expression(body)?; @@ -4138,6 +4276,9 @@ impl Compiler<'_> { // Create magnificent function : self.push_output(flags, 1, 1, 0, name.to_owned()); + // Mark that we're in an inlined comprehension + self.current_code_info().in_inlined_comp = true; + // Set qualname for comprehension self.set_qualname(); @@ -4330,7 +4471,7 @@ impl Compiler<'_> { fn arg_constant(&mut self, constant: ConstantData) -> u32 { let info = self.current_code_info(); - info.constants.insert_full(constant).0.to_u32() + info.metadata.consts.insert_full(constant).0.to_u32() } fn emit_load_const(&mut self, constant: ConstantData) { diff --git a/compiler/codegen/src/ir.rs b/compiler/codegen/src/ir.rs index 852051777e..f2299892b3 100644 --- a/compiler/codegen/src/ir.rs +++ b/compiler/codegen/src/ir.rs @@ -1,11 +1,29 @@ use std::ops; -use crate::IndexSet; use crate::error::InternalError; +use crate::{IndexMap, IndexSet}; use ruff_source_file::{OneIndexed, SourceLocation}; use rustpython_compiler_core::bytecode::{ CodeFlags, CodeObject, CodeUnit, ConstantData, InstrDisplayContext, Instruction, Label, OpArg, }; + +/// Metadata for a code unit +// = _PyCompile_CodeUnitMetadata +#[derive(Clone, Debug)] +pub struct CodeUnitMetadata { + pub name: String, // u_name (obj_name) + pub qualname: Option, // u_qualname + pub consts: IndexSet, // u_consts + pub names: IndexSet, // u_names + pub varnames: IndexSet, // u_varnames + pub cellvars: IndexSet, // u_cellvars + pub freevars: IndexSet, // u_freevars + pub fast_hidden: IndexMap, // u_fast_hidden + pub argcount: u32, // u_argcount + pub posonlyargcount: u32, // u_posonlyargcount + pub kwonlyargcount: u32, // u_kwonlyargcount + pub firstlineno: OneIndexed, // u_firstlineno +} // use rustpython_parser_core::source_code::{LineNumber, SourceLocation}; #[derive(Copy, Clone, PartialEq, Eq, Debug)] @@ -67,22 +85,22 @@ impl Default for Block { pub struct CodeInfo { pub flags: CodeFlags, - pub posonlyarg_count: u32, // Number of positional-only arguments - pub arg_count: u32, - pub kwonlyarg_count: u32, pub source_path: String, - pub first_line_number: OneIndexed, - pub obj_name: String, // Name of the object that created this code object - pub qualname: Option, // Qualified name of the object pub private: Option, // For private name mangling, mostly for class pub blocks: Vec, pub current_block: BlockIdx, - pub constants: IndexSet, - pub name_cache: IndexSet, - pub varname_cache: IndexSet, - pub cellvar_cache: IndexSet, - pub freevar_cache: IndexSet, + + pub metadata: CodeUnitMetadata, + + // For class scopes: attributes accessed via self.X + pub static_attributes: Option>, + + // True if compiling an inlined comprehension + pub in_inlined_comp: bool, + + // Block stack for tracking nested control structures + pub fblock: Vec, } impl CodeInfo { pub fn finalize_code(mut self, optimize: u8) -> crate::InternalResult { @@ -95,24 +113,32 @@ impl CodeInfo { let Self { flags, - posonlyarg_count, - arg_count, - kwonlyarg_count, source_path, - first_line_number, - obj_name, - qualname, private: _, // private is only used during compilation mut blocks, current_block: _, - constants, - name_cache, - varname_cache, - cellvar_cache, - freevar_cache, + metadata, + static_attributes: _, + in_inlined_comp: _, + fblock: _, } = self; + let CodeUnitMetadata { + name: obj_name, + qualname, + consts: constants, + names: name_cache, + varnames: varname_cache, + cellvars: cellvar_cache, + freevars: freevar_cache, + fast_hidden: _, + argcount: arg_count, + posonlyargcount: posonlyarg_count, + kwonlyargcount: kwonlyarg_count, + firstlineno: first_line_number, + } = metadata; + let mut instructions = Vec::new(); let mut locations = Vec::new(); @@ -182,21 +208,23 @@ impl CodeInfo { } fn cell2arg(&self) -> Option> { - if self.cellvar_cache.is_empty() { + if self.metadata.cellvars.is_empty() { return None; } - let total_args = self.arg_count - + self.kwonlyarg_count + let total_args = self.metadata.argcount + + self.metadata.kwonlyargcount + self.flags.contains(CodeFlags::HAS_VARARGS) as u32 + self.flags.contains(CodeFlags::HAS_VARKEYWORDS) as u32; let mut found_cellarg = false; let cell2arg = self - .cellvar_cache + .metadata + .cellvars .iter() .map(|var| { - self.varname_cache + self.metadata + .varnames .get_index_of(var) // check that it's actually an arg .filter(|i| *i < total_args as usize) @@ -302,18 +330,19 @@ impl CodeInfo { impl InstrDisplayContext for CodeInfo { type Constant = ConstantData; fn get_constant(&self, i: usize) -> &ConstantData { - &self.constants[i] + &self.metadata.consts[i] } fn get_name(&self, i: usize) -> &str { - self.name_cache[i].as_ref() + self.metadata.names[i].as_ref() } fn get_varname(&self, i: usize) -> &str { - self.varname_cache[i].as_ref() + self.metadata.varnames[i].as_ref() } fn get_cell_name(&self, i: usize) -> &str { - self.cellvar_cache + self.metadata + .cellvars .get_index(i) - .unwrap_or_else(|| &self.freevar_cache[i - self.cellvar_cache.len()]) + .unwrap_or_else(|| &self.metadata.freevars[i - self.metadata.cellvars.len()]) .as_ref() } } diff --git a/compiler/codegen/src/symboltable.rs b/compiler/codegen/src/symboltable.rs index 66dbff326a..52b6bae644 100644 --- a/compiler/codegen/src/symboltable.rs +++ b/compiler/codegen/src/symboltable.rs @@ -45,6 +45,9 @@ pub struct SymbolTable { /// A list of sub-scopes in the order as found in the /// AST nodes. pub sub_tables: Vec, + + /// Variable names in definition order (parameters first, then locals) + pub varnames: Vec, } impl SymbolTable { @@ -56,6 +59,7 @@ impl SymbolTable { is_nested, symbols: IndexMap::default(), sub_tables: vec![], + varnames: Vec::new(), } } @@ -573,6 +577,8 @@ struct SymbolTableBuilder<'src> { tables: Vec, future_annotations: bool, source_code: SourceCode<'src>, + // Current scope's varnames being collected (temporary storage) + current_varnames: Vec, } /// Enum to indicate in what mode an expression @@ -595,6 +601,7 @@ impl<'src> SymbolTableBuilder<'src> { tables: vec![], future_annotations: false, source_code, + current_varnames: Vec::new(), }; this.enter_scope("top", SymbolTableType::Module, 0); this @@ -605,6 +612,8 @@ impl SymbolTableBuilder<'_> { fn finish(mut self) -> Result { assert_eq!(self.tables.len(), 1); let mut symbol_table = self.tables.pop().unwrap(); + // Save varnames for the top-level module scope + symbol_table.varnames = self.current_varnames; analyze_symbol_table(&mut symbol_table)?; Ok(symbol_table) } @@ -617,11 +626,15 @@ impl SymbolTableBuilder<'_> { .unwrap_or(false); let table = SymbolTable::new(name.to_owned(), typ, line_number, is_nested); self.tables.push(table); + // Clear current_varnames for the new scope + self.current_varnames.clear(); } /// Pop symbol table and add to sub table of parent table. fn leave_scope(&mut self) { - let table = self.tables.pop().unwrap(); + let mut table = self.tables.pop().unwrap(); + // Save the collected varnames to the symbol table + table.varnames = std::mem::take(&mut self.current_varnames); self.tables.last_mut().unwrap().sub_tables.push(table); } @@ -1533,18 +1546,43 @@ impl SymbolTableBuilder<'_> { } SymbolUsage::Parameter => { flags.insert(SymbolFlags::PARAMETER); + // Parameters are always added to varnames first + let name_str = symbol.name.clone(); + if !self.current_varnames.contains(&name_str) { + self.current_varnames.push(name_str); + } } SymbolUsage::AnnotationParameter => { flags.insert(SymbolFlags::PARAMETER | SymbolFlags::ANNOTATED); + // Annotated parameters are also added to varnames + let name_str = symbol.name.clone(); + if !self.current_varnames.contains(&name_str) { + self.current_varnames.push(name_str); + } } SymbolUsage::AnnotationAssigned => { flags.insert(SymbolFlags::ASSIGNED | SymbolFlags::ANNOTATED); } SymbolUsage::Assigned => { flags.insert(SymbolFlags::ASSIGNED); + // Local variables (assigned) are added to varnames if they are local scope + // and not already in varnames + if symbol.scope == SymbolScope::Local { + let name_str = symbol.name.clone(); + if !self.current_varnames.contains(&name_str) { + self.current_varnames.push(name_str); + } + } } SymbolUsage::AssignedNamedExprInComprehension => { flags.insert(SymbolFlags::ASSIGNED | SymbolFlags::ASSIGNED_IN_COMPREHENSION); + // Named expressions in comprehensions might also be locals + if symbol.scope == SymbolScope::Local { + let name_str = symbol.name.clone(); + if !self.current_varnames.contains(&name_str) { + self.current_varnames.push(name_str); + } + } } SymbolUsage::Global => { symbol.scope = SymbolScope::GlobalExplicit;