From 4a4fb2fdeaa408d75178cc54865cdb95542cdc8a Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Mon, 15 Sep 2025 17:00:24 +0900 Subject: [PATCH 1/2] Code.replace --- Lib/test/test_code.py | 14 ----- vm/src/builtins/code.rs | 111 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 106 insertions(+), 19 deletions(-) diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index 6b0dc09e28..393336a415 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -347,8 +347,6 @@ def func(arg): newcode = code.replace(co_name="func") # Should not raise SystemError self.assertEqual(code, newcode) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_empty_linetable(self): def func(): pass @@ -433,7 +431,6 @@ def test_co_positions_artificial_instructions(self): ] ) - # TODO: RUSTPYTHON @unittest.expectedFailure def test_endline_and_columntable_none_when_no_debug_ranges(self): # Make sure that if `-X no_debug_ranges` is used, there is @@ -450,7 +447,6 @@ def f(): """) assert_python_ok('-X', 'no_debug_ranges', '-c', code) - # TODO: RUSTPYTHON @unittest.expectedFailure def test_endline_and_columntable_none_when_no_debug_ranges_env(self): # Same as above but using the environment variable opt out. @@ -468,8 +464,6 @@ def f(): # co_positions behavior when info is missing. 
- # TODO: RUSTPYTHON - @unittest.expectedFailure # @requires_debug_ranges() def test_co_positions_empty_linetable(self): def func(): @@ -480,8 +474,6 @@ def func(): self.assertIsNone(line) self.assertEqual(end_line, new_code.co_firstlineno + 1) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_code_equality(self): def f(): try: @@ -522,8 +514,6 @@ def test_code_hash_uses_order(self): self.assertNotEqual(c, swapped) self.assertNotEqual(hash(c), hash(swapped)) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_code_hash_uses_bytecode(self): c = (lambda x, y: x + y).__code__ d = (lambda x, y: x * y).__code__ @@ -735,8 +725,6 @@ def check_positions(self, func): self.assertEqual(l1, l2) self.assertEqual(len(pos1), len(pos2)) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_positions(self): self.check_positions(parse_location_table) self.check_positions(misshappen) @@ -751,8 +739,6 @@ def check_lines(self, func): self.assertEqual(l1, l2) self.assertEqual(len(lines1), len(lines2)) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_lines(self): self.check_lines(parse_location_table) self.check_lines(misshappen) diff --git a/vm/src/builtins/code.rs b/vm/src/builtins/code.rs index 1ce0f3b3e0..f418775808 100644 --- a/vm/src/builtins/code.rs +++ b/vm/src/builtins/code.rs @@ -40,6 +40,22 @@ pub struct ReplaceArgs { co_flags: OptionalArg, #[pyarg(named, optional)] co_varnames: OptionalArg>, + #[pyarg(named, optional)] + co_nlocals: OptionalArg, + #[pyarg(named, optional)] + co_stacksize: OptionalArg, + #[pyarg(named, optional)] + co_code: OptionalArg, + #[pyarg(named, optional)] + co_linetable: OptionalArg, + #[pyarg(named, optional)] + co_exceptiontable: OptionalArg, + #[pyarg(named, optional)] + co_freevars: OptionalArg>, + #[pyarg(named, optional)] + co_cellvars: OptionalArg>, + #[pyarg(named, optional)] + co_qualname: OptionalArg, } #[derive(Clone)] @@ -350,6 +366,34 @@ impl PyCode { vm.ctx.new_tuple(names) } + #[pygetset] + pub fn 
co_linetable(&self, vm: &VirtualMachine) -> crate::builtins::PyBytesRef { + // Return empty bytes for now - this should be the new line table format + vm.ctx.new_bytes(vec![]) + } + + #[pygetset] + pub fn co_exceptiontable(&self, vm: &VirtualMachine) -> crate::builtins::PyBytesRef { + // Return empty bytes for now - this should be exception table + vm.ctx.new_bytes(vec![]) + } + + #[pymethod] + pub fn co_lines(&self, vm: &VirtualMachine) -> PyResult { + // Return an iterator over (start_offset, end_offset, lineno) tuples + // For now, return an empty iterator + let empty_list = vm.ctx.new_list(vec![]); + vm.call_method(empty_list.as_object(), "__iter__", ()) + } + + #[pymethod] + pub fn co_positions(&self, vm: &VirtualMachine) -> PyResult { + // Return an iterator over (line, end_line, column, end_column) tuples + // For now, return an iterator that yields None tuples + let empty_list = vm.ctx.new_list(vec![]); + vm.call_method(empty_list.as_object(), "__iter__", ()) + } + #[pymethod] pub fn replace(&self, args: ReplaceArgs, vm: &VirtualMachine) -> PyResult { let posonlyarg_count = match args.co_posonlyargcount { @@ -408,6 +452,63 @@ impl PyCode { OptionalArg::Missing => self.code.varnames.iter().map(|s| s.to_object()).collect(), }; + let qualname = match args.co_qualname { + OptionalArg::Present(qualname) => qualname, + OptionalArg::Missing => self.code.qualname.to_owned(), + }; + + let max_stackdepth = match args.co_stacksize { + OptionalArg::Present(stacksize) => stacksize, + OptionalArg::Missing => self.code.max_stackdepth, + }; + + let instructions = match args.co_code { + OptionalArg::Present(_code_bytes) => { + // Convert bytes back to instructions + // For now, keep the original instructions + // TODO: Properly parse bytecode from bytes + self.code.instructions.clone() + } + OptionalArg::Missing => self.code.instructions.clone(), + }; + + let cellvars = match args.co_cellvars { + OptionalArg::Present(cellvars) => cellvars + .into_iter() + .map(|o| 
o.as_interned_str(vm).unwrap()) + .collect(), + OptionalArg::Missing => self.code.cellvars.clone(), + }; + + let freevars = match args.co_freevars { + OptionalArg::Present(freevars) => freevars + .into_iter() + .map(|o| o.as_interned_str(vm).unwrap()) + .collect(), + OptionalArg::Missing => self.code.freevars.clone(), + }; + + // Validate co_nlocals if provided + if let OptionalArg::Present(nlocals) = args.co_nlocals { + if nlocals as usize != varnames.len() { + return Err(vm.new_value_error(format!( + "co_nlocals ({}) != len(co_varnames) ({})", + nlocals, + varnames.len() + ))); + } + } + + // Note: co_linetable and co_exceptiontable are not stored in CodeObject yet + // They would need to be added to the CodeObject structure + // For now, just validate they are bytes if provided + if let OptionalArg::Present(_linetable) = args.co_linetable { + // Would store linetable if CodeObject supported it + } + if let OptionalArg::Present(_exceptiontable) = args.co_exceptiontable { + // Would store exceptiontable if CodeObject supported it + } + Ok(Self { code: CodeObject { flags: CodeFlags::from_bits_truncate(flags), @@ -417,10 +518,10 @@ impl PyCode { source_path: source_path.as_object().as_interned_str(vm).unwrap(), first_line_number, obj_name: obj_name.as_object().as_interned_str(vm).unwrap(), - qualname: self.code.qualname, + qualname: qualname.as_object().as_interned_str(vm).unwrap(), - max_stackdepth: self.code.max_stackdepth, - instructions: self.code.instructions.clone(), + max_stackdepth, + instructions, locations: self.code.locations.clone(), constants: constants.into_iter().map(Literal).collect(), names: names @@ -431,8 +532,8 @@ impl PyCode { .into_iter() .map(|o| o.as_interned_str(vm).unwrap()) .collect(), - cellvars: self.code.cellvars.clone(), - freevars: self.code.freevars.clone(), + cellvars, + freevars, cell2arg: self.code.cell2arg.clone(), }, }) From 1d63075490913bff514cf854a568f8e0f9df6a8e Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Mon, 15 Sep 
2025 17:31:00 +0900 Subject: [PATCH 2/2] implement linetable --- Lib/test/test_code.py | 2 + Lib/test/test_compile.py | 2 - compiler/codegen/src/ir.rs | 139 ++++++++++- compiler/core/src/bytecode.rs | 77 +++++++ compiler/core/src/marshal.rs | 16 ++ vm/src/builtins/code.rs | 418 +++++++++++++++++++++++++++++++--- 6 files changed, 623 insertions(+), 31 deletions(-) diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index 393336a415..b7e5784b48 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -431,6 +431,7 @@ def test_co_positions_artificial_instructions(self): ] ) + # TODO: RUSTPYTHON @unittest.expectedFailure def test_endline_and_columntable_none_when_no_debug_ranges(self): # Make sure that if `-X no_debug_ranges` is used, there is @@ -447,6 +448,7 @@ def f(): """) assert_python_ok('-X', 'no_debug_ranges', '-c', code) + # TODO: RUSTPYTHON @unittest.expectedFailure def test_endline_and_columntable_none_when_no_debug_ranges_env(self): # Same as above but using the environment variable opt out. 
diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index 5b07b3c85b..27bbe0b64a 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -885,8 +885,6 @@ def foo(x): self.assertIn('LOAD_ATTR', instructions) self.assertIn('PRECALL', instructions) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_lineno_procedure_call(self): def call(): ( diff --git a/compiler/codegen/src/ir.rs b/compiler/codegen/src/ir.rs index 1cc59dd656..31c8926091 100644 --- a/compiler/codegen/src/ir.rs +++ b/compiler/codegen/src/ir.rs @@ -5,7 +5,7 @@ use rustpython_compiler_core::{ OneIndexed, SourceLocation, bytecode::{ CodeFlags, CodeObject, CodeUnit, ConstantData, InstrDisplayContext, Instruction, Label, - OpArg, + OpArg, PyCodeLocationInfoKind, }, }; @@ -72,6 +72,7 @@ pub struct InstructionInfo { pub target: BlockIdx, // pub range: TextRange, pub location: SourceLocation, + // TODO: end_location for debug ranges } // spell-checker:ignore petgraph @@ -199,6 +200,9 @@ impl CodeInfo { locations.clear() } + // Generate linetable from locations + let linetable = generate_linetable(&locations, first_line_number.get() as i32); + Ok(CodeObject { flags, posonlyarg_count, @@ -218,6 +222,8 @@ impl CodeInfo { cellvars: cellvar_cache.into_iter().collect(), freevars: freevar_cache.into_iter().collect(), cell2arg, + linetable, + exceptiontable: Box::new([]), // TODO: Generate actual exception table }) } @@ -388,3 +394,134 @@ fn iter_blocks(blocks: &[Block]) -> impl Iterator + ' Some((idx, b)) }) } + +/// Generate CPython 3.11+ format linetable from source locations +fn generate_linetable(locations: &[SourceLocation], first_line: i32) -> Box<[u8]> { + if locations.is_empty() { + return Box::new([]); + } + + let mut linetable = Vec::new(); + // Initialize prev_line to first_line + // The first entry's delta is relative to co_firstlineno + let mut prev_line = first_line; + let mut i = 0; + + while i < locations.len() { + let loc = &locations[i]; + + // Count consecutive 
instructions with the same location + let mut length = 1; + while i + length < locations.len() && locations[i + length] == locations[i] { + length += 1; + } + + // Process in chunks of up to 8 instructions + while length > 0 { + let entry_length = length.min(8); + + // Get line and column information + // SourceLocation always has row and column (both are OneIndexed) + let line = loc.row.get() as i32; + let col = (loc.column.get() as i32) - 1; // Convert 1-based to 0-based + + let line_delta = line - prev_line; + + // Choose the appropriate encoding based on line delta and column info + // Note: SourceLocation always has valid column, so we never get NO_COLUMNS case + if line_delta == 0 { + let end_col = col; // Use same column for end (no range info available) + + if col < 80 && end_col - col < 16 && end_col >= col { + // Short form (codes 0-9) for common cases + let code = (col / 8).min(9) as u8; // Short0 to Short9 + linetable.push(0x80 | (code << 3) | ((entry_length - 1) as u8)); + let col_byte = (((col % 8) as u8) << 4) | ((end_col - col) as u8 & 0xf); + linetable.push(col_byte); + } else if col < 128 && end_col < 128 { + // One-line form (code 10) for same line + linetable.push( + 0x80 | ((PyCodeLocationInfoKind::OneLine0 as u8) << 3) + | ((entry_length - 1) as u8), + ); + linetable.push(col as u8); + linetable.push(end_col as u8); + } else { + // Long form for columns >= 128 + linetable.push( + 0x80 | ((PyCodeLocationInfoKind::Long as u8) << 3) + | ((entry_length - 1) as u8), + ); + write_signed_varint(&mut linetable, 0); // line_delta = 0 + write_varint(&mut linetable, 0); // end_line delta = 0 + write_varint(&mut linetable, (col as u32) + 1); // column + 1 for encoding + write_varint(&mut linetable, (end_col as u32) + 1); // end_col + 1 + } + } else if line_delta > 0 && line_delta < 3 + /* && column.is_some() */ + { + // One-line form (codes 11-12) for line deltas 1-2 + let end_col = col; // Use same column for end + + if col < 128 && end_col < 128 { + let 
code = (PyCodeLocationInfoKind::OneLine0 as u8) + (line_delta as u8); // 11 for delta=1, 12 for delta=2 + linetable.push(0x80 | (code << 3) | ((entry_length - 1) as u8)); + linetable.push(col as u8); + linetable.push(end_col as u8); + } else { + // Long form for columns >= 128 or negative line delta + linetable.push( + 0x80 | ((PyCodeLocationInfoKind::Long as u8) << 3) + | ((entry_length - 1) as u8), + ); + write_signed_varint(&mut linetable, line_delta); + write_varint(&mut linetable, 0); // end_line delta = 0 + write_varint(&mut linetable, (col as u32) + 1); // column + 1 for encoding + write_varint(&mut linetable, (end_col as u32) + 1); // end_col + 1 + } + } else { + // Long form (code 14) for all other cases + // This handles: line_delta < 0, line_delta >= 3, or columns >= 128 + let end_col = col; // Use same column for end + linetable.push( + 0x80 | ((PyCodeLocationInfoKind::Long as u8) << 3) | ((entry_length - 1) as u8), + ); + write_signed_varint(&mut linetable, line_delta); + write_varint(&mut linetable, 0); // end_line delta = 0 + write_varint(&mut linetable, (col as u32) + 1); // column + 1 for encoding + write_varint(&mut linetable, (end_col as u32) + 1); // end_col + 1 + } + + prev_line = line; + length -= entry_length; + i += entry_length; + } + } + + linetable.into_boxed_slice() +} + +/// Write a variable-length unsigned integer (6-bit chunks) +/// Returns the number of bytes written +fn write_varint(buf: &mut Vec, mut val: u32) -> usize { + let start_len = buf.len(); + while val >= 64 { + buf.push(0x40 | (val & 0x3f) as u8); + val >>= 6; + } + buf.push(val as u8); + buf.len() - start_len +} + +/// Write a variable-length signed integer +/// Returns the number of bytes written +fn write_signed_varint(buf: &mut Vec, val: i32) -> usize { + let uval = if val < 0 { + // (unsigned int)(-val) has an undefined behavior for INT_MIN + // So we use (0 - val as u32) to handle it correctly + ((0u32.wrapping_sub(val as u32)) << 1) | 1 + } else { + (val as u32) << 
1 + }; + write_varint(buf, uval) +} diff --git a/compiler/core/src/bytecode.rs b/compiler/core/src/bytecode.rs index b38c599508..c2ce4e52c0 100644 --- a/compiler/core/src/bytecode.rs +++ b/compiler/core/src/bytecode.rs @@ -33,6 +33,75 @@ pub enum ResumeType { AfterAwait = 3, } +/// CPython 3.11+ linetable location info codes +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[repr(u8)] +pub enum PyCodeLocationInfoKind { + // Short forms are 0 to 9 + Short0 = 0, + Short1 = 1, + Short2 = 2, + Short3 = 3, + Short4 = 4, + Short5 = 5, + Short6 = 6, + Short7 = 7, + Short8 = 8, + Short9 = 9, + // One line forms are 10 to 12 + OneLine0 = 10, + OneLine1 = 11, + OneLine2 = 12, + NoColumns = 13, + Long = 14, + None = 15, +} + +impl PyCodeLocationInfoKind { + pub fn from_code(code: u8) -> Option { + match code { + 0 => Some(Self::Short0), + 1 => Some(Self::Short1), + 2 => Some(Self::Short2), + 3 => Some(Self::Short3), + 4 => Some(Self::Short4), + 5 => Some(Self::Short5), + 6 => Some(Self::Short6), + 7 => Some(Self::Short7), + 8 => Some(Self::Short8), + 9 => Some(Self::Short9), + 10 => Some(Self::OneLine0), + 11 => Some(Self::OneLine1), + 12 => Some(Self::OneLine2), + 13 => Some(Self::NoColumns), + 14 => Some(Self::Long), + 15 => Some(Self::None), + _ => Option::None, + } + } + + pub fn is_short(&self) -> bool { + (*self as u8) <= 9 + } + + pub fn short_column_group(&self) -> Option { + if self.is_short() { + Some(*self as u8) + } else { + Option::None + } + } + + pub fn one_line_delta(&self) -> Option { + match self { + Self::OneLine0 => Some(0), + Self::OneLine1 => Some(1), + Self::OneLine2 => Some(2), + _ => Option::None, + } + } +} + pub trait Constant: Sized { type Name: AsRef; @@ -146,6 +215,10 @@ pub struct CodeObject { pub varnames: Box<[C::Name]>, pub cellvars: Box<[C::Name]>, pub freevars: Box<[C::Name]>, + pub linetable: Box<[u8]>, + // Line number table (CPython 3.11+ format) + pub exceptiontable: Box<[u8]>, + // Exception handling table } bitflags! 
{ @@ -1202,6 +1275,8 @@ impl CodeObject { first_line_number: self.first_line_number, max_stackdepth: self.max_stackdepth, cell2arg: self.cell2arg, + linetable: self.linetable, + exceptiontable: self.exceptiontable, } } @@ -1232,6 +1307,8 @@ impl CodeObject { first_line_number: self.first_line_number, max_stackdepth: self.max_stackdepth, cell2arg: self.cell2arg.clone(), + linetable: self.linetable.clone(), + exceptiontable: self.exceptiontable.clone(), } } } diff --git a/compiler/core/src/marshal.rs b/compiler/core/src/marshal.rs index ff82340c0e..b8044a1ab9 100644 --- a/compiler/core/src/marshal.rs +++ b/compiler/core/src/marshal.rs @@ -251,6 +251,16 @@ pub fn deserialize_code( let cellvars = read_names()?; let freevars = read_names()?; + // Read linetable and exceptiontable + let linetable_len = rdr.read_u32()?; + let linetable = rdr.read_slice(linetable_len)?.to_vec().into_boxed_slice(); + + let exceptiontable_len = rdr.read_u32()?; + let exceptiontable = rdr + .read_slice(exceptiontable_len)? 
+ .to_vec() + .into_boxed_slice(); + Ok(CodeObject { instructions, locations, @@ -269,6 +279,8 @@ pub fn deserialize_code( varnames, cellvars, freevars, + linetable, + exceptiontable, }) } @@ -684,4 +696,8 @@ pub fn serialize_code(buf: &mut W, code: &CodeObject) write_names(&code.varnames); write_names(&code.cellvars); write_names(&code.freevars); + + // Serialize linetable and exceptiontable + write_vec(buf, &code.linetable); + write_vec(buf, &code.exceptiontable); } diff --git a/vm/src/builtins/code.rs b/vm/src/builtins/code.rs index f418775808..b49f76caa0 100644 --- a/vm/src/builtins/code.rs +++ b/vm/src/builtins/code.rs @@ -16,8 +16,125 @@ use crate::{ use malachite_bigint::BigInt; use num_traits::Zero; use rustpython_compiler_core::OneIndexed; +use rustpython_compiler_core::bytecode::PyCodeLocationInfoKind; use std::{borrow::Borrow, fmt, ops::Deref}; +/// State for iterating through code address ranges +struct PyCodeAddressRange<'a> { + ar_start: i32, + ar_end: i32, + ar_line: i32, + computed_line: i32, + reader: LineTableReader<'a>, +} + +impl<'a> PyCodeAddressRange<'a> { + fn new(linetable: &'a [u8], first_line: i32) -> Self { + PyCodeAddressRange { + ar_start: 0, + ar_end: 0, + ar_line: -1, + computed_line: first_line, + reader: LineTableReader::new(linetable), + } + } + + /// Check if this is a NO_LINE marker (code 15) + fn is_no_line_marker(byte: u8) -> bool { + (byte >> 3) == 0x1f + } + + /// Advance to next address range + fn advance(&mut self) -> bool { + if self.reader.at_end() { + return false; + } + + let first_byte = match self.reader.read_byte() { + Some(b) => b, + None => return false, + }; + + if (first_byte & 0x80) == 0 { + return false; // Invalid linetable + } + + let code = (first_byte >> 3) & 0x0f; + let length = ((first_byte & 0x07) + 1) as i32; + + // Get line delta for this entry + let line_delta = self.get_line_delta(code); + + // Update computed line + self.computed_line += line_delta; + + // Check for NO_LINE marker + if 
Self::is_no_line_marker(first_byte) { + self.ar_line = -1; + } else { + self.ar_line = self.computed_line; + } + + // Update address range + self.ar_start = self.ar_end; + self.ar_end += length * 2; // sizeof(_Py_CODEUNIT) = 2 + + // Skip remaining bytes for this entry + while !self.reader.at_end() { + if let Some(b) = self.reader.peek_byte() { + if (b & 0x80) != 0 { + break; + } + self.reader.read_byte(); + } else { + break; + } + } + + true + } + + fn get_line_delta(&mut self, code: u8) -> i32 { + let kind = match PyCodeLocationInfoKind::from_code(code) { + Some(k) => k, + None => return 0, + }; + + match kind { + PyCodeLocationInfoKind::None => 0, // NO_LINE marker + PyCodeLocationInfoKind::Long => { + let delta = self.reader.read_signed_varint(); + // Skip end_line, col, end_col + self.reader.read_varint(); + self.reader.read_varint(); + self.reader.read_varint(); + delta + } + PyCodeLocationInfoKind::NoColumns => self.reader.read_signed_varint(), + PyCodeLocationInfoKind::OneLine0 => { + self.reader.read_byte(); // Skip column + self.reader.read_byte(); // Skip end column + 0 + } + PyCodeLocationInfoKind::OneLine1 => { + self.reader.read_byte(); // Skip column + self.reader.read_byte(); // Skip end column + 1 + } + PyCodeLocationInfoKind::OneLine2 => { + self.reader.read_byte(); // Skip column + self.reader.read_byte(); // Skip end column + 2 + } + _ if kind.is_short() => { + self.reader.read_byte(); // Skip column byte + 0 + } + _ => 0, + } + } +} + #[derive(FromArgs)] pub struct ReplaceArgs { #[pyarg(named, optional)] @@ -368,30 +485,207 @@ impl PyCode { #[pygetset] pub fn co_linetable(&self, vm: &VirtualMachine) -> crate::builtins::PyBytesRef { - // Return empty bytes for now - this should be the new line table format - vm.ctx.new_bytes(vec![]) + // Return the actual linetable from the code object + vm.ctx.new_bytes(self.code.linetable.to_vec()) } #[pygetset] pub fn co_exceptiontable(&self, vm: &VirtualMachine) -> crate::builtins::PyBytesRef { - // Return 
empty bytes for now - this should be exception table - vm.ctx.new_bytes(vec![]) + // Return the actual exception table from the code object + vm.ctx.new_bytes(self.code.exceptiontable.to_vec()) } #[pymethod] pub fn co_lines(&self, vm: &VirtualMachine) -> PyResult { + // TODO: Implement lazy iterator (lineiterator) like CPython for better performance + // Currently returns eager list for simplicity + // Return an iterator over (start_offset, end_offset, lineno) tuples - // For now, return an empty iterator - let empty_list = vm.ctx.new_list(vec![]); - vm.call_method(empty_list.as_object(), "__iter__", ()) + let linetable = self.code.linetable.as_ref(); + let mut lines = Vec::new(); + + if !linetable.is_empty() { + let first_line = self.code.first_line_number.map_or(0, |n| n.get() as i32); + let mut range = PyCodeAddressRange::new(linetable, first_line); + + // Process all address ranges and merge consecutive entries with same line + let mut pending_entry: Option<(i32, i32, i32)> = None; + + while range.advance() { + let start = range.ar_start; + let end = range.ar_end; + let line = range.ar_line; + + if let Some((prev_start, _, prev_line)) = pending_entry { + if prev_line == line { + // Same line, extend the range + pending_entry = Some((prev_start, end, prev_line)); + } else { + // Different line, emit the previous entry + let tuple = if prev_line == -1 { + vm.ctx.new_tuple(vec![ + vm.ctx.new_int(prev_start).into(), + vm.ctx.new_int(start).into(), + vm.ctx.none(), + ]) + } else { + vm.ctx.new_tuple(vec![ + vm.ctx.new_int(prev_start).into(), + vm.ctx.new_int(start).into(), + vm.ctx.new_int(prev_line).into(), + ]) + }; + lines.push(tuple.into()); + pending_entry = Some((start, end, line)); + } + } else { + // First entry + pending_entry = Some((start, end, line)); + } + } + + // Emit the last pending entry + if let Some((start, end, line)) = pending_entry { + let tuple = if line == -1 { + vm.ctx.new_tuple(vec![ + vm.ctx.new_int(start).into(), + 
vm.ctx.new_int(end).into(), + vm.ctx.none(), + ]) + } else { + vm.ctx.new_tuple(vec![ + vm.ctx.new_int(start).into(), + vm.ctx.new_int(end).into(), + vm.ctx.new_int(line).into(), + ]) + }; + lines.push(tuple.into()); + } + } + + let list = vm.ctx.new_list(lines); + vm.call_method(list.as_object(), "__iter__", ()) } #[pymethod] pub fn co_positions(&self, vm: &VirtualMachine) -> PyResult { - // Return an iterator over (line, end_line, column, end_column) tuples - // For now, return an iterator that yields None tuples - let empty_list = vm.ctx.new_list(vec![]); - vm.call_method(empty_list.as_object(), "__iter__", ()) + // Return an iterator over (line, end_line, column, end_column) tuples for each instruction + let linetable = self.code.linetable.as_ref(); + let mut positions = Vec::new(); + + if !linetable.is_empty() { + let mut reader = LineTableReader::new(linetable); + let mut line = self.code.first_line_number.map_or(0, |n| n.get() as i32); + + while !reader.at_end() { + let first_byte = match reader.read_byte() { + Some(b) => b, + None => break, + }; + + if (first_byte & 0x80) == 0 { + break; // Invalid linetable + } + + let code = (first_byte >> 3) & 0x0f; + let length = ((first_byte & 0x07) + 1) as i32; + + let kind = match PyCodeLocationInfoKind::from_code(code) { + Some(k) => k, + None => break, // Invalid code + }; + + let (line_delta, end_line_delta, column, end_column): ( + i32, + i32, + Option, + Option, + ) = match kind { + PyCodeLocationInfoKind::None => { + // No location - all values are None + (0, 0, None, None) + } + PyCodeLocationInfoKind::Long => { + // Long form + let delta = reader.read_signed_varint(); + let end_line_delta = reader.read_varint() as i32; + + let col = reader.read_varint(); + let column = if col == 0 { + None + } else { + Some((col - 1) as i32) + }; + + let end_col = reader.read_varint(); + let end_column = if end_col == 0 { + None + } else { + Some((end_col - 1) as i32) + }; + + // endline = line + end_line_delta (will be 
computed after line update) + (delta, end_line_delta, column, end_column) + } + PyCodeLocationInfoKind::NoColumns => { + // No column form + let delta = reader.read_signed_varint(); + (delta, 0, None, None) // endline will be same as line (delta = 0) + } + PyCodeLocationInfoKind::OneLine0 + | PyCodeLocationInfoKind::OneLine1 + | PyCodeLocationInfoKind::OneLine2 => { + // One-line form - endline = line + let col = reader.read_byte().unwrap_or(0) as i32; + let end_col = reader.read_byte().unwrap_or(0) as i32; + let delta = kind.one_line_delta().unwrap_or(0); + (delta, 0, Some(col), Some(end_col)) // endline = line (delta = 0) + } + _ if kind.is_short() => { + // Short form - endline = line + let col_data = reader.read_byte().unwrap_or(0); + let col_group = kind.short_column_group().unwrap_or(0); + let col = ((col_group as i32) << 3) | ((col_data >> 4) as i32); + let end_col = col + (col_data & 0x0f) as i32; + (0, 0, Some(col), Some(end_col)) // endline = line (delta = 0) + } + _ => (0, 0, None, None), + }; + + // Update line number + line += line_delta; + + // Generate position tuples for each instruction covered by this entry + for _ in 0..length { + // Handle special case for no location (code 15) + let final_line = if kind == PyCodeLocationInfoKind::None { + None + } else { + Some(line) + }; + + let final_endline = if kind == PyCodeLocationInfoKind::None { + None + } else { + Some(line + end_line_delta) + }; + + // Convert Option to PyObject (None or int) + let line_obj = final_line.to_pyobject(vm); + let end_line_obj = final_endline.to_pyobject(vm); + let column_obj = column.to_pyobject(vm); + let end_column_obj = end_column.to_pyobject(vm); + + let tuple = + vm.ctx + .new_tuple(vec![line_obj, end_line_obj, column_obj, end_column_obj]); + positions.push(tuple.into()); + } + } + } + + let list = vm.ctx.new_list(positions); + vm.call_method(list.as_object(), "__iter__", ()) } #[pymethod] @@ -489,25 +783,28 @@ impl PyCode { }; // Validate co_nlocals if provided - if 
/// Cursor over the raw bytes of a location table (`co_linetable`).
///
/// All reads are bounds-checked and never panic: reading past the end yields `None`
/// (bytes) or stops early (varints), so malformed tables are tolerated.
struct LineTableReader<'a> {
    data: &'a [u8],
    pos: usize,
}

impl<'a> LineTableReader<'a> {
    /// Create a reader positioned at the start of `data`.
    fn new(data: &'a [u8]) -> Self {
        Self { data, pos: 0 }
    }

    /// Consume and return the next byte, or `None` at the end of the data.
    fn read_byte(&mut self) -> Option<u8> {
        let byte = self.data.get(self.pos).copied()?;
        self.pos += 1;
        Some(byte)
    }

    /// Return the next byte without consuming it, or `None` at the end of the data.
    fn peek_byte(&self) -> Option<u8> {
        self.data.get(self.pos).copied()
    }

    /// Read an unsigned varint: 6-bit chunks, least significant first, with bit 6
    /// (`0x40`) of each byte marking that another chunk follows.
    ///
    /// Returns 0 when no byte is available, and the value decoded so far when the data
    /// ends mid-varint. All continuation bytes are consumed, but chunks that would land
    /// beyond bit 31 are ignored rather than shifted (an over-long varint in a malformed
    /// table must not trigger a shift-overflow panic).
    fn read_varint(&mut self) -> u32 {
        let Some(mut byte) = self.read_byte() else {
            return 0;
        };

        let mut val = u32::from(byte & 0x3f);
        let mut shift: u32 = 0;
        while (byte & 0x40) != 0 {
            let Some(next) = self.read_byte() else {
                break;
            };
            shift = shift.saturating_add(6);
            // `checked_shl` is `None` once the shift reaches the bit width.
            if let Some(bits) = u32::from(next & 0x3f).checked_shl(shift) {
                val |= bits;
            }
            byte = next;
        }
        val
    }

    /// Read a signed varint: bit 0 of the decoded value is the sign, the remaining bits
    /// the magnitude.
    fn read_signed_varint(&mut self) -> i32 {
        let uval = self.read_varint();
        let magnitude = (uval >> 1) as i32; // at most i32::MAX, so negation cannot overflow
        if uval & 1 != 0 { -magnitude } else { magnitude }
    }

    /// Whether every byte has been consumed.
    fn at_end(&self) -> bool {
        self.pos >= self.data.len()
    }
}