Skip to content

Commit b666c52

Browse files
authored
code object linetable (RustPython#6150)
* Code.replace * implement linetable
1 parent 6ead821 commit b666c52

File tree

6 files changed

+699
-20
lines changed

6 files changed

+699
-20
lines changed

Lib/test/test_code.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -347,8 +347,6 @@ def func(arg):
347347
newcode = code.replace(co_name="func") # Should not raise SystemError
348348
self.assertEqual(code, newcode)
349349

350-
# TODO: RUSTPYTHON
351-
@unittest.expectedFailure
352350
def test_empty_linetable(self):
353351
def func():
354352
pass
@@ -468,8 +466,6 @@ def f():
468466

469467
# co_positions behavior when info is missing.
470468

471-
# TODO: RUSTPYTHON
472-
@unittest.expectedFailure
473469
# @requires_debug_ranges()
474470
def test_co_positions_empty_linetable(self):
475471
def func():
@@ -480,8 +476,6 @@ def func():
480476
self.assertIsNone(line)
481477
self.assertEqual(end_line, new_code.co_firstlineno + 1)
482478

483-
# TODO: RUSTPYTHON
484-
@unittest.expectedFailure
485479
def test_code_equality(self):
486480
def f():
487481
try:
@@ -522,8 +516,6 @@ def test_code_hash_uses_order(self):
522516
self.assertNotEqual(c, swapped)
523517
self.assertNotEqual(hash(c), hash(swapped))
524518

525-
# TODO: RUSTPYTHON
526-
@unittest.expectedFailure
527519
def test_code_hash_uses_bytecode(self):
528520
c = (lambda x, y: x + y).__code__
529521
d = (lambda x, y: x * y).__code__
@@ -735,8 +727,6 @@ def check_positions(self, func):
735727
self.assertEqual(l1, l2)
736728
self.assertEqual(len(pos1), len(pos2))
737729

738-
# TODO: RUSTPYTHON
739-
@unittest.expectedFailure
740730
def test_positions(self):
741731
self.check_positions(parse_location_table)
742732
self.check_positions(misshappen)
@@ -751,8 +741,6 @@ def check_lines(self, func):
751741
self.assertEqual(l1, l2)
752742
self.assertEqual(len(lines1), len(lines2))
753743

754-
# TODO: RUSTPYTHON
755-
@unittest.expectedFailure
756744
def test_lines(self):
757745
self.check_lines(parse_location_table)
758746
self.check_lines(misshappen)

Lib/test/test_compile.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -885,8 +885,6 @@ def foo(x):
885885
self.assertIn('LOAD_ATTR', instructions)
886886
self.assertIn('PRECALL', instructions)
887887

888-
# TODO: RUSTPYTHON
889-
@unittest.expectedFailure
890888
def test_lineno_procedure_call(self):
891889
def call():
892890
(

compiler/codegen/src/ir.rs

Lines changed: 138 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use rustpython_compiler_core::{
55
OneIndexed, SourceLocation,
66
bytecode::{
77
CodeFlags, CodeObject, CodeUnit, ConstantData, InstrDisplayContext, Instruction, Label,
8-
OpArg,
8+
OpArg, PyCodeLocationInfoKind,
99
},
1010
};
1111

@@ -72,6 +72,7 @@ pub struct InstructionInfo {
7272
pub target: BlockIdx,
7373
// pub range: TextRange,
7474
pub location: SourceLocation,
75+
// TODO: end_location for debug ranges
7576
}
7677

7778
// spell-checker:ignore petgraph
@@ -199,6 +200,9 @@ impl CodeInfo {
199200
locations.clear()
200201
}
201202

203+
// Generate linetable from locations
204+
let linetable = generate_linetable(&locations, first_line_number.get() as i32);
205+
202206
Ok(CodeObject {
203207
flags,
204208
posonlyarg_count,
@@ -218,6 +222,8 @@ impl CodeInfo {
218222
cellvars: cellvar_cache.into_iter().collect(),
219223
freevars: freevar_cache.into_iter().collect(),
220224
cell2arg,
225+
linetable,
226+
exceptiontable: Box::new([]), // TODO: Generate actual exception table
221227
})
222228
}
223229

@@ -388,3 +394,134 @@ fn iter_blocks(blocks: &[Block]) -> impl Iterator<Item = (BlockIdx, &Block)> + '
388394
Some((idx, b))
389395
})
390396
}
397+
398+
/// Generate CPython 3.11+ format linetable from source locations
399+
fn generate_linetable(locations: &[SourceLocation], first_line: i32) -> Box<[u8]> {
400+
if locations.is_empty() {
401+
return Box::new([]);
402+
}
403+
404+
let mut linetable = Vec::new();
405+
// Initialize prev_line to first_line
406+
// The first entry's delta is relative to co_firstlineno
407+
let mut prev_line = first_line;
408+
let mut i = 0;
409+
410+
while i < locations.len() {
411+
let loc = &locations[i];
412+
413+
// Count consecutive instructions with the same location
414+
let mut length = 1;
415+
while i + length < locations.len() && locations[i + length] == locations[i] {
416+
length += 1;
417+
}
418+
419+
// Process in chunks of up to 8 instructions
420+
while length > 0 {
421+
let entry_length = length.min(8);
422+
423+
// Get line and column information
424+
// SourceLocation always has row and column (both are OneIndexed)
425+
let line = loc.row.get() as i32;
426+
let col = (loc.column.get() as i32) - 1; // Convert 1-based to 0-based
427+
428+
let line_delta = line - prev_line;
429+
430+
// Choose the appropriate encoding based on line delta and column info
431+
// Note: SourceLocation always has valid column, so we never get NO_COLUMNS case
432+
if line_delta == 0 {
433+
let end_col = col; // Use same column for end (no range info available)
434+
435+
if col < 80 && end_col - col < 16 && end_col >= col {
436+
// Short form (codes 0-9) for common cases
437+
let code = (col / 8).min(9) as u8; // Short0 to Short9
438+
linetable.push(0x80 | (code << 3) | ((entry_length - 1) as u8));
439+
let col_byte = (((col % 8) as u8) << 4) | ((end_col - col) as u8 & 0xf);
440+
linetable.push(col_byte);
441+
} else if col < 128 && end_col < 128 {
442+
// One-line form (code 10) for same line
443+
linetable.push(
444+
0x80 | ((PyCodeLocationInfoKind::OneLine0 as u8) << 3)
445+
| ((entry_length - 1) as u8),
446+
);
447+
linetable.push(col as u8);
448+
linetable.push(end_col as u8);
449+
} else {
450+
// Long form for columns >= 128
451+
linetable.push(
452+
0x80 | ((PyCodeLocationInfoKind::Long as u8) << 3)
453+
| ((entry_length - 1) as u8),
454+
);
455+
write_signed_varint(&mut linetable, 0); // line_delta = 0
456+
write_varint(&mut linetable, 0); // end_line delta = 0
457+
write_varint(&mut linetable, (col as u32) + 1); // column + 1 for encoding
458+
write_varint(&mut linetable, (end_col as u32) + 1); // end_col + 1
459+
}
460+
} else if line_delta > 0 && line_delta < 3
461+
/* && column.is_some() */
462+
{
463+
// One-line form (codes 11-12) for line deltas 1-2
464+
let end_col = col; // Use same column for end
465+
466+
if col < 128 && end_col < 128 {
467+
let code = (PyCodeLocationInfoKind::OneLine0 as u8) + (line_delta as u8); // 11 for delta=1, 12 for delta=2
468+
linetable.push(0x80 | (code << 3) | ((entry_length - 1) as u8));
469+
linetable.push(col as u8);
470+
linetable.push(end_col as u8);
471+
} else {
472+
// Long form for columns >= 128 or negative line delta
473+
linetable.push(
474+
0x80 | ((PyCodeLocationInfoKind::Long as u8) << 3)
475+
| ((entry_length - 1) as u8),
476+
);
477+
write_signed_varint(&mut linetable, line_delta);
478+
write_varint(&mut linetable, 0); // end_line delta = 0
479+
write_varint(&mut linetable, (col as u32) + 1); // column + 1 for encoding
480+
write_varint(&mut linetable, (end_col as u32) + 1); // end_col + 1
481+
}
482+
} else {
483+
// Long form (code 14) for all other cases
484+
// This handles: line_delta < 0, line_delta >= 3, or columns >= 128
485+
let end_col = col; // Use same column for end
486+
linetable.push(
487+
0x80 | ((PyCodeLocationInfoKind::Long as u8) << 3) | ((entry_length - 1) as u8),
488+
);
489+
write_signed_varint(&mut linetable, line_delta);
490+
write_varint(&mut linetable, 0); // end_line delta = 0
491+
write_varint(&mut linetable, (col as u32) + 1); // column + 1 for encoding
492+
write_varint(&mut linetable, (end_col as u32) + 1); // end_col + 1
493+
}
494+
495+
prev_line = line;
496+
length -= entry_length;
497+
i += entry_length;
498+
}
499+
}
500+
501+
linetable.into_boxed_slice()
502+
}
503+
504+
/// Append `val` to `buf` as a variable-length unsigned integer.
///
/// The value is emitted six bits at a time, least-significant chunk first. Every
/// byte except the last has bit `0x40` set to signal that more bytes follow.
///
/// Returns the number of bytes appended.
fn write_varint(buf: &mut Vec<u8>, mut val: u32) -> usize {
    const CONTINUATION: u8 = 0x40;
    const PAYLOAD_MASK: u32 = 0x3f;

    // The terminating byte is always written, so start the count at one.
    let mut written = 1;
    while val > PAYLOAD_MASK {
        buf.push(CONTINUATION | (val & PAYLOAD_MASK) as u8);
        val >>= 6;
        written += 1;
    }
    buf.push(val as u8);
    written
}
515+
516+
/// Write a variable-length signed integer
517+
/// Returns the number of bytes written
518+
fn write_signed_varint(buf: &mut Vec<u8>, val: i32) -> usize {
519+
let uval = if val < 0 {
520+
// (unsigned int)(-val) has an undefined behavior for INT_MIN
521+
// So we use (0 - val as u32) to handle it correctly
522+
((0u32.wrapping_sub(val as u32)) << 1) | 1
523+
} else {
524+
(val as u32) << 1
525+
};
526+
write_varint(buf, uval)
527+
}

compiler/core/src/bytecode.rs

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,75 @@ pub enum ResumeType {
3333
AfterAwait = 3,
3434
}
3535

36+
/// Entry kind codes used in the CPython 3.11+ linetable.
///
/// The numeric value of each variant is the code stored in an entry's first byte.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[repr(u8)]
pub enum PyCodeLocationInfoKind {
    /// Short form, code 0. Short forms occupy codes 0 through 9.
    Short0 = 0,
    /// Short form, code 1.
    Short1 = 1,
    /// Short form, code 2.
    Short2 = 2,
    /// Short form, code 3.
    Short3 = 3,
    /// Short form, code 4.
    Short4 = 4,
    /// Short form, code 5.
    Short5 = 5,
    /// Short form, code 6.
    Short6 = 6,
    /// Short form, code 7.
    Short7 = 7,
    /// Short form, code 8.
    Short8 = 8,
    /// Short form, code 9.
    Short9 = 9,
    /// One-line form, code 10. One-line forms occupy codes 10 through 12.
    OneLine0 = 10,
    /// One-line form, code 11.
    OneLine1 = 11,
    /// One-line form, code 12.
    OneLine2 = 12,
    /// Code 13. NOTE(review): presumably an entry carrying a line but no column
    /// data, going by the name — confirm against the CPython format notes.
    NoColumns = 13,
    /// Long form, code 14.
    Long = 14,
    /// Code 15. NOTE(review): presumably an entry with no location at all, going
    /// by the name — confirm against the CPython format notes.
    None = 15,
}
59+
60+
impl PyCodeLocationInfoKind {
61+
pub fn from_code(code: u8) -> Option<Self> {
62+
match code {
63+
0 => Some(Self::Short0),
64+
1 => Some(Self::Short1),
65+
2 => Some(Self::Short2),
66+
3 => Some(Self::Short3),
67+
4 => Some(Self::Short4),
68+
5 => Some(Self::Short5),
69+
6 => Some(Self::Short6),
70+
7 => Some(Self::Short7),
71+
8 => Some(Self::Short8),
72+
9 => Some(Self::Short9),
73+
10 => Some(Self::OneLine0),
74+
11 => Some(Self::OneLine1),
75+
12 => Some(Self::OneLine2),
76+
13 => Some(Self::NoColumns),
77+
14 => Some(Self::Long),
78+
15 => Some(Self::None),
79+
_ => Option::None,
80+
}
81+
}
82+
83+
pub fn is_short(&self) -> bool {
84+
(*self as u8) <= 9
85+
}
86+
87+
pub fn short_column_group(&self) -> Option<u8> {
88+
if self.is_short() {
89+
Some(*self as u8)
90+
} else {
91+
Option::None
92+
}
93+
}
94+
95+
pub fn one_line_delta(&self) -> Option<i32> {
96+
match self {
97+
Self::OneLine0 => Some(0),
98+
Self::OneLine1 => Some(1),
99+
Self::OneLine2 => Some(2),
100+
_ => Option::None,
101+
}
102+
}
103+
}
104+
36105
pub trait Constant: Sized {
37106
type Name: AsRef<str>;
38107

@@ -146,6 +215,10 @@ pub struct CodeObject<C: Constant = ConstantData> {
146215
pub varnames: Box<[C::Name]>,
147216
pub cellvars: Box<[C::Name]>,
148217
pub freevars: Box<[C::Name]>,
218+
pub linetable: Box<[u8]>,
219+
// Line number table (CPython 3.11+ format)
220+
pub exceptiontable: Box<[u8]>,
221+
// Exception handling table
149222
}
150223

151224
bitflags! {
@@ -1202,6 +1275,8 @@ impl<C: Constant> CodeObject<C> {
12021275
first_line_number: self.first_line_number,
12031276
max_stackdepth: self.max_stackdepth,
12041277
cell2arg: self.cell2arg,
1278+
linetable: self.linetable,
1279+
exceptiontable: self.exceptiontable,
12051280
}
12061281
}
12071282

@@ -1232,6 +1307,8 @@ impl<C: Constant> CodeObject<C> {
12321307
first_line_number: self.first_line_number,
12331308
max_stackdepth: self.max_stackdepth,
12341309
cell2arg: self.cell2arg.clone(),
1310+
linetable: self.linetable.clone(),
1311+
exceptiontable: self.exceptiontable.clone(),
12351312
}
12361313
}
12371314
}

compiler/core/src/marshal.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,16 @@ pub fn deserialize_code<R: Read, Bag: ConstantBag>(
251251
let cellvars = read_names()?;
252252
let freevars = read_names()?;
253253

254+
// Read linetable and exceptiontable
255+
let linetable_len = rdr.read_u32()?;
256+
let linetable = rdr.read_slice(linetable_len)?.to_vec().into_boxed_slice();
257+
258+
let exceptiontable_len = rdr.read_u32()?;
259+
let exceptiontable = rdr
260+
.read_slice(exceptiontable_len)?
261+
.to_vec()
262+
.into_boxed_slice();
263+
254264
Ok(CodeObject {
255265
instructions,
256266
locations,
@@ -269,6 +279,8 @@ pub fn deserialize_code<R: Read, Bag: ConstantBag>(
269279
varnames,
270280
cellvars,
271281
freevars,
282+
linetable,
283+
exceptiontable,
272284
})
273285
}
274286

@@ -684,4 +696,8 @@ pub fn serialize_code<W: Write, C: Constant>(buf: &mut W, code: &CodeObject<C>)
684696
write_names(&code.varnames);
685697
write_names(&code.cellvars);
686698
write_names(&code.freevars);
699+
700+
// Serialize linetable and exceptiontable
701+
write_vec(buf, &code.linetable);
702+
write_vec(buf, &code.exceptiontable);
687703
}

0 commit comments

Comments
 (0)