Skip to content

Commit 8aaadb1

Browse files
Merge pull request RustPython#234 from johndaniels/indentation
Handle mixed spaces and tabs.
2 parents 1b13b0c + 9468b65 commit 8aaadb1

File tree

2 files changed

+143
-24
lines changed

2 files changed

+143
-24
lines changed

parser/src/lexer.rs

+132-24
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,47 @@
44
pub use super::token::Tok;
55
use num_bigint::BigInt;
66
use num_traits::Num;
7+
use std::cmp::Ordering;
78
use std::collections::HashMap;
89
use std::str::FromStr;
910

11+
/// Indentation of a line, recorded as separate counts of tabs and spaces.
///
/// The two counts are kept apart because the rendered width of a tab is not
/// known; two levels can only be ordered when the answer does not depend on
/// that width (see [`IndentationLevel::compare_strict`]).
#[derive(Clone, Copy, PartialEq, Eq, Debug, Default)]
struct IndentationLevel {
    /// Number of tab characters in the indentation.
    tabs: usize,
    /// Number of space characters in the indentation.
    spaces: usize,
}

impl IndentationLevel {
    /// Creates the zero indentation level (no tabs, no spaces).
    fn new() -> Self {
        Self::default()
    }

    /// Compares `self` with `other` without assuming any tab width.
    ///
    /// Returns `Some(ordering)` when the result holds for every possible tab
    /// size: either the tab counts are equal (the spaces decide), or the tab
    /// counts differ and the space counts do not differ in the opposite
    /// direction. Returns `None` when tabs and spaces disagree, because the
    /// ordering would then depend on how wide a tab is.
    fn compare_strict(&self, other: &IndentationLevel) -> Option<Ordering> {
        match self.tabs.cmp(&other.tabs) {
            // Same number of tabs: the spaces alone decide.
            Ordering::Equal => Some(self.spaces.cmp(&other.spaces)),
            // Fewer tabs is only definitely smaller if spaces are not larger.
            Ordering::Less if self.spaces <= other.spaces => Some(Ordering::Less),
            // More tabs is only definitely larger if spaces are not smaller.
            Ordering::Greater if self.spaces >= other.spaces => Some(Ordering::Greater),
            // Tabs and spaces pull in opposite directions: ambiguous.
            Ordering::Less | Ordering::Greater => None,
        }
    }
}
42+
1043
pub struct Lexer<T: Iterator<Item = char>> {
1144
chars: T,
1245
at_begin_of_line: bool,
1346
nesting: usize, // Amount of parenthesis
14-
indentation_stack: Vec<usize>,
47+
indentation_stack: Vec<IndentationLevel>,
1548
pending: Vec<Spanned<Tok>>,
1649
chr0: Option<char>,
1750
chr1: Option<char>,
@@ -218,7 +251,7 @@ where
218251
chars: input,
219252
at_begin_of_line: true,
220253
nesting: 0,
221-
indentation_stack: vec![0],
254+
indentation_stack: vec![IndentationLevel::new()],
222255
pending: Vec::new(),
223256
chr0: None,
224257
location: Location::new(0, 0),
@@ -576,12 +609,23 @@ where
576609
self.at_begin_of_line = false;
577610

578611
// Determine indentation:
579-
let mut col: usize = 0;
612+
let mut spaces: usize = 0;
613+
let mut tabs: usize = 0;
580614
loop {
581615
match self.chr0 {
582616
Some(' ') => {
583617
self.next_char();
584-
col += 1;
618+
spaces += 1;
619+
}
620+
Some('\t') => {
621+
if spaces != 0 {
622+
// Don't allow tabs after spaces as part of indentation.
623+
// This is technically stricter than python3 but spaces before
624+
// tabs is even more insane than mixing spaces and tabs.
625+
panic!("Tabs not allowed as part of indentation after spaces");
626+
}
627+
self.next_char();
628+
tabs += 1;
585629
}
586630
Some('#') => {
587631
self.lex_comment();
@@ -601,34 +645,54 @@ where
601645
}
602646
}
603647

648+
let indentation_level = IndentationLevel { spaces, tabs };
649+
604650
if self.nesting == 0 {
605651
// Determine indent or dedent:
606652
let current_indentation = *self.indentation_stack.last().unwrap();
607-
if col == current_indentation {
608-
// Same same
609-
} else if col > current_indentation {
610-
// New indentation level:
611-
self.indentation_stack.push(col);
612-
let tok_start = self.get_pos();
613-
let tok_end = tok_start.clone();
614-
return Some(Ok((tok_start, Tok::Indent, tok_end)));
615-
} else if col < current_indentation {
616-
// One or more dedentations
617-
// Pop off other levels until col is found:
618-
619-
while col < *self.indentation_stack.last().unwrap() {
620-
self.indentation_stack.pop().unwrap();
653+
let ordering = indentation_level.compare_strict(&current_indentation);
654+
match ordering {
655+
Some(Ordering::Equal) => {
656+
// Same same
657+
}
658+
Some(Ordering::Greater) => {
659+
// New indentation level:
660+
self.indentation_stack.push(indentation_level);
621661
let tok_start = self.get_pos();
622662
let tok_end = tok_start.clone();
623-
self.pending.push(Ok((tok_start, Tok::Dedent, tok_end)));
663+
return Some(Ok((tok_start, Tok::Indent, tok_end)));
624664
}
665+
Some(Ordering::Less) => {
666+
// One or more dedentations
667+
// Pop off other levels until the matching indentation level is found:
668+
669+
loop {
670+
let ordering = indentation_level
671+
.compare_strict(self.indentation_stack.last().unwrap());
672+
match ordering {
673+
Some(Ordering::Less) => {
674+
self.indentation_stack.pop();
675+
let tok_start = self.get_pos();
676+
let tok_end = tok_start.clone();
677+
self.pending.push(Ok((tok_start, Tok::Dedent, tok_end)));
678+
}
679+
None => {
680+
panic!("inconsistent use of tabs and spaces in indentation")
681+
}
682+
_ => {
683+
break;
684+
}
685+
};
686+
}
625687

626-
if col != *self.indentation_stack.last().unwrap() {
627-
// TODO: handle wrong indentations
628-
panic!("Non matching indentation levels!");
629-
}
688+
if indentation_level != *self.indentation_stack.last().unwrap() {
689+
// TODO: handle wrong indentations
690+
panic!("Non matching indentation levels!");
691+
}
630692

631-
return Some(self.pending.remove(0));
693+
return Some(self.pending.remove(0));
694+
}
695+
None => panic!("inconsistent use of tabs and spaces in indentation"),
632696
}
633697
}
634698
}
@@ -1233,12 +1297,56 @@ mod tests {
12331297
}
12341298
}
12351299

1300+
// Generates one `#[test]` function per `name: eol,` pair.
//
// Each generated test lexes a nested `def`/`if` body whose first level is
// indented with a single tab and whose second level is indented with a tab
// followed by a space, using `$eol` as the line terminator. It then checks
// that the lexer emits two `Indent` tokens and closes them with two `Dedent`
// tokens.
macro_rules! test_double_dedent_with_tabs {
    ($($name:ident: $eol:expr,)*) => {
        $(
            #[test]
            fn $name() {
                // `{0}` reuses the single `$eol` argument at every line end.
                let source = format!("def foo():{0}\tif x:{0}{0}\t return 99{0}{0}", $eol);
                let tokens = lex_source(&source);
                assert_eq!(
                    tokens,
                    vec![
                        Tok::Def,
                        Tok::Name {
                            name: String::from("foo"),
                        },
                        Tok::Lpar,
                        Tok::Rpar,
                        Tok::Colon,
                        Tok::Newline,
                        Tok::Indent,
                        Tok::If,
                        Tok::Name {
                            name: String::from("x"),
                        },
                        Tok::Colon,
                        Tok::Newline,
                        Tok::Indent,
                        Tok::Return,
                        Tok::Int { value: BigInt::from(99) },
                        Tok::Newline,
                        Tok::Dedent,
                        Tok::Dedent,
                    ]
                );
            }
        )*
    }
}
1337+
12361338
test_double_dedent_with_eol! {
12371339
test_double_dedent_windows_eol: WINDOWS_EOL,
12381340
test_double_dedent_mac_eol: MAC_EOL,
12391341
test_double_dedent_unix_eol: UNIX_EOL,
12401342
}
12411343

1344+
test_double_dedent_with_tabs! {
1345+
test_double_dedent_tabs_windows_eol: WINDOWS_EOL,
1346+
test_double_dedent_tabs_mac_eol: MAC_EOL,
1347+
test_double_dedent_tabs_unix_eol: UNIX_EOL,
1348+
}
1349+
12421350
macro_rules! test_newline_in_brackets {
12431351
($($name:ident: $eol:expr,)*) => {
12441352
$(

tests/snippets/indentation.py

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# WARNING! This file contains mixed tabs and spaces
2+
# (because that's what it is testing)
3+
4+
# Body is indented with one tab; the statement nested under `if` uses a tab
# followed by spaces, so the lexer sees tabs and spaces mixed on one line.
def weird_indentation():
	return_value = "hi"
	if False:
	    # Never executed; present only for its tab + spaces indentation.
	    return return_value
	return "hi"
9+
10+
assert weird_indentation() == "hi"
11+

0 commit comments

Comments
 (0)