@@ -6,12 +6,49 @@ use num_bigint::BigInt;
6
6
use num_traits:: Num ;
7
7
use std:: collections:: HashMap ;
8
8
use std:: str:: FromStr ;
9
+ use std:: cmp:: Ordering ;
10
+
11
/// Indentation of a line, recorded as separate tab and space counts.
///
/// The two counts are kept apart instead of being folded into a single column
/// number so that two levels can be compared without assuming how wide a tab
/// character is.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
struct IndentationLevel {
    /// Number of tab characters in the indentation.
    tabs: usize,
    /// Number of space characters in the indentation.
    spaces: usize,
}

impl IndentationLevel {
    /// Creates the zero indentation level (no tabs, no spaces).
    fn new() -> Self {
        Self::default()
    }

    /// Compares `self` with `other` in a way that holds for any tab width.
    ///
    /// Returns:
    /// * `Some(Ordering::Less)` / `Some(Ordering::Greater)` when the tab counts
    ///   differ and the space counts do not point in the opposite direction;
    /// * `Some(self.spaces.cmp(&other.spaces))` when the tab counts are equal;
    /// * `None` when the tab counts and the space counts disagree, because the
    ///   answer would then depend on the size of a tab.
    fn compare_strict(&self, other: &IndentationLevel) -> Option<Ordering> {
        match self.tabs.cmp(&other.tabs) {
            // Same number of tabs: the spaces alone decide.
            Ordering::Equal => Some(self.spaces.cmp(&other.spaces)),
            // Fewer tabs and no more spaces: smaller whatever a tab is worth.
            Ordering::Less if self.spaces <= other.spaces => Some(Ordering::Less),
            // More tabs and no fewer spaces: bigger whatever a tab is worth.
            Ordering::Greater if self.spaces >= other.spaces => Some(Ordering::Greater),
            // Tabs and spaces pull in opposite directions: ambiguous.
            Ordering::Less | Ordering::Greater => None,
        }
    }
}
9
46
10
47
pub struct Lexer < T : Iterator < Item = char > > {
11
48
chars : T ,
12
49
at_begin_of_line : bool ,
13
50
nesting : usize , // Amount of parenthesis
14
- indentation_stack : Vec < usize > ,
51
+ indentation_stack : Vec < IndentationLevel > ,
15
52
pending : Vec < Spanned < Tok > > ,
16
53
chr0 : Option < char > ,
17
54
chr1 : Option < char > ,
@@ -218,7 +255,7 @@ where
218
255
chars : input,
219
256
at_begin_of_line : true ,
220
257
nesting : 0 ,
221
- indentation_stack : vec ! [ 0 ] ,
258
+ indentation_stack : vec ! [ IndentationLevel :: new ( ) ] ,
222
259
pending : Vec :: new ( ) ,
223
260
chr0 : None ,
224
261
location : Location :: new ( 0 , 0 ) ,
@@ -576,13 +613,24 @@ where
576
613
self . at_begin_of_line = false ;
577
614
578
615
// Determine indentation:
579
- let mut col: usize = 0 ;
616
+ let mut spaces: usize = 0 ;
617
+ let mut tabs: usize = 0 ;
580
618
loop {
581
619
match self . chr0 {
582
620
Some ( ' ' ) => {
583
621
self . next_char ( ) ;
584
- col += 1 ;
585
- }
622
+ spaces += 1 ;
623
+ } ,
624
+ Some ( '\t' ) => {
625
+ if spaces != 0 {
626
+ // Don't allow tabs after spaces as part of indentation.
627
+ // This is technically stricter than python3 but spaces before
628
+ // tabs is even more insane than mixing spaces and tabs.
629
+ panic ! ( "Tabs not allowed as part of indentation after spaces" ) ;
630
+ }
631
+ self . next_char ( ) ;
632
+ tabs += 1 ;
633
+ } ,
586
634
Some ( '#' ) => {
587
635
self . lex_comment ( ) ;
588
636
self . at_begin_of_line = true ;
@@ -601,34 +649,54 @@ where
601
649
}
602
650
}
603
651
652
+ let indentation_level = IndentationLevel {
653
+ spaces,
654
+ tabs,
655
+ } ;
656
+
604
657
if self . nesting == 0 {
605
658
// Determine indent or dedent:
606
659
let current_indentation = * self . indentation_stack . last ( ) . unwrap ( ) ;
607
- if col == current_indentation {
608
- // Same same
609
- } else if col > current_indentation {
610
- // New indentation level:
611
- self . indentation_stack . push ( col) ;
612
- let tok_start = self . get_pos ( ) ;
613
- let tok_end = tok_start. clone ( ) ;
614
- return Some ( Ok ( ( tok_start, Tok :: Indent , tok_end) ) ) ;
615
- } else if col < current_indentation {
616
- // One or more dedentations
617
- // Pop off other levels until col is found:
618
-
619
- while col < * self . indentation_stack . last ( ) . unwrap ( ) {
620
- self . indentation_stack . pop ( ) . unwrap ( ) ;
660
+ let ordering = indentation_level. compare_strict ( & current_indentation) ;
661
+ match ordering {
662
+ Some ( Ordering :: Equal ) => {
663
+ // Same same
664
+ } ,
665
+ Some ( Ordering :: Greater ) => {
666
+ // New indentation level:
667
+ self . indentation_stack . push ( indentation_level) ;
621
668
let tok_start = self . get_pos ( ) ;
622
669
let tok_end = tok_start. clone ( ) ;
623
- self . pending . push ( Ok ( ( tok_start, Tok :: Dedent , tok_end) ) ) ;
670
+ return Some ( Ok ( ( tok_start, Tok :: Indent , tok_end) ) ) ;
624
671
}
672
+ Some ( Ordering :: Less ) => {
673
+ // One or more dedentations
674
+ // Pop off other levels until col is found:
675
+
676
+ loop {
677
+ let ordering = indentation_level. compare_strict ( self . indentation_stack . last ( ) . unwrap ( ) ) ;
678
+ match ordering {
679
+ Some ( Ordering :: Less ) => {
680
+ self . indentation_stack . pop ( ) ;
681
+ let tok_start = self . get_pos ( ) ;
682
+ let tok_end = tok_start. clone ( ) ;
683
+ self . pending . push ( Ok ( ( tok_start, Tok :: Dedent , tok_end) ) ) ;
684
+ } ,
685
+ None => panic ! ( "inconsistent use of tabs and spaces in indentation" ) ,
686
+ _ => {
687
+ break ;
688
+ } ,
689
+ } ;
690
+ }
625
691
626
- if col != * self . indentation_stack . last ( ) . unwrap ( ) {
627
- // TODO: handle wrong indentations
628
- panic ! ( "Non matching indentation levels!" ) ;
629
- }
692
+ if indentation_level != * self . indentation_stack . last ( ) . unwrap ( ) {
693
+ // TODO: handle wrong indentations
694
+ panic ! ( "Non matching indentation levels!" ) ;
695
+ }
630
696
631
- return Some ( self . pending . remove ( 0 ) ) ;
697
+ return Some ( self . pending . remove ( 0 ) ) ;
698
+ }
699
+ None => panic ! ( "inconsistent use of tabs and spaces in indentation" ) ,
632
700
}
633
701
}
634
702
}
@@ -1233,12 +1301,56 @@ mod tests {
1233
1301
}
1234
1302
}
1235
1303
1304
// Generates one `#[test]` per `name: eol` pair. Each test lexes a small
// function whose nested blocks are indented with tabs (plus one extra space
// for the innermost line) and checks that the token stream contains two
// `Indent` tokens and ends with two `Dedent` tokens.
macro_rules! test_double_dedent_with_tabs {
    ($($name:ident: $eol:expr,)*) => {
        $(
        #[test]
        fn $name() {
            // The `if` line is indented with a single tab and the `return`
            // line with a tab followed by a space, so the second line is
            // strictly deeper than the first and a second `Indent` is expected.
            // `format!` already yields a `String`, so no extra conversion is needed.
            let source = format!("def foo():{}\tif x:{}{}\t return 99{}{}", $eol, $eol, $eol, $eol, $eol);
            let tokens = lex_source(&source);
            assert_eq!(
                tokens,
                vec![
                    Tok::Def,
                    Tok::Name {
                        name: String::from("foo"),
                    },
                    Tok::Lpar,
                    Tok::Rpar,
                    Tok::Colon,
                    Tok::Newline,
                    Tok::Indent,
                    Tok::If,
                    Tok::Name {
                        name: String::from("x"),
                    },
                    Tok::Colon,
                    Tok::Newline,
                    Tok::Indent,
                    Tok::Return,
                    Tok::Int { value: BigInt::from(99) },
                    Tok::Newline,
                    Tok::Dedent,
                    Tok::Dedent,
                ]
            );
        }
        )*
    }
}
1341
+
1236
1342
test_double_dedent_with_eol ! {
1237
1343
test_double_dedent_windows_eol: WINDOWS_EOL ,
1238
1344
test_double_dedent_mac_eol: MAC_EOL ,
1239
1345
test_double_dedent_unix_eol: UNIX_EOL ,
1240
1346
}
1241
1347
1348
+ test_double_dedent_with_tabs ! {
1349
+ test_double_dedent_tabs_windows_eol: WINDOWS_EOL ,
1350
+ test_double_dedent_tabs_mac_eol: MAC_EOL ,
1351
+ test_double_dedent_tabs_unix_eol: UNIX_EOL ,
1352
+ }
1353
+
1242
1354
macro_rules! test_newline_in_brackets {
1243
1355
( $( $name: ident: $eol: expr, ) * ) => {
1244
1356
$(
0 commit comments