4
4
pub use super :: token:: Tok ;
5
5
use num_bigint:: BigInt ;
6
6
use num_traits:: Num ;
7
+ use std:: cmp:: Ordering ;
7
8
use std:: collections:: HashMap ;
8
9
use std:: str:: FromStr ;
9
10
11
/// Indentation of a line, recorded as separate counts of tab and space
/// characters instead of a single column number.
#[derive(Clone, Copy, PartialEq, Debug)]
struct IndentationLevel {
    /// Number of tab characters in the indentation.
    tabs: usize,
    /// Number of space characters in the indentation.
    spaces: usize,
}

impl IndentationLevel {
    /// Returns the empty indentation level: zero tabs and zero spaces.
    fn new() -> IndentationLevel {
        IndentationLevel { spaces: 0, tabs: 0 }
    }

    /// Orders `self` relative to `other` without assuming any tab width.
    ///
    /// Yields `Some(ordering)` when the tab and space counts do not
    /// contradict each other, and `None` when they differ in opposite
    /// directions, because the answer would then depend on how wide a tab is.
    fn compare_strict(&self, other: &IndentationLevel) -> Option<Ordering> {
        match (self.tabs.cmp(&other.tabs), self.spaces.cmp(&other.spaces)) {
            // Equal tab counts: the space counts alone decide.
            (Ordering::Equal, by_spaces) => Some(by_spaces),
            // Equal space counts: the tab counts alone decide.
            (by_tabs, Ordering::Equal) => Some(by_tabs),
            // Both counts differ in the same direction.
            (by_tabs, by_spaces) if by_tabs == by_spaces => Some(by_tabs),
            // Counts pull in opposite directions: undecidable without a tab size.
            _ => None,
        }
    }
}
42
+
10
43
pub struct Lexer < T : Iterator < Item = char > > {
11
44
chars : T ,
12
45
at_begin_of_line : bool ,
13
46
nesting : usize , // Amount of parenthesis
14
- indentation_stack : Vec < usize > ,
47
+ indentation_stack : Vec < IndentationLevel > ,
15
48
pending : Vec < Spanned < Tok > > ,
16
49
chr0 : Option < char > ,
17
50
chr1 : Option < char > ,
@@ -218,7 +251,7 @@ where
218
251
chars : input,
219
252
at_begin_of_line : true ,
220
253
nesting : 0 ,
221
- indentation_stack : vec ! [ 0 ] ,
254
+ indentation_stack : vec ! [ IndentationLevel :: new ( ) ] ,
222
255
pending : Vec :: new ( ) ,
223
256
chr0 : None ,
224
257
location : Location :: new ( 0 , 0 ) ,
@@ -576,12 +609,23 @@ where
576
609
self . at_begin_of_line = false ;
577
610
578
611
// Determine indentation:
579
- let mut col: usize = 0 ;
612
+ let mut spaces: usize = 0 ;
613
+ let mut tabs: usize = 0 ;
580
614
loop {
581
615
match self . chr0 {
582
616
Some ( ' ' ) => {
583
617
self . next_char ( ) ;
584
- col += 1 ;
618
+ spaces += 1 ;
619
+ }
620
+ Some ( '\t' ) => {
621
+ if spaces != 0 {
622
+ // Don't allow tabs after spaces as part of indentation.
623
+ // This is technically stricter than python3 but spaces before
624
+ // tabs is even more insane than mixing spaces and tabs.
625
+ panic ! ( "Tabs not allowed as part of indentation after spaces" ) ;
626
+ }
627
+ self . next_char ( ) ;
628
+ tabs += 1 ;
585
629
}
586
630
Some ( '#' ) => {
587
631
self . lex_comment ( ) ;
@@ -601,34 +645,54 @@ where
601
645
}
602
646
}
603
647
648
+ let indentation_level = IndentationLevel { spaces, tabs } ;
649
+
604
650
if self . nesting == 0 {
605
651
// Determine indent or dedent:
606
652
let current_indentation = * self . indentation_stack . last ( ) . unwrap ( ) ;
607
- if col == current_indentation {
608
- // Same same
609
- } else if col > current_indentation {
610
- // New indentation level:
611
- self . indentation_stack . push ( col) ;
612
- let tok_start = self . get_pos ( ) ;
613
- let tok_end = tok_start. clone ( ) ;
614
- return Some ( Ok ( ( tok_start, Tok :: Indent , tok_end) ) ) ;
615
- } else if col < current_indentation {
616
- // One or more dedentations
617
- // Pop off other levels until col is found:
618
-
619
- while col < * self . indentation_stack . last ( ) . unwrap ( ) {
620
- self . indentation_stack . pop ( ) . unwrap ( ) ;
653
+ let ordering = indentation_level. compare_strict ( & current_indentation) ;
654
+ match ordering {
655
+ Some ( Ordering :: Equal ) => {
656
+ // Same same
657
+ }
658
+ Some ( Ordering :: Greater ) => {
659
+ // New indentation level:
660
+ self . indentation_stack . push ( indentation_level) ;
621
661
let tok_start = self . get_pos ( ) ;
622
662
let tok_end = tok_start. clone ( ) ;
623
- self . pending . push ( Ok ( ( tok_start, Tok :: Dedent , tok_end) ) ) ;
663
+ return Some ( Ok ( ( tok_start, Tok :: Indent , tok_end) ) ) ;
624
664
}
665
+ Some ( Ordering :: Less ) => {
666
+ // One or more dedentations
667
+ // Pop off other levels until col is found:
668
+
669
+ loop {
670
+ let ordering = indentation_level
671
+ . compare_strict ( self . indentation_stack . last ( ) . unwrap ( ) ) ;
672
+ match ordering {
673
+ Some ( Ordering :: Less ) => {
674
+ self . indentation_stack . pop ( ) ;
675
+ let tok_start = self . get_pos ( ) ;
676
+ let tok_end = tok_start. clone ( ) ;
677
+ self . pending . push ( Ok ( ( tok_start, Tok :: Dedent , tok_end) ) ) ;
678
+ }
679
+ None => {
680
+ panic ! ( "inconsistent use of tabs and spaces in indentation" )
681
+ }
682
+ _ => {
683
+ break ;
684
+ }
685
+ } ;
686
+ }
625
687
626
- if col != * self . indentation_stack . last ( ) . unwrap ( ) {
627
- // TODO: handle wrong indentations
628
- panic ! ( "Non matching indentation levels!" ) ;
629
- }
688
+ if indentation_level != * self . indentation_stack . last ( ) . unwrap ( ) {
689
+ // TODO: handle wrong indentations
690
+ panic ! ( "Non matching indentation levels!" ) ;
691
+ }
630
692
631
- return Some ( self . pending . remove ( 0 ) ) ;
693
+ return Some ( self . pending . remove ( 0 ) ) ;
694
+ }
695
+ None => panic ! ( "inconsistent use of tabs and spaces in indentation" ) ,
632
696
}
633
697
}
634
698
}
@@ -1233,12 +1297,56 @@ mod tests {
1233
1297
}
1234
1298
}
1235
1299
1300
// Generates one `#[test]` function per `name: eol,` pair.
//
// Each generated test lexes a nested `def` / `if` / `return` snippet whose
// lines are terminated by `$eol` and whose indentation contains tab
// characters, then asserts the exact token stream, which must end with two
// consecutive `Dedent` tokens.
macro_rules! test_double_dedent_with_tabs {
    ($($name:ident: $eol:expr,)*) => {
        $(
            #[test]
            fn $name() {
                // `format!` already produces a `String`, so no further conversion is needed.
                let source = format!("def foo():{}\t if x:{}{}\t return 99{}{}", $eol, $eol, $eol, $eol, $eol);
                let tokens = lex_source(&source);
                assert_eq!(
                    tokens,
                    vec![
                        Tok::Def,
                        Tok::Name {
                            name: String::from("foo"),
                        },
                        Tok::Lpar,
                        Tok::Rpar,
                        Tok::Colon,
                        Tok::Newline,
                        Tok::Indent,
                        Tok::If,
                        Tok::Name {
                            name: String::from("x"),
                        },
                        Tok::Colon,
                        Tok::Newline,
                        Tok::Indent,
                        Tok::Return,
                        Tok::Int { value: BigInt::from(99) },
                        Tok::Newline,
                        // Both open indentation levels are closed at end of input.
                        Tok::Dedent,
                        Tok::Dedent,
                    ]
                );
            }
        )*
    }
}
1337
+
1236
1338
// Instantiate the `test_double_dedent_with_eol` tests once per line-ending constant.
test_double_dedent_with_eol ! {
1237
1339
test_double_dedent_windows_eol: WINDOWS_EOL ,
1238
1340
test_double_dedent_mac_eol: MAC_EOL ,
1239
1341
test_double_dedent_unix_eol: UNIX_EOL ,
1240
1342
}
1241
1343
1344
// Instantiate the tab-indented double-dedent tests once per line-ending constant.
+ test_double_dedent_with_tabs ! {
1345
+ test_double_dedent_tabs_windows_eol: WINDOWS_EOL ,
1346
+ test_double_dedent_tabs_mac_eol: MAC_EOL ,
1347
+ test_double_dedent_tabs_unix_eol: UNIX_EOL ,
1348
+ }
1349
+
1242
1350
macro_rules! test_newline_in_brackets {
1243
1351
( $( $name: ident: $eol: expr, ) * ) => {
1244
1352
$(
0 commit comments