@@ -462,6 +462,19 @@ where
         }
     }
 
+    fn parse_octet(&mut self, first: char) -> char {
+        let mut octet_content = String::new();
+        octet_content.push(first);
+        while octet_content.len() < 3 {
+            if let Some('0'..='7') = self.chr0 {
+                octet_content.push(self.next_char().unwrap())
+            } else {
+                break;
+            }
+        }
+        u8::from_str_radix(&octet_content, 8).unwrap() as char
+    }
+
     fn lex_string(
         &mut self,
         is_bytes: bool,
@@ -521,8 +534,9 @@ where
                     }
                     Some('u') => string_content.push(self.unicode_literal(4)?),
                     Some('U') => string_content.push(self.unicode_literal(8)?),
-                    Some('x') if !is_bytes => string_content.push(self.unicode_literal(2)?),
+                    Some('x') => string_content.push(self.unicode_literal(2)?),
                     Some('v') => string_content.push('\x0b'),
+                    Some(o @ '0'..='7') => string_content.push(self.parse_octet(o)),
                     Some(c) => {
                         string_content.push('\\');
                         string_content.push(c);
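The new `Some(o @ '0'..='7')` arm above routes octal escapes through `parse_octet`, which collects up to three octal digits and converts them with `from_str_radix`. A minimal standalone sketch of that conversion (not the lexer itself; the helper name is made up for illustration):

```rust
// Sketch only: mirrors the conversion parse_octet performs on the digits it collects.
fn octal_escape_to_char(digits: &str) -> char {
    // `digits` is assumed to hold 1..=3 chars in '0'..='7'.
    u8::from_str_radix(digits, 8).unwrap() as char
}

fn main() {
    assert_eq!(octal_escape_to_char("200"), '\u{80}'); // 0o200 == 128
    assert_eq!(octal_escape_to_char("0"), '\u{0}');
}
```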
@@ -552,7 +566,7 @@ where
                        break;
                    }
                } else {
-                    if c == '\n' && !triple_quoted {
+                    if (c == '\n' && !triple_quoted) || (is_bytes && !c.is_ascii()) {
                        return Err(LexicalError {
                            error: LexicalErrorType::StringError,
                            location: self.get_pos(),
@@ -572,21 +586,8 @@ where
         let end_pos = self.get_pos();
 
         let tok = if is_bytes {
-            if string_content.is_ascii() {
-                let value = if is_raw {
-                    string_content.into_bytes()
-                } else {
-                    lex_byte(string_content).map_err(|error| LexicalError {
-                        error,
-                        location: self.get_pos(),
-                    })?
-                };
-                Tok::Bytes { value }
-            } else {
-                return Err(LexicalError {
-                    error: LexicalErrorType::StringError,
-                    location: self.get_pos(),
-                });
+            Tok::Bytes {
+                value: string_content.chars().map(|c| c as u8).collect(),
             }
         } else {
             Tok::String {
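For context, a small sketch (not part of the change) of the narrowing the new `Tok::Bytes` construction performs: each `char` in `string_content` is truncated to its low byte with `as u8`, which is exact for code points up to U+00FF, the range the `\xNN` and `\NNN` escapes above produce.

```rust
fn main() {
    // What string_content might hold after lexing b'a\200\0' with the new escape handling.
    let content = "a\u{80}\u{0}";
    let bytes: Vec<u8> = content.chars().map(|c| c as u8).collect();
    assert_eq!(bytes, vec![0x61, 0x80, 0x00]);
}
```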
@@ -1231,90 +1232,6 @@ where
     }
 }
 
-#[derive(Debug)]
-enum EscapeMode {
-    NORMAL,
-    HEX,
-    OCTET,
-}
-
-fn lex_byte(s: String) -> Result<Vec<u8>, LexicalErrorType> {
-    let mut res = vec![];
-    let mut escape: Option<EscapeMode> = None;
-    let mut escape_buffer = String::new();
-
-    let mut chars_iter = s.chars();
-    let mut next_char = chars_iter.next();
-
-    while let Some(c) = next_char {
-        match escape {
-            Some(EscapeMode::OCTET) => {
-                if let '0'..='7' = c {
-                    escape_buffer.push(c);
-                    next_char = chars_iter.next();
-                    if escape_buffer.len() < 3 {
-                        continue;
-                    }
-                }
-                res.push(u8::from_str_radix(&escape_buffer, 8).unwrap());
-                escape = None;
-                escape_buffer.clear();
-            }
-            Some(EscapeMode::HEX) => {
-                if c.is_ascii_hexdigit() {
-                    if escape_buffer.is_empty() {
-                        escape_buffer.push(c);
-                    } else {
-                        escape_buffer.push(c);
-                        res.push(u8::from_str_radix(&escape_buffer, 16).unwrap());
-                        escape = None;
-                        escape_buffer.clear();
-                    }
-                    next_char = chars_iter.next();
-                } else {
-                    return Err(LexicalErrorType::StringError);
-                }
-            }
-            Some(EscapeMode::NORMAL) => {
-                match c {
-                    '\\' => res.push(b'\\'),
-                    'x' => {
-                        escape = Some(EscapeMode::HEX);
-                        next_char = chars_iter.next();
-                        continue;
-                    }
-                    't' => res.push(b'\t'),
-                    'n' => res.push(b'\n'),
-                    'r' => res.push(b'\r'),
-                    '0'..='7' => {
-                        escape = Some(EscapeMode::OCTET);
-                        continue;
-                    }
-                    x => {
-                        res.push(b'\\');
-                        res.push(x as u8);
-                    }
-                }
-                escape = None;
-                next_char = chars_iter.next();
-            }
-            None => {
-                match c {
-                    '\\' => escape = Some(EscapeMode::NORMAL),
-                    x => res.push(x as u8),
-                }
-                next_char = chars_iter.next();
-            }
-        }
-    }
-    match escape {
-        Some(EscapeMode::OCTET) => res.push(u8::from_str_radix(&escape_buffer, 8).unwrap()),
-        Some(EscapeMode::HEX) => return Err(LexicalErrorType::StringError),
-        _ => (),
-    }
-    Ok(res)
-}
-
 #[cfg(test)]
 mod tests {
     use super::{make_tokenizer, NewlineHandler, Tok};
@@ -1642,7 +1559,7 @@ mod tests {
 
     #[test]
     fn test_string() {
-        let source = r#""double" 'single' 'can\'t' "\\\"" '\t\r\n' '\g' r'raw\''"#;
+        let source = r#""double" 'single' 'can\'t' "\\\"" '\t\r\n' '\g' r'raw\'' '\200\0a'"#;
         let tokens = lex_source(source);
         assert_eq!(
             tokens,
@@ -1675,6 +1592,10 @@
                 Tok::String {
                     value: String::from("raw\\'"),
                     is_fstring: false,
                 },
+                Tok::String {
+                    value: String::from("\u{80}\u{0}a"),
+                    is_fstring: false,
+                },
                 Tok::Newline,
             ]
         );
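The updated test only exercises the string path; a companion bytes case could look like the sketch below, assuming the `lex_source` helper already used by the other tests in this module:

```rust
#[test]
fn test_bytes_octal_escape() {
    // Sketch: b'\200\0a' should lex to the bytes [0x80, 0x00, b'a'].
    let source = r"b'\200\0a'";
    let tokens = lex_source(source);
    assert_eq!(
        tokens,
        vec![
            Tok::Bytes {
                value: vec![0x80, 0x00, b'a'],
            },
            Tok::Newline,
        ]
    );
}
```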