Detect string-literal prefixes in the lexer by pattern-matching the lookahead window.

* lexer.rs: lex_identifier matches the first three window entries against a
  one- or two-character prefix followed by a quote and hands off to lex_string.
  lex_string records the start position and then consumes kind.prefix_len()
  prefix characters itself before reading the quote character.
* string_parser.rs: the offset is computed from kind.prefix_len() instead of
  the length of the kind's Display string.
* token.rs: add TryFrom<char> and TryFrom<[char; 2]> for StringKind (prefix
  letters are accepted in either case) and StringKind::prefix_len.

diff --git a/compiler/parser/src/lexer.rs b/compiler/parser/src/lexer.rs
index 939e8a4a69..8251794586 100644
--- a/compiler/parser/src/lexer.rs
+++ b/compiler/parser/src/lexer.rs
@@ -241,59 +241,23 @@ where
 
     // Lexer helper functions:
     fn lex_identifier(&mut self) -> LexResult {
-        let mut name = String::new();
-        let start_pos = self.get_pos();
-
         // Detect potential string like rb'' b'' f'' u'' r''
-        let mut saw_b = false;
-        let mut saw_r = false;
-        let mut saw_u = false;
-        let mut saw_f = false;
-        loop {
-            // Detect r"", f"", b"" and u""
-            if !(saw_b || saw_u || saw_f) && matches!(self.window[0], Some('b' | 'B')) {
-                saw_b = true;
-            } else if !(saw_b || saw_r || saw_u || saw_f)
-                && matches!(self.window[0], Some('u' | 'U'))
-            {
-                saw_u = true;
-            } else if !(saw_r || saw_u) && matches!(self.window[0], Some('r' | 'R')) {
-                saw_r = true;
-            } else if !(saw_b || saw_u || saw_f) && matches!(self.window[0], Some('f' | 'F')) {
-                saw_f = true;
-            } else {
-                break;
+        match self.window[..3] {
+            [Some(c), Some('"' | '\''), ..] => {
+                if let Ok(kind) = StringKind::try_from(c) {
+                    return self.lex_string(kind);
+                }
             }
-
-            // Take up char into name:
-            name.push(self.next_char().unwrap());
-
-            // Check if we have a string:
-            if matches!(self.window[0], Some('"' | '\'')) {
-                let kind = if saw_r {
-                    if saw_b {
-                        StringKind::RawBytes
-                    } else if saw_f {
-                        StringKind::RawFString
-                    } else {
-                        StringKind::RawString
-                    }
-                } else if saw_b {
-                    StringKind::Bytes
-                } else if saw_u {
-                    StringKind::Unicode
-                } else if saw_f {
-                    StringKind::FString
-                } else {
-                    StringKind::String
-                };
-
-                return self
-                    .lex_string(kind)
-                    .map(|(_, tok, end_pos)| (start_pos, tok, end_pos));
+            [Some(c1), Some(c2), Some('"' | '\'')] => {
+                if let Ok(kind) = StringKind::try_from([c1, c2]) {
+                    return self.lex_string(kind);
+                }
             }
-        }
+            _ => {}
+        };
 
+        let start_pos = self.get_pos();
+        let mut name = String::new();
         while self.is_identifier_continuation() {
             name.push(self.next_char().unwrap());
         }
@@ -495,6 +459,9 @@ where
 
     fn lex_string(&mut self, kind: StringKind) -> LexResult {
         let start_pos = self.get_pos();
+        for _ in 0..kind.prefix_len() {
+            self.next_char();
+        }
         let quote_char = self.next_char().unwrap();
         let mut string_content = String::new();
 
diff --git a/compiler/parser/src/string_parser.rs b/compiler/parser/src/string_parser.rs
index 4e94347aef..1d95cff8c6 100644
--- a/compiler/parser/src/string_parser.rs
+++ b/compiler/parser/src/string_parser.rs
@@ -26,7 +26,7 @@ impl<'a> StringParser<'a> {
         str_start: Location,
         str_end: Location,
     ) -> Self {
-        let offset = kind.to_string().len() + if triple_quoted { 3 } else { 1 };
+        let offset = kind.prefix_len() + if triple_quoted { 3 } else { 1 };
         Self {
             chars: source.chars().peekable(),
             kind,
diff --git a/compiler/parser/src/token.rs b/compiler/parser/src/token.rs
index ce48410eeb..c6185391db 100644
--- a/compiler/parser/src/token.rs
+++ b/compiler/parser/src/token.rs
@@ -240,6 +240,34 @@ pub enum StringKind {
     Unicode,
 }
 
+impl TryFrom<char> for StringKind {
+    type Error = String;
+
+    fn try_from(ch: char) -> Result<Self, Self::Error> {
+        match ch {
+            'r' | 'R' => Ok(StringKind::RawString),
+            'f' | 'F' => Ok(StringKind::FString),
+            'u' | 'U' => Ok(StringKind::Unicode),
+            'b' | 'B' => Ok(StringKind::Bytes),
+            c => Err(format!("Unexpected string prefix: {c}")),
+        }
+    }
+}
+
+impl TryFrom<[char; 2]> for StringKind {
+    type Error = String;
+
+    fn try_from(chars: [char; 2]) -> Result<Self, Self::Error> {
+        match chars {
+            ['r' | 'R', 'f' | 'F'] => Ok(StringKind::RawFString),
+            ['f' | 'F', 'r' | 'R'] => Ok(StringKind::RawFString),
+            ['r' | 'R', 'b' | 'B'] => Ok(StringKind::RawBytes),
+            ['b' | 'B', 'r' | 'R'] => Ok(StringKind::RawBytes),
+            [c1, c2] => Err(format!("Unexpected string prefix: {c1}{c2}")),
+        }
+    }
+}
+
 impl fmt::Display for StringKind {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         use StringKind::*;
@@ -274,4 +302,13 @@ impl StringKind {
     pub fn is_unicode(&self) -> bool {
         matches!(self, StringKind::Unicode)
     }
+
+    pub fn prefix_len(&self) -> usize {
+        use StringKind::*;
+        match self {
+            String => 0,
+            RawString | FString | Unicode | Bytes => 1,
+            RawFString | RawBytes => 2,
+        }
+    }
 }