From f02f3279b0b4e32232013e265fe45dd6b29b6281 Mon Sep 17 00:00:00 2001 From: harupy Date: Wed, 4 Jan 2023 23:57:50 +0900 Subject: [PATCH 1/3] Simplify string check --- compiler/parser/src/lexer.rs | 65 +++++++++--------------------------- compiler/parser/src/token.rs | 17 ++++++++++ 2 files changed, 33 insertions(+), 49 deletions(-) diff --git a/compiler/parser/src/lexer.rs b/compiler/parser/src/lexer.rs index 939e8a4a69..7ba9897e76 100644 --- a/compiler/parser/src/lexer.rs +++ b/compiler/parser/src/lexer.rs @@ -241,59 +241,23 @@ where // Lexer helper functions: fn lex_identifier(&mut self) -> LexResult { - let mut name = String::new(); - let start_pos = self.get_pos(); - // Detect potential string like rb'' b'' f'' u'' r'' - let mut saw_b = false; - let mut saw_r = false; - let mut saw_u = false; - let mut saw_f = false; - loop { - // Detect r"", f"", b"" and u"" - if !(saw_b || saw_u || saw_f) && matches!(self.window[0], Some('b' | 'B')) { - saw_b = true; - } else if !(saw_b || saw_r || saw_u || saw_f) - && matches!(self.window[0], Some('u' | 'U')) - { - saw_u = true; - } else if !(saw_r || saw_u) && matches!(self.window[0], Some('r' | 'R')) { - saw_r = true; - } else if !(saw_b || saw_u || saw_f) && matches!(self.window[0], Some('f' | 'F')) { - saw_f = true; - } else { - break; + match self.window[..3] { + [Some(c), Some('"' | '\''), ..] => { + if let Ok(kind) = c.to_string().try_into() { + return self.lex_string(kind); + } } - - // Take up char into name: - name.push(self.next_char().unwrap()); - - // Check if we have a string: - if matches!(self.window[0], Some('"' | '\'')) { - let kind = if saw_r { - if saw_b { - StringKind::RawBytes - } else if saw_f { - StringKind::RawFString - } else { - StringKind::RawString - } - } else if saw_b { - StringKind::Bytes - } else if saw_u { - StringKind::Unicode - } else if saw_f { - StringKind::FString - } else { - StringKind::String - }; - - return self - .lex_string(kind) - .map(|(_, tok, end_pos)| (start_pos, tok, end_pos)); + [Some(c1), Some(c2), Some('"' | '\'')] => { + if let Ok(kind) = format!("{c1}{c2}").try_into() { + return self.lex_string(kind); + } } - } + _ => {} + }; + let start_pos = self.get_pos(); + let mut name = String::new(); while self.is_identifier_continuation() { name.push(self.next_char().unwrap()); } @@ -495,6 +459,9 @@ where fn lex_string(&mut self, kind: StringKind) -> LexResult { let start_pos = self.get_pos(); + for _ in 0..kind.to_string().len() { + self.next_char(); + } let quote_char = self.next_char().unwrap(); let mut string_content = String::new(); diff --git a/compiler/parser/src/token.rs b/compiler/parser/src/token.rs index ce48410eeb..e915429b25 100644 --- a/compiler/parser/src/token.rs +++ b/compiler/parser/src/token.rs @@ -240,6 +240,23 @@ pub enum StringKind { Unicode, } +impl TryFrom for StringKind { + type Error = String; + + fn try_from(value: String) -> Result { + match value.as_str() { + "" => Ok(StringKind::String), + "r" | "R" => Ok(StringKind::RawString), + "u" | "U" => Ok(StringKind::Unicode), + "b" | "B" => Ok(StringKind::Bytes), + "f" | "F" => Ok(StringKind::FString), + "fr" | "Fr" | "fR" | "FR" | "rf" | "rF" | "Rf" | "RF" => Ok(StringKind::RawFString), + "br" | "Br" | "bR" | "BR" | "rb" | "rB" | "Rb" | "RB" => Ok(StringKind::RawBytes), + s => Err(format!("Unexpected string prefix: {s}")), + } + } +} + impl fmt::Display for StringKind { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { use StringKind::*; From d775965e3d42a665506f97139982c0a994069f69 Mon Sep 17 00:00:00 2001 From: harupy Date: Thu, 5 Jan 2023 01:18:30 +0900 Subject: [PATCH 2/3] Use try_from --- compiler/parser/src/lexer.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler/parser/src/lexer.rs b/compiler/parser/src/lexer.rs index 7ba9897e76..d266f64958 100644 --- a/compiler/parser/src/lexer.rs +++ b/compiler/parser/src/lexer.rs @@ -244,12 +244,12 @@ where // Detect potential string like rb'' b'' f'' u'' r'' match self.window[..3] { [Some(c), Some('"' | '\''), ..] => { - if let Ok(kind) = c.to_string().try_into() { + if let Ok(kind) = StringKind::try_from(c.to_string()) { return self.lex_string(kind); } } [Some(c1), Some(c2), Some('"' | '\'')] => { - if let Ok(kind) = format!("{c1}{c2}").try_into() { + if let Ok(kind) = StringKind::try_from(format!("{c1}{c2}")) { return self.lex_string(kind); } } From c640312051f9cd9c9cc9796fa8932b7f92af89dc Mon Sep 17 00:00:00 2001 From: harupy Date: Thu, 5 Jan 2023 18:24:54 +0900 Subject: [PATCH 3/3] Address comments --- compiler/parser/src/lexer.rs | 6 ++-- compiler/parser/src/string_parser.rs | 2 +- compiler/parser/src/token.rs | 42 ++++++++++++++++++++-------- 3 files changed, 35 insertions(+), 15 deletions(-) diff --git a/compiler/parser/src/lexer.rs b/compiler/parser/src/lexer.rs index d266f64958..8251794586 100644 --- a/compiler/parser/src/lexer.rs +++ b/compiler/parser/src/lexer.rs @@ -244,12 +244,12 @@ where // Detect potential string like rb'' b'' f'' u'' r'' match self.window[..3] { [Some(c), Some('"' | '\''), ..] => { - if let Ok(kind) = StringKind::try_from(c.to_string()) { + if let Ok(kind) = StringKind::try_from(c) { return self.lex_string(kind); } } [Some(c1), Some(c2), Some('"' | '\'')] => { - if let Ok(kind) = StringKind::try_from(format!("{c1}{c2}")) { + if let Ok(kind) = StringKind::try_from([c1, c2]) { return self.lex_string(kind); } } @@ -459,7 +459,7 @@ where fn lex_string(&mut self, kind: StringKind) -> LexResult { let start_pos = self.get_pos(); - for _ in 0..kind.to_string().len() { + for _ in 0..kind.prefix_len() { self.next_char(); } let quote_char = self.next_char().unwrap(); diff --git a/compiler/parser/src/string_parser.rs b/compiler/parser/src/string_parser.rs index 4e94347aef..1d95cff8c6 100644 --- a/compiler/parser/src/string_parser.rs +++ b/compiler/parser/src/string_parser.rs @@ -26,7 +26,7 @@ impl<'a> StringParser<'a> { str_start: Location, str_end: Location, ) -> Self { - let offset = kind.to_string().len() + if triple_quoted { 3 } else { 1 }; + let offset = kind.prefix_len() + if triple_quoted { 3 } else { 1 }; Self { chars: source.chars().peekable(), kind, diff --git a/compiler/parser/src/token.rs b/compiler/parser/src/token.rs index e915429b25..c6185391db 100644 --- a/compiler/parser/src/token.rs +++ b/compiler/parser/src/token.rs @@ -240,19 +240,30 @@ pub enum StringKind { Unicode, } -impl TryFrom for StringKind { +impl TryFrom for StringKind { type Error = String; - fn try_from(value: String) -> Result { - match value.as_str() { - "" => Ok(StringKind::String), - "r" | "R" => Ok(StringKind::RawString), - "u" | "U" => Ok(StringKind::Unicode), - "b" | "B" => Ok(StringKind::Bytes), - "f" | "F" => Ok(StringKind::FString), - "fr" | "Fr" | "fR" | "FR" | "rf" | "rF" | "Rf" | "RF" => Ok(StringKind::RawFString), - "br" | "Br" | "bR" | "BR" | "rb" | "rB" | "Rb" | "RB" => Ok(StringKind::RawBytes), - s => Err(format!("Unexpected string prefix: {s}")), + fn try_from(ch: char) -> Result { + match ch { + 'r' | 'R' => Ok(StringKind::RawString), + 'f' | 'F' => Ok(StringKind::FString), + 'u' | 'U' => Ok(StringKind::Unicode), + 'b' | 'B' => Ok(StringKind::Bytes), + c => Err(format!("Unexpected string prefix: {c}")), + } + } +} + +impl TryFrom<[char; 2]> for StringKind { + type Error = String; + + fn try_from(chars: [char; 2]) -> Result { + match chars { + ['r' | 'R', 'f' | 'F'] => Ok(StringKind::RawFString), + ['f' | 'F', 'r' | 'R'] => Ok(StringKind::RawFString), + ['r' | 'R', 'b' | 'B'] => Ok(StringKind::RawBytes), + ['b' | 'B', 'r' | 'R'] => Ok(StringKind::RawBytes), + [c1, c2] => Err(format!("Unexpected string prefix: {c1}{c2}")), } } } @@ -291,4 +302,13 @@ impl StringKind { pub fn is_unicode(&self) -> bool { matches!(self, StringKind::Unicode) } + + pub fn prefix_len(&self) -> usize { + use StringKind::*; + match self { + String => 0, + RawString | FString | Unicode | Bytes => 1, + RawFString | RawBytes => 2, + } + } }