Skip to content

Commit eb2d0b0

Browse files
committed
refactor lex byte
1 parent 9c57ae4 commit eb2d0b0

File tree

1 file changed

+49
-79
lines changed

1 file changed

+49
-79
lines changed

parser/src/lexer.rs

Lines changed: 49 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -542,7 +542,7 @@ where
542542
let tok = if is_bytes {
543543
if string_content.is_ascii() {
544544
Tok::Bytes {
545-
value: self.lex_byte(string_content)?,
545+
value: lex_byte(string_content)?,
546546
}
547547
} else {
548548
return Err(LexicalError::StringError);
@@ -1105,84 +1105,6 @@ where
11051105
let tok_end = self.get_pos();
11061106
Ok((tok_start, ty, tok_end))
11071107
}
1108-
1109-
fn lex_byte(&self, s: String) -> Result<Vec<u8>, LexicalError> {
1110-
let mut res = vec![];
1111-
let mut escape = false; //flag if previous was \
1112-
let mut hex_on = false; // hex mode on or off
1113-
let mut hex_value = String::new();
1114-
1115-
for c in s.chars() {
1116-
match c {
1117-
'\\' => {
1118-
if escape {
1119-
res.push(92);
1120-
escape = false;
1121-
} else {
1122-
escape = true;
1123-
}
1124-
}
1125-
1126-
'x' => {
1127-
if escape {
1128-
hex_on = true;
1129-
} else {
1130-
res.push(120);
1131-
}
1132-
escape = false;
1133-
}
1134-
't' => {
1135-
if escape {
1136-
res.push(9);
1137-
} else {
1138-
res.push(116);
1139-
}
1140-
escape = false;
1141-
}
1142-
'n' => {
1143-
if escape {
1144-
res.push(10);
1145-
} else {
1146-
res.push(110)
1147-
}
1148-
escape = false;
1149-
}
1150-
'r' => {
1151-
if escape {
1152-
res.push(13);
1153-
} else {
1154-
res.push(114)
1155-
}
1156-
escape = false;
1157-
}
1158-
x => {
1159-
if hex_on {
1160-
if x.is_ascii_hexdigit() {
1161-
if hex_value.is_empty() {
1162-
hex_value.push(x);
1163-
continue;
1164-
} else {
1165-
hex_value.push(x);
1166-
res.push(u8::from_str_radix(&hex_value, 16).unwrap());
1167-
hex_on = false;
1168-
hex_value.clear();
1169-
}
1170-
} else {
1171-
return Err(LexicalError::StringError);
1172-
}
1173-
} else {
1174-
if escape {
1175-
res.push(92);
1176-
}
1177-
res.push(x as u8);
1178-
}
1179-
escape = false;
1180-
}
1181-
}
1182-
}
1183-
1184-
Ok(res)
1185-
}
11861108
}
11871109

11881110
/* Implement iterator pattern for the get_tok function.
@@ -1211,6 +1133,54 @@ where
12111133
}
12121134
}
12131135

1136+
fn lex_byte(s: String) -> Result<Vec<u8>, LexicalError> {
1137+
let mut res = vec![];
1138+
let mut escape = false; //flag if previous was \
1139+
let mut hex_on = false; // hex mode on or off
1140+
let mut hex_value = String::new();
1141+
1142+
for c in s.chars() {
1143+
if hex_on {
1144+
if c.is_ascii_hexdigit() {
1145+
if hex_value.is_empty() {
1146+
hex_value.push(c);
1147+
continue;
1148+
} else {
1149+
hex_value.push(c);
1150+
res.push(u8::from_str_radix(&hex_value, 16).unwrap());
1151+
hex_on = false;
1152+
hex_value.clear();
1153+
}
1154+
} else {
1155+
return Err(LexicalError::StringError);
1156+
}
1157+
} else {
1158+
match (c, escape) {
1159+
('\\', true) => res.push(b'\\'),
1160+
('\\', false) => {
1161+
escape = true;
1162+
continue;
1163+
}
1164+
('x', true) => hex_on = true,
1165+
('x', false) => res.push(b'x'),
1166+
('t', true) => res.push(b'\t'),
1167+
('t', false) => res.push(b't'),
1168+
('n', true) => res.push(b'\n'),
1169+
('n', false) => res.push(b'n'),
1170+
('r', true) => res.push(b'\r'),
1171+
('r', false) => res.push(b'r'),
1172+
(x, true) => {
1173+
res.push(b'\\');
1174+
res.push(x as u8);
1175+
}
1176+
(x, false) => res.push(x as u8),
1177+
}
1178+
escape = false;
1179+
}
1180+
}
1181+
Ok(res)
1182+
}
1183+
12141184
#[cfg(test)]
12151185
mod tests {
12161186
use super::{make_tokenizer, NewlineHandler, Tok};

0 commit comments

Comments
 (0)