diff --git a/Cargo.lock b/Cargo.lock index c01dc468ff..2e126f01af 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1007,7 +1007,6 @@ dependencies = [ "num-bigint 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", "num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", "regex 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)", "unic-emoji-char 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "wtf8 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/compiler/src/error.rs b/compiler/src/error.rs index b0e631b55c..679a914bf7 100644 --- a/compiler/src/error.rs +++ b/compiler/src/error.rs @@ -1,5 +1,5 @@ use rustpython_parser::error::{ParseError, ParseErrorType}; -use rustpython_parser::lexer::Location; +use rustpython_parser::location::Location; use std::error::Error; use std::fmt; diff --git a/compiler/src/symboltable.rs b/compiler/src/symboltable.rs index 04e5d9ede5..961f028231 100644 --- a/compiler/src/symboltable.rs +++ b/compiler/src/symboltable.rs @@ -9,7 +9,7 @@ Inspirational file: https://github.com/python/cpython/blob/master/Python/symtabl use crate::error::{CompileError, CompileErrorType}; use rustpython_parser::ast; -use rustpython_parser::lexer::Location; +use rustpython_parser::location::Location; use std::collections::HashMap; pub fn make_symbol_table(program: &ast::Program) -> Result { diff --git a/parser/Cargo.toml b/parser/Cargo.toml index af0b19d39f..7e5e3d149b 100644 --- a/parser/Cargo.toml +++ b/parser/Cargo.toml @@ -16,5 +16,4 @@ num-bigint = "0.2" num-traits = "0.2" unicode-xid = "0.1.0" unic-emoji-char = "0.9.0" -serde = { version = "1.0.66", features = ["derive"] } wtf8 = "0.0.3" diff --git a/parser/src/ast.rs b/parser/src/ast.rs index 0a32fe3bfa..cccc17180d 100644 --- a/parser/src/ast.rs +++ b/parser/src/ast.rs @@ -2,9 +2,8 @@ //! //! Roughly equivalent to this: https://docs.python.org/3/library/ast.html -pub use super::lexer::Location; +pub use crate::location::Location; use num_bigint::BigInt; -use serde::{Deserialize, Serialize}; /* #[derive(Debug)] @@ -390,7 +389,7 @@ pub enum Number { } /// Transforms a value prior to formatting it. -#[derive(Copy, Clone, Debug, PartialEq, Serialize, Deserialize)] +#[derive(Copy, Clone, Debug, PartialEq)] pub enum ConversionFlag { /// Converts by calling `str()`. Str, diff --git a/parser/src/error.rs b/parser/src/error.rs index 359ee74a4a..129692d2d6 100644 --- a/parser/src/error.rs +++ b/parser/src/error.rs @@ -1,14 +1,89 @@ //! Define internal parse error types //! The goal is to provide a matching and a safe error API, maksing errors from LALR -extern crate lalrpop_util; -use self::lalrpop_util::ParseError as InnerError; +use lalrpop_util::ParseError as InnerError; +use lalrpop_util::ParseError as LalrpopError; -use crate::lexer::{LexicalError, LexicalErrorType, Location}; +use crate::location::Location; use crate::token::Tok; use std::error::Error; use std::fmt; +/// Represents an error during lexical scanning. +#[derive(Debug, PartialEq)] +pub struct LexicalError { + pub error: LexicalErrorType, + pub location: Location, +} + +#[derive(Debug, PartialEq)] +pub enum LexicalErrorType { + StringError, + UnicodeError, + NestingError, + UnrecognizedToken { tok: char }, + FStringError(FStringErrorType), + OtherError(String), +} + +impl fmt::Display for LexicalErrorType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + LexicalErrorType::StringError => write!(f, "Got unexpected string"), + LexicalErrorType::FStringError(error) => write!(f, "Got error in f-string: {}", error), + LexicalErrorType::UnicodeError => write!(f, "Got unexpected unicode"), + LexicalErrorType::NestingError => write!(f, "Got unexpected nesting"), + LexicalErrorType::UnrecognizedToken { tok } => { + write!(f, "Got unexpected token {}", tok) + } + LexicalErrorType::OtherError(msg) => write!(f, "{}", msg), + } + } +} + +// TODO: consolidate these with ParseError +#[derive(Debug, PartialEq)] +pub struct FStringError { + pub error: FStringErrorType, + pub location: Location, +} + +#[derive(Debug, PartialEq)] +pub enum FStringErrorType { + UnclosedLbrace, + UnopenedRbrace, + InvalidExpression(Box), + InvalidConversionFlag, + EmptyExpression, + MismatchedDelimiter, +} + +impl fmt::Display for FStringErrorType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + FStringErrorType::UnclosedLbrace => write!(f, "Unclosed '('"), + FStringErrorType::UnopenedRbrace => write!(f, "Unopened ')'"), + FStringErrorType::InvalidExpression(error) => { + write!(f, "Invalid expression: {}", error) + } + FStringErrorType::InvalidConversionFlag => write!(f, "Invalid conversion flag"), + FStringErrorType::EmptyExpression => write!(f, "Empty expression"), + FStringErrorType::MismatchedDelimiter => write!(f, "Mismatched delimiter"), + } + } +} + +impl From for LalrpopError { + fn from(err: FStringError) -> Self { + lalrpop_util::ParseError::User { + error: LexicalError { + error: LexicalErrorType::FStringError(err.error), + location: err.location, + }, + } + } +} + /// Represents an error during parsing #[derive(Debug, PartialEq)] pub struct ParseError { diff --git a/parser/src/fstring.rs b/parser/src/fstring.rs index e1e758907e..6b90a9fcdb 100644 --- a/parser/src/fstring.rs +++ b/parser/src/fstring.rs @@ -2,37 +2,14 @@ use std::iter; use std::mem; use std::str; -use lalrpop_util::ParseError as LalrpopError; - use crate::ast::{ConversionFlag, StringGroup}; -use crate::lexer::{LexicalError, LexicalErrorType, Location, Tok}; +use crate::error::{FStringError, FStringErrorType}; +use crate::location::Location; use crate::parser::parse_expression; -use self::FStringError::*; +use self::FStringErrorType::*; use self::StringGroup::*; -// TODO: consolidate these with ParseError -#[derive(Debug, PartialEq)] -pub enum FStringError { - UnclosedLbrace, - UnopenedRbrace, - InvalidExpression, - InvalidConversionFlag, - EmptyExpression, - MismatchedDelimiter, -} - -impl From for LalrpopError { - fn from(_err: FStringError) -> Self { - lalrpop_util::ParseError::User { - error: LexicalError { - error: LexicalErrorType::StringError, - location: Default::default(), - }, - } - } -} - struct FStringParser<'a> { chars: iter::Peekable>, } @@ -44,7 +21,7 @@ impl<'a> FStringParser<'a> { } } - fn parse_formatted_value(&mut self) -> Result { + fn parse_formatted_value(&mut self) -> Result { let mut expression = String::new(); let mut spec = String::new(); let mut delims = Vec::new(); @@ -103,7 +80,8 @@ impl<'a> FStringParser<'a> { } return Ok(FormattedValue { value: Box::new( - parse_expression(expression.trim()).map_err(|_| InvalidExpression)?, + parse_expression(expression.trim()) + .map_err(|e| InvalidExpression(Box::new(e.error)))?, ), conversion, spec, @@ -127,7 +105,7 @@ impl<'a> FStringParser<'a> { Err(UnclosedLbrace) } - fn parse(mut self) -> Result { + fn parse(mut self) -> Result { let mut content = String::new(); let mut values = vec![]; @@ -175,10 +153,20 @@ impl<'a> FStringParser<'a> { } } -pub fn parse_fstring(source: &str) -> Result { +/// Parse an f-string into a string group. +fn parse_fstring(source: &str) -> Result { FStringParser::new(source).parse() } +/// Parse an fstring from a string, located at a certain position in the sourcecode. +/// In case of errors, we will get the location and the error returned. +pub fn parse_located_fstring( + source: &str, + location: Location, +) -> Result { + parse_fstring(source).map_err(|error| FStringError { error, location }) +} + #[cfg(test)] mod tests { use crate::ast; @@ -232,6 +220,8 @@ mod tests { fn test_parse_invalid_fstring() { assert_eq!(parse_fstring("{"), Err(UnclosedLbrace)); assert_eq!(parse_fstring("}"), Err(UnopenedRbrace)); - assert_eq!(parse_fstring("{class}"), Err(InvalidExpression)); + + // TODO: check for InvalidExpression enum? + assert!(parse_fstring("{class}").is_err()); } } diff --git a/parser/src/lexer.rs b/parser/src/lexer.rs index e2ac5d2f88..ef66d12d99 100644 --- a/parser/src/lexer.rs +++ b/parser/src/lexer.rs @@ -5,12 +5,12 @@ extern crate unic_emoji_char; extern crate unicode_xid; pub use super::token::Tok; +use crate::error::{LexicalError, LexicalErrorType}; +use crate::location::Location; use num_bigint::BigInt; use num_traits::Num; -use serde::{Deserialize, Serialize}; use std::cmp::Ordering; use std::collections::HashMap; -use std::fmt; use std::str::FromStr; use unic_emoji_char::is_emoji_presentation; use unicode_xid::UnicodeXID; @@ -60,61 +60,6 @@ pub struct Lexer> { keywords: HashMap, } -#[derive(Debug, PartialEq)] -pub struct LexicalError { - pub error: LexicalErrorType, - pub location: Location, -} - -#[derive(Debug, PartialEq)] -pub enum LexicalErrorType { - StringError, - UnicodeError, - NestingError, - UnrecognizedToken { tok: char }, - OtherError(String), -} - -impl fmt::Display for LexicalErrorType { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - LexicalErrorType::StringError => write!(f, "Got unexpected string"), - LexicalErrorType::UnicodeError => write!(f, "Got unexpected unicode"), - LexicalErrorType::NestingError => write!(f, "Got unexpected nesting"), - LexicalErrorType::UnrecognizedToken { tok } => { - write!(f, "Got unexpected token {}", tok) - } - LexicalErrorType::OtherError(ref msg) => write!(f, "{}", msg), - } - } -} - -#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)] -pub struct Location { - row: usize, - column: usize, -} - -impl fmt::Display for Location { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "line {} column {}", self.row, self.column) - } -} - -impl Location { - pub fn new(row: usize, column: usize) -> Self { - Location { row, column } - } - - pub fn row(&self) -> usize { - self.row - } - - pub fn column(&self) -> usize { - self.column - } -} - pub fn get_keywords() -> HashMap { let mut keywords: HashMap = HashMap::new(); @@ -299,8 +244,7 @@ where lxr.next_char(); lxr.next_char(); // Start at top row (=1) left column (=1) - lxr.location.row = 1; - lxr.location.column = 1; + lxr.location.reset(); lxr } @@ -615,7 +559,10 @@ where let tok = if is_bytes { if string_content.is_ascii() { Tok::Bytes { - value: lex_byte(string_content)?, + value: lex_byte(string_content).map_err(|error| LexicalError { + error, + location: self.get_pos(), + })?, } } else { return Err(LexicalError { @@ -684,7 +631,7 @@ where let nxt = self.chars.next(); self.chr0 = self.chr1; self.chr1 = nxt; - self.location.column += 1; + self.location.go_right(); c } @@ -693,8 +640,7 @@ where } fn new_line(&mut self) { - self.location.row += 1; - self.location.column = 1; + self.location.newline(); } /// Given we are at the start of a line, count the number of spaces and/or tabs until the first character. @@ -1252,7 +1198,7 @@ where } } -fn lex_byte(s: String) -> Result, LexicalError> { +fn lex_byte(s: String) -> Result, LexicalErrorType> { let mut res = vec![]; let mut escape = false; //flag if previous was \ let mut hex_on = false; // hex mode on or off @@ -1271,10 +1217,7 @@ fn lex_byte(s: String) -> Result, LexicalError> { hex_value.clear(); } } else { - return Err(LexicalError { - error: LexicalErrorType::StringError, - location: Default::default(), - }); + return Err(LexicalErrorType::StringError); } } else { match (c, escape) { diff --git a/parser/src/lib.rs b/parser/src/lib.rs index 5cc8ca3c44..85b15c2fb2 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -6,6 +6,7 @@ pub mod ast; pub mod error; mod fstring; pub mod lexer; +pub mod location; pub mod parser; lalrpop_mod!( #[allow(clippy::all)] diff --git a/parser/src/location.rs b/parser/src/location.rs new file mode 100644 index 0000000000..c7ecb40aee --- /dev/null +++ b/parser/src/location.rs @@ -0,0 +1,41 @@ +use std::fmt; + +#[derive(Clone, Debug, Default, PartialEq)] +pub struct Location { + row: usize, + column: usize, +} + +impl fmt::Display for Location { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "line {} column {}", self.row, self.column) + } +} + +impl Location { + pub fn new(row: usize, column: usize) -> Self { + Location { row, column } + } + + pub fn row(&self) -> usize { + self.row + } + + pub fn column(&self) -> usize { + self.column + } + + pub fn reset(&mut self) { + self.row = 1; + self.column = 1; + } + + pub fn go_right(&mut self) { + self.column += 1; + } + + pub fn newline(&mut self) { + self.row += 1; + self.column = 1; + } +} diff --git a/parser/src/python.lalrpop b/parser/src/python.lalrpop index 60a81e0203..3cba641f0e 100644 --- a/parser/src/python.lalrpop +++ b/parser/src/python.lalrpop @@ -6,8 +6,10 @@ use std::iter::FromIterator; use crate::ast; -use crate::fstring::parse_fstring; +use crate::fstring::parse_located_fstring; +use crate::error::LexicalError; use crate::lexer; +use crate::location; use num_bigint::BigInt; @@ -982,11 +984,11 @@ Number: ast::Number = { }; StringGroup: ast::StringGroup = { - =>? { + =>? { let mut values = vec![]; for (value, is_fstring) in s { values.push(if is_fstring { - parse_fstring(&value)? + parse_located_fstring(&value, loc.clone())? } else { ast::StringGroup::Constant { value } }) @@ -1010,8 +1012,8 @@ Identifier: String = => s; // Hook external lexer: extern { - type Location = lexer::Location; - type Error = lexer::LexicalError; + type Location = location::Location; + type Error = LexicalError; enum lexer::Tok { indent => lexer::Tok::Indent,