Skip to content

parser error handling improvements. #1112

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 7, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion compiler/src/error.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use rustpython_parser::error::{ParseError, ParseErrorType};
use rustpython_parser::lexer::Location;
use rustpython_parser::location::Location;

use std::error::Error;
use std::fmt;
Expand Down
2 changes: 1 addition & 1 deletion compiler/src/symboltable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Inspirational file: https://github.com/python/cpython/blob/master/Python/symtabl

use crate::error::{CompileError, CompileErrorType};
use rustpython_parser::ast;
use rustpython_parser::lexer::Location;
use rustpython_parser::location::Location;
use std::collections::HashMap;

pub fn make_symbol_table(program: &ast::Program) -> Result<SymbolScope, SymbolTableError> {
Expand Down
1 change: 0 additions & 1 deletion parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,4 @@ num-bigint = "0.2"
num-traits = "0.2"
unicode-xid = "0.1.0"
unic-emoji-char = "0.9.0"
serde = { version = "1.0.66", features = ["derive"] }
wtf8 = "0.0.3"
5 changes: 2 additions & 3 deletions parser/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@
//!
//! Roughly equivalent to this: https://docs.python.org/3/library/ast.html

pub use super::lexer::Location;
pub use crate::location::Location;
use num_bigint::BigInt;
use serde::{Deserialize, Serialize};

/*
#[derive(Debug)]
Expand Down Expand Up @@ -390,7 +389,7 @@ pub enum Number {
}

/// Transforms a value prior to formatting it.
#[derive(Copy, Clone, Debug, PartialEq, Serialize, Deserialize)]
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum ConversionFlag {
/// Converts by calling `str(<value>)`.
Str,
Expand Down
81 changes: 78 additions & 3 deletions parser/src/error.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,89 @@
//! Define internal parse error types
//! The goal is to provide a matching and a safe error API, masking errors from LALR
extern crate lalrpop_util;
use self::lalrpop_util::ParseError as InnerError;
use lalrpop_util::ParseError as InnerError;
use lalrpop_util::ParseError as LalrpopError;

use crate::lexer::{LexicalError, LexicalErrorType, Location};
use crate::location::Location;
use crate::token::Tok;

use std::error::Error;
use std::fmt;

/// Represents an error during lexical scanning.
///
/// Pairs the kind of failure with the source location it is reported at.
#[derive(Debug, PartialEq)]
pub struct LexicalError {
/// What went wrong.
pub error: LexicalErrorType,
/// Source location attached to the error.
pub location: Location,
}

/// The kinds of failure that a [`LexicalError`] can carry.
#[derive(Debug, PartialEq)]
pub enum LexicalErrorType {
    /// Displayed as "Got unexpected string".
    StringError,
    /// Displayed as "Got unexpected unicode".
    UnicodeError,
    /// Displayed as "Got unexpected nesting".
    NestingError,
    /// A character the lexer could not turn into a token.
    UnrecognizedToken { tok: char },
    /// A failure inside an f-string, wrapping the f-string error kind.
    FStringError(FStringErrorType),
    /// Any other failure, described by a free-form message.
    OtherError(String),
}

impl fmt::Display for LexicalErrorType {
    /// Writes the human-readable message for this error kind.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Variants with fixed (or already-built) text fall through to a single
        // `write_str`; variants that interpolate a value format and return early.
        let text = match self {
            LexicalErrorType::UnrecognizedToken { tok } => {
                return write!(f, "Got unexpected token {}", tok);
            }
            LexicalErrorType::FStringError(error) => {
                return write!(f, "Got error in f-string: {}", error);
            }
            LexicalErrorType::OtherError(msg) => msg.as_str(),
            LexicalErrorType::StringError => "Got unexpected string",
            LexicalErrorType::UnicodeError => "Got unexpected unicode",
            LexicalErrorType::NestingError => "Got unexpected nesting",
        };
        f.write_str(text)
    }
}

// TODO: consolidate these with ParseError
/// An f-string parsing failure together with a source location.
///
/// The f-string parser itself only produces the error kind; the location is
/// supplied by the caller when the error is wrapped.
#[derive(Debug, PartialEq)]
pub struct FStringError {
/// What went wrong while parsing the f-string.
pub error: FStringErrorType,
/// Source location attached to the error.
pub location: Location,
}

/// The specific reason an f-string failed to parse.
#[derive(Debug, PartialEq)]
pub enum FStringErrorType {
    /// A `{` was opened but never closed.
    UnclosedLbrace,
    /// A `}` appeared without a matching `{`.
    UnopenedRbrace,
    /// The embedded expression failed to parse; carries the underlying
    /// parse error kind.
    InvalidExpression(Box<ParseErrorType>),
    /// Displayed as "Invalid conversion flag".
    InvalidConversionFlag,
    /// Displayed as "Empty expression".
    EmptyExpression,
    /// Displayed as "Mismatched delimiter".
    MismatchedDelimiter,
}

impl fmt::Display for FStringErrorType {
    /// Writes the human-readable message for this f-string error kind.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            // These two variants are about braces, not parentheses. The braces
            // are doubled because `{` and `}` are format-string metacharacters.
            FStringErrorType::UnclosedLbrace => write!(f, "Unclosed '{{'"),
            FStringErrorType::UnopenedRbrace => write!(f, "Unopened '}}'"),
            FStringErrorType::InvalidExpression(error) => {
                write!(f, "Invalid expression: {}", error)
            }
            FStringErrorType::InvalidConversionFlag => write!(f, "Invalid conversion flag"),
            FStringErrorType::EmptyExpression => write!(f, "Empty expression"),
            FStringErrorType::MismatchedDelimiter => write!(f, "Mismatched delimiter"),
        }
    }
}

impl From<FStringError> for LalrpopError<Location, Tok, LexicalError> {
fn from(err: FStringError) -> Self {
lalrpop_util::ParseError::User {
error: LexicalError {
error: LexicalErrorType::FStringError(err.error),
location: err.location,
},
}
}
}

/// Represents an error during parsing
#[derive(Debug, PartialEq)]
pub struct ParseError {
Expand Down
52 changes: 21 additions & 31 deletions parser/src/fstring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,37 +2,14 @@ use std::iter;
use std::mem;
use std::str;

use lalrpop_util::ParseError as LalrpopError;

use crate::ast::{ConversionFlag, StringGroup};
use crate::lexer::{LexicalError, LexicalErrorType, Location, Tok};
use crate::error::{FStringError, FStringErrorType};
use crate::location::Location;
use crate::parser::parse_expression;

use self::FStringError::*;
use self::FStringErrorType::*;
use self::StringGroup::*;

// TODO: consolidate these with ParseError
#[derive(Debug, PartialEq)]
pub enum FStringError {
UnclosedLbrace,
UnopenedRbrace,
InvalidExpression,
InvalidConversionFlag,
EmptyExpression,
MismatchedDelimiter,
}

impl From<FStringError> for LalrpopError<Location, Tok, LexicalError> {
fn from(_err: FStringError) -> Self {
lalrpop_util::ParseError::User {
error: LexicalError {
error: LexicalErrorType::StringError,
location: Default::default(),
},
}
}
}

/// Parser state for an f-string body.
struct FStringParser<'a> {
/// Peekable character stream over the borrowed string being parsed.
chars: iter::Peekable<str::Chars<'a>>,
}
Expand All @@ -44,7 +21,7 @@ impl<'a> FStringParser<'a> {
}
}

fn parse_formatted_value(&mut self) -> Result<StringGroup, FStringError> {
fn parse_formatted_value(&mut self) -> Result<StringGroup, FStringErrorType> {
let mut expression = String::new();
let mut spec = String::new();
let mut delims = Vec::new();
Expand Down Expand Up @@ -103,7 +80,8 @@ impl<'a> FStringParser<'a> {
}
return Ok(FormattedValue {
value: Box::new(
parse_expression(expression.trim()).map_err(|_| InvalidExpression)?,
parse_expression(expression.trim())
.map_err(|e| InvalidExpression(Box::new(e.error)))?,
),
conversion,
spec,
Expand All @@ -127,7 +105,7 @@ impl<'a> FStringParser<'a> {
Err(UnclosedLbrace)
}

fn parse(mut self) -> Result<StringGroup, FStringError> {
fn parse(mut self) -> Result<StringGroup, FStringErrorType> {
let mut content = String::new();
let mut values = vec![];

Expand Down Expand Up @@ -175,10 +153,20 @@ impl<'a> FStringParser<'a> {
}
}

pub fn parse_fstring(source: &str) -> Result<StringGroup, FStringError> {
/// Parse the body of an f-string into a string group.
///
/// On failure only the error kind is returned; no location is attached here.
fn parse_fstring(source: &str) -> Result<StringGroup, FStringErrorType> {
    let parser = FStringParser::new(source);
    parser.parse()
}

/// Parse an fstring from a string, located at a certain position in the sourcecode.
/// In case of errors, we will get the location and the error returned.
pub fn parse_located_fstring(
source: &str,
location: Location,
) -> Result<StringGroup, FStringError> {
parse_fstring(source).map_err(|error| FStringError { error, location })
}

#[cfg(test)]
mod tests {
use crate::ast;
Expand Down Expand Up @@ -232,6 +220,8 @@ mod tests {
fn test_parse_invalid_fstring() {
assert_eq!(parse_fstring("{"), Err(UnclosedLbrace));
assert_eq!(parse_fstring("}"), Err(UnopenedRbrace));
assert_eq!(parse_fstring("{class}"), Err(InvalidExpression));

// TODO: check for InvalidExpression enum?
assert!(parse_fstring("{class}").is_err());
}
}
79 changes: 11 additions & 68 deletions parser/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ extern crate unic_emoji_char;
extern crate unicode_xid;

pub use super::token::Tok;
use crate::error::{LexicalError, LexicalErrorType};
use crate::location::Location;
use num_bigint::BigInt;
use num_traits::Num;
use serde::{Deserialize, Serialize};
use std::cmp::Ordering;
use std::collections::HashMap;
use std::fmt;
use std::str::FromStr;
use unic_emoji_char::is_emoji_presentation;
use unicode_xid::UnicodeXID;
Expand Down Expand Up @@ -60,61 +60,6 @@ pub struct Lexer<T: Iterator<Item = char>> {
keywords: HashMap<String, Tok>,
}

#[derive(Debug, PartialEq)]
pub struct LexicalError {
pub error: LexicalErrorType,
pub location: Location,
}

#[derive(Debug, PartialEq)]
pub enum LexicalErrorType {
StringError,
UnicodeError,
NestingError,
UnrecognizedToken { tok: char },
OtherError(String),
}

impl fmt::Display for LexicalErrorType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
LexicalErrorType::StringError => write!(f, "Got unexpected string"),
LexicalErrorType::UnicodeError => write!(f, "Got unexpected unicode"),
LexicalErrorType::NestingError => write!(f, "Got unexpected nesting"),
LexicalErrorType::UnrecognizedToken { tok } => {
write!(f, "Got unexpected token {}", tok)
}
LexicalErrorType::OtherError(ref msg) => write!(f, "{}", msg),
}
}
}

/// A position in source text, expressed as a row and a column.
///
/// The lexer starts counting at row 1, column 1; the derived `Default`
/// yields row 0, column 0.
#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
pub struct Location {
/// Row (line) of the position.
row: usize,
/// Column of the position within the row.
column: usize,
}

impl fmt::Display for Location {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "line {} column {}", self.row, self.column)
}
}

impl Location {
pub fn new(row: usize, column: usize) -> Self {
Location { row, column }
}

pub fn row(&self) -> usize {
self.row
}

pub fn column(&self) -> usize {
self.column
}
}

pub fn get_keywords() -> HashMap<String, Tok> {
let mut keywords: HashMap<String, Tok> = HashMap::new();

Expand Down Expand Up @@ -299,8 +244,7 @@ where
lxr.next_char();
lxr.next_char();
// Start at top row (=1) left column (=1)
lxr.location.row = 1;
lxr.location.column = 1;
lxr.location.reset();
lxr
}

Expand Down Expand Up @@ -615,7 +559,10 @@ where
let tok = if is_bytes {
if string_content.is_ascii() {
Tok::Bytes {
value: lex_byte(string_content)?,
value: lex_byte(string_content).map_err(|error| LexicalError {
error,
location: self.get_pos(),
})?,
}
} else {
return Err(LexicalError {
Expand Down Expand Up @@ -684,7 +631,7 @@ where
let nxt = self.chars.next();
self.chr0 = self.chr1;
self.chr1 = nxt;
self.location.column += 1;
self.location.go_right();
c
}

Expand All @@ -693,8 +640,7 @@ where
}

fn new_line(&mut self) {
self.location.row += 1;
self.location.column = 1;
self.location.newline();
}

/// Given we are at the start of a line, count the number of spaces and/or tabs until the first character.
Expand Down Expand Up @@ -1252,7 +1198,7 @@ where
}
}

fn lex_byte(s: String) -> Result<Vec<u8>, LexicalError> {
fn lex_byte(s: String) -> Result<Vec<u8>, LexicalErrorType> {
let mut res = vec![];
let mut escape = false; //flag if previous was \
let mut hex_on = false; // hex mode on or off
Expand All @@ -1271,10 +1217,7 @@ fn lex_byte(s: String) -> Result<Vec<u8>, LexicalError> {
hex_value.clear();
}
} else {
return Err(LexicalError {
error: LexicalErrorType::StringError,
location: Default::default(),
});
return Err(LexicalErrorType::StringError);
}
} else {
match (c, escape) {
Expand Down
1 change: 1 addition & 0 deletions parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ pub mod ast;
pub mod error;
mod fstring;
pub mod lexer;
pub mod location;
pub mod parser;
lalrpop_mod!(
#[allow(clippy::all)]
Expand Down
Loading