Skip to content

Pass more information in user defined parse error #1106

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 6, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions compiler/src/error.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use rustpython_parser::error::ParseError;
use rustpython_parser::error::{ParseError, ParseErrorType};
use rustpython_parser::lexer::Location;

use std::error::Error;
Expand All @@ -13,8 +13,8 @@ pub struct CompileError {
impl From<ParseError> for CompileError {
fn from(error: ParseError) -> Self {
CompileError {
error: CompileErrorType::Parse(error),
location: Default::default(), // TODO: extract location from parse error!
error: CompileErrorType::Parse(error.error),
location: error.location,
}
}
}
Expand All @@ -28,7 +28,7 @@ pub enum CompileErrorType {
/// Expected an expression got a statement
ExpectExpr,
/// Parser error
Parse(ParseError),
Parse(ParseErrorType),
SyntaxError(String),
/// Multiple `*` detected
StarArgs,
Expand Down Expand Up @@ -56,7 +56,7 @@ impl fmt::Display for CompileError {
}?;

// Print line number:
write!(f, " at line {:?}", self.location.row())
write!(f, " at {}", self.location)
}
}

Expand Down
78 changes: 45 additions & 33 deletions parser/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,71 +3,83 @@
extern crate lalrpop_util;
use self::lalrpop_util::ParseError as InnerError;

use crate::lexer::{LexicalError, Location};
use crate::lexer::{LexicalError, LexicalErrorType, Location};
use crate::token::Tok;

use std::error::Error;
use std::fmt;

// A token of type `Tok` was observed, with a span given by the two Location values
type TokSpan = (Location, Tok, Location);

/// Represents an error during parsing
#[derive(Debug, PartialEq)]
pub enum ParseError {
pub struct ParseError {
pub error: ParseErrorType,
pub location: Location,
}

#[derive(Debug, PartialEq)]
pub enum ParseErrorType {
/// Parser encountered an unexpected end of input
EOF(Option<Location>),
EOF,
/// Parser encountered an extra token
ExtraToken(TokSpan),
ExtraToken(Tok),
/// Parser encountered an invalid token
InvalidToken(Location),
InvalidToken,
/// Parser encountered an unexpected token
UnrecognizedToken(TokSpan, Vec<String>),
UnrecognizedToken(Tok, Vec<String>),
/// Maps to `User` type from `lalrpop-util`
Other,
Lexical(LexicalErrorType),
}

/// Convert `lalrpop_util::ParseError` to our internal type
impl From<InnerError<Location, Tok, LexicalError>> for ParseError {
fn from(err: InnerError<Location, Tok, LexicalError>) -> Self {
match err {
// TODO: Are there cases where this isn't an EOF?
InnerError::InvalidToken { location } => ParseError::EOF(Some(location)),
InnerError::ExtraToken { token } => ParseError::ExtraToken(token),
// Inner field is a unit-like enum `LexicalError::StringError` with no useful info
InnerError::User { .. } => ParseError::Other,
InnerError::InvalidToken { location } => ParseError {
error: ParseErrorType::EOF,
location,
},
InnerError::ExtraToken { token } => ParseError {
error: ParseErrorType::ExtraToken(token.1),
location: token.0,
},
InnerError::User { error } => ParseError {
error: ParseErrorType::Lexical(error.error),
location: error.location,
},
InnerError::UnrecognizedToken { token, expected } => {
match token {
Some(tok) => ParseError::UnrecognizedToken(tok, expected),
Some(tok) => ParseError {
error: ParseErrorType::UnrecognizedToken(tok.1, expected),
location: tok.0,
},
// EOF was observed when it was unexpected
None => ParseError::EOF(None),
None => ParseError {
error: ParseErrorType::EOF,
location: Default::default(),
},
}
}
}
}
}

impl fmt::Display for ParseError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{} at {}", self.error, self.location)
}
}

impl fmt::Display for ParseErrorType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
ParseError::EOF(ref location) => {
if let Some(l) = location {
write!(f, "Got unexpected EOF at: {:?}", l)
} else {
write!(f, "Got unexpected EOF")
}
}
ParseError::ExtraToken(ref t_span) => {
write!(f, "Got extraneous token: {:?} at: {:?}", t_span.1, t_span.0)
}
ParseError::InvalidToken(ref location) => {
write!(f, "Got invalid token at: {:?}", location)
}
ParseError::UnrecognizedToken(ref t_span, _) => {
write!(f, "Got unexpected token: {:?} at {:?}", t_span.1, t_span.0)
ParseErrorType::EOF => write!(f, "Got unexpected EOF"),
ParseErrorType::ExtraToken(ref tok) => write!(f, "Got extraneous token: {:?}", tok),
ParseErrorType::InvalidToken => write!(f, "Got invalid token"),
ParseErrorType::UnrecognizedToken(ref tok, _) => {
write!(f, "Got unexpected token: {:?}", tok)
}
// This is user defined, it probably means a more useful error should have been given upstream.
ParseError::Other => write!(f, "Got unsupported token(s)"),
ParseErrorType::Lexical(ref error) => write!(f, "{}", error),
}
}
}
Expand Down
25 changes: 23 additions & 2 deletions parser/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use num_traits::Num;
use serde::{Deserialize, Serialize};
use std::cmp::Ordering;
use std::collections::HashMap;
use std::fmt;
use std::str::FromStr;
use unic_emoji_char::is_emoji_presentation;
use unicode_xid::UnicodeXID;
Expand Down Expand Up @@ -59,13 +60,13 @@ pub struct Lexer<T: Iterator<Item = char>> {
keywords: HashMap<String, Tok>,
}

#[derive(Debug)]
#[derive(Debug, PartialEq)]
pub struct LexicalError {
pub error: LexicalErrorType,
pub location: Location,
}

#[derive(Debug)]
#[derive(Debug, PartialEq)]
pub enum LexicalErrorType {
StringError,
UnicodeError,
Expand All @@ -74,12 +75,32 @@ pub enum LexicalErrorType {
OtherError(String),
}

impl fmt::Display for LexicalErrorType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
LexicalErrorType::StringError => write!(f, "Got unexpected string"),
LexicalErrorType::UnicodeError => write!(f, "Got unexpected unicode"),
LexicalErrorType::NestingError => write!(f, "Got unexpected nesting"),
LexicalErrorType::UnrecognizedToken { tok } => {
write!(f, "Got unexpected token {}", tok)
}
LexicalErrorType::OtherError(ref msg) => write!(f, "{}", msg),
}
}
}

#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
pub struct Location {
row: usize,
column: usize,
}

impl fmt::Display for Location {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "line {} column {}", self.row, self.column)
}
}

impl Location {
pub fn new(row: usize, column: usize) -> Self {
Location { row, column }
Expand Down
6 changes: 3 additions & 3 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ extern crate rustyline;

use clap::{App, Arg};
use rustpython_compiler::{compile, error::CompileError, error::CompileErrorType};
use rustpython_parser::error::ParseError;
use rustpython_parser::error::ParseErrorType;
use rustpython_vm::{
frame::Scope,
import,
Expand Down Expand Up @@ -181,7 +181,7 @@ fn shell_exec(vm: &VirtualMachine, source: &str, scope: Scope) -> Result<(), Com
// Don't inject syntax errors for line continuation
Err(
err @ CompileError {
error: CompileErrorType::Parse(ParseError::EOF(_)),
error: CompileErrorType::Parse(ParseErrorType::EOF),
..
},
) => Err(err),
Expand Down Expand Up @@ -258,7 +258,7 @@ fn run_shell(vm: &VirtualMachine) -> PyResult {

match shell_exec(vm, &input, vars.clone()) {
Err(CompileError {
error: CompileErrorType::Parse(ParseError::EOF(_)),
error: CompileErrorType::Parse(ParseErrorType::EOF),
..
}) => {
continuing = true;
Expand Down
31 changes: 7 additions & 24 deletions wasm/lib/src/vm_class.rs
Original file line number Diff line number Diff line change
Expand Up @@ -281,30 +281,13 @@ impl WASMVirtualMachine {
let code = vm.compile(&source, &mode, "<wasm>".to_string());
let code = code.map_err(|err| {
let js_err = SyntaxError::new(&format!("Error parsing Python code: {}", err));
if let CompileErrorType::Parse(ref parse_error) = err.error {
use rustpython_parser::error::ParseError;
if let ParseError::EOF(Some(ref loc))
| ParseError::ExtraToken((ref loc, ..))
| ParseError::InvalidToken(ref loc)
| ParseError::UnrecognizedToken((ref loc, ..), _) = parse_error
{
let _ =
Reflect::set(&js_err, &"row".into(), &(loc.row() as u32).into());
let _ =
Reflect::set(&js_err, &"col".into(), &(loc.column() as u32).into());
}
if let ParseError::ExtraToken((_, _, ref loc))
| ParseError::UnrecognizedToken((_, _, ref loc), _) = parse_error
{
let _ =
Reflect::set(&js_err, &"endrow".into(), &(loc.row() as u32).into());
let _ = Reflect::set(
&js_err,
&"endcol".into(),
&(loc.column() as u32).into(),
);
}
}
let _ =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, adding the location attribute clearly was beneficial!

Reflect::set(&js_err, &"row".into(), &(err.location.row() as u32).into());
let _ = Reflect::set(
&js_err,
&"col".into(),
&(err.location.column() as u32).into(),
);
js_err
})?;
let result = vm.run_code_obj(code, scope.borrow().clone());
Expand Down