diff --git a/compiler/src/error.rs b/compiler/src/error.rs index 62546ddb4d..60ea733967 100644 --- a/compiler/src/error.rs +++ b/compiler/src/error.rs @@ -1,5 +1,6 @@ use rustpython_parser::error::{LexicalErrorType, ParseError, ParseErrorType}; use rustpython_parser::location::Location; +use rustpython_parser::token::Tok; use std::error::Error; use std::fmt; @@ -41,6 +42,20 @@ pub enum CompileErrorType { } impl CompileError { + pub fn is_indentation_error(&self) -> bool { + if let CompileErrorType::Parse(parse) = &self.error { + match parse { + ParseErrorType::Lexical(LexicalErrorType::IndentationError) => true, + ParseErrorType::UnrecognizedToken(token, expected) => { + *token == Tok::Indent || expected.clone() == Some("Indent".to_string()) + } + _ => false, + } + } else { + false + } + } + pub fn is_tab_error(&self) -> bool { if let CompileErrorType::Parse(parse) = &self.error { if let ParseErrorType::Lexical(lex) = parse { diff --git a/parser/src/error.rs b/parser/src/error.rs index b6df47fd97..7b6e1369a6 100644 --- a/parser/src/error.rs +++ b/parser/src/error.rs @@ -20,6 +20,7 @@ pub enum LexicalErrorType { StringError, UnicodeError, NestingError, + IndentationError, TabError, DefaultArgumentError, PositionalArgumentError, @@ -36,6 +37,9 @@ impl fmt::Display for LexicalErrorType { LexicalErrorType::FStringError(error) => write!(f, "Got error in f-string: {}", error), LexicalErrorType::UnicodeError => write!(f, "Got unexpected unicode"), LexicalErrorType::NestingError => write!(f, "Got unexpected nesting"), + LexicalErrorType::IndentationError => { + write!(f, "unindent does not match any outer indentation level") + } LexicalErrorType::TabError => { write!(f, "inconsistent use of tabs and spaces in indentation") } @@ -121,7 +125,7 @@ pub enum ParseErrorType { /// Parser encountered an invalid token InvalidToken, /// Parser encountered an unexpected token - UnrecognizedToken(Tok, Vec), + UnrecognizedToken(Tok, Option), /// Maps to `User` type from `lalrpop-util` Lexical(LexicalErrorType), } @@ -143,10 +147,19 @@ impl From> for ParseError { error: ParseErrorType::Lexical(error.error), location: error.location, }, - LalrpopError::UnrecognizedToken { token, expected } => ParseError { - error: ParseErrorType::UnrecognizedToken(token.1, expected), - location: token.0, - }, + LalrpopError::UnrecognizedToken { token, expected } => { + // Hacky, but it's how CPython does it. See PyParser_AddToken, + // in particular "Only one possible expected token" comment. + let expected = if expected.len() == 1 { + Some(expected[0].clone()) + } else { + None + }; + ParseError { + error: ParseErrorType::UnrecognizedToken(token.1, expected), + location: token.0, + } + } LalrpopError::UnrecognizedEOF { location, .. } => ParseError { error: ParseErrorType::EOF, location, @@ -167,8 +180,14 @@ impl fmt::Display for ParseErrorType { ParseErrorType::EOF => write!(f, "Got unexpected EOF"), ParseErrorType::ExtraToken(ref tok) => write!(f, "Got extraneous token: {:?}", tok), ParseErrorType::InvalidToken => write!(f, "Got invalid token"), - ParseErrorType::UnrecognizedToken(ref tok, _) => { - write!(f, "Got unexpected token {}", tok) + ParseErrorType::UnrecognizedToken(ref tok, ref expected) => { + if *tok == Tok::Indent { + write!(f, "unexpected indent") + } else if expected.clone() == Some("Indent".to_string()) { + write!(f, "expected an indented block") + } else { + write!(f, "Got unexpected token {}", tok) + } } ParseErrorType::Lexical(ref error) => write!(f, "{}", error), } diff --git a/parser/src/lexer.rs b/parser/src/lexer.rs index 6155faee9c..0f4584a733 100644 --- a/parser/src/lexer.rs +++ b/parser/src/lexer.rs @@ -791,11 +791,8 @@ where break; } Ordering::Greater => { - // TODO: handle wrong indentations return Err(LexicalError { - error: LexicalErrorType::OtherError( - "Non matching indentation levels!".to_string(), - ), + error: LexicalErrorType::IndentationError, location: self.get_pos(), }); } diff --git a/parser/src/python.lalrpop b/parser/src/python.lalrpop index 8e15645b2c..58ea002461 100644 --- a/parser/src/python.lalrpop +++ b/parser/src/python.lalrpop @@ -39,7 +39,7 @@ FileLine: ast::Suite = { Suite: ast::Suite = { SimpleStatement, - "\n" indent dedent => s.into_iter().flatten().collect(), + "\n" Indent Dedent => s.into_iter().flatten().collect(), }; Statement: ast::Suite = { @@ -1124,8 +1124,8 @@ extern { type Error = LexicalError; enum lexer::Tok { - indent => lexer::Tok::Indent, - dedent => lexer::Tok::Dedent, + Indent => lexer::Tok::Indent, + Dedent => lexer::Tok::Dedent, StartProgram => lexer::Tok::StartProgram, StartStatement => lexer::Tok::StartStatement, StartExpression => lexer::Tok::StartExpression, diff --git a/tests/snippets/invalid_syntax.py b/tests/snippets/invalid_syntax.py index faa3d2474a..cd903372e3 100644 --- a/tests/snippets/invalid_syntax.py +++ b/tests/snippets/invalid_syntax.py @@ -14,6 +14,32 @@ def valid_func(): else: raise AssertionError("Must throw syntax error") +src = """ +if True: +pass +""" + +with assert_raises(IndentationError): + compile(src, '', 'exec') + +src = """ +if True: + pass + pass +""" + +with assert_raises(IndentationError): + compile(src, '', 'exec') + +src = """ +if True: + pass + pass +""" + +with assert_raises(IndentationError): + compile(src, '', 'exec') + src = """ if True: pass diff --git a/vm/src/vm.rs b/vm/src/vm.rs index f4145b846f..39e241116e 100644 --- a/vm/src/vm.rs +++ b/vm/src/vm.rs @@ -416,7 +416,9 @@ impl VirtualMachine { #[cfg(feature = "rustpython-compiler")] pub fn new_syntax_error(&self, error: &CompileError) -> PyObjectRef { - let syntax_error_type = if error.is_tab_error() { + let syntax_error_type = if error.is_indentation_error() { + self.ctx.exceptions.indentation_error.clone() + } else if error.is_tab_error() { self.ctx.exceptions.tab_error.clone() } else { self.ctx.exceptions.syntax_error.clone()