From a06bd3ecea60b8d3dbd4a0afe48142db5b799312 Mon Sep 17 00:00:00 2001 From: Windel Bouwman Date: Wed, 24 Jul 2019 20:12:13 +0200 Subject: [PATCH 1/2] Add an example usecase of the parser. --- examples/parse_folder.rs | 70 ++++++++++++++++++++++++++++++++++++++++ parser/src/lexer.rs | 5 ++- 2 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 examples/parse_folder.rs diff --git a/examples/parse_folder.rs b/examples/parse_folder.rs new file mode 100644 index 0000000000..63c75a161c --- /dev/null +++ b/examples/parse_folder.rs @@ -0,0 +1,70 @@ +/// This an example usage of the rustpython_parser crate. +/// This program crawls over a directory of python files and +/// tries to parse them into an abstract syntax tree (AST) +/// +/// example usage: +/// $ RUST_LOG=info cargo run --release parse_folder /usr/lib/python3.7 + +#[macro_use] +extern crate clap; +extern crate env_logger; +#[macro_use] +extern crate log; + +use clap::{App, Arg}; + +use rustpython_parser::{ast, parser}; +use std::path::Path; + +fn main() { + env_logger::init(); + let app = App::new("RustPython") + .version(crate_version!()) + .author(crate_authors!()) + .about("Walks over all .py files in a folder, and parses them.") + .arg( + Arg::with_name("folder") + .help("Folder to scan") + .required(true), + ); + let matches = app.get_matches(); + + let folder = Path::new(matches.value_of("folder").unwrap()); + if folder.exists() && folder.is_dir() { + println!("Parsing folder of python code: {:?}", folder); + let res = parse_folder(&folder).unwrap(); + println!("Processed {:?} files", res.len()); + } else { + println!("{:?} is not a folder.", folder); + } +} + +fn parse_folder(path: &Path) -> std::io::Result> { + let mut res = vec![]; + info!("Parsing folder of python code: {:?}", path); + for entry in path.read_dir()? { + debug!("Entry: {:?}", entry); + let entry = entry?; + let metadata = entry.metadata()?; + + let path = entry.path(); + if metadata.is_dir() { + let x = parse_folder(&path)?; + res.extend(x); + } + + if metadata.is_file() && path.extension().map(|s| s.to_str().unwrap()) == Some("py") { + match parse_python_file(&path) { + Ok(x) => res.push(x), + Err(y) => error!("Erreur in file {:?} {:?}", path, y), + } + } + } + Ok(res) +} + +fn parse_python_file(filename: &Path) -> Result { + info!("Parsing file {:?}", filename); + let source = std::fs::read_to_string(filename).map_err(|e| e.to_string())?; + parser::parse_program(&source).map_err(|e| e.to_string()) +} diff --git a/parser/src/lexer.rs b/parser/src/lexer.rs index 2cd1148656..5d3b89de02 100644 --- a/parser/src/lexer.rs +++ b/parser/src/lexer.rs @@ -1157,9 +1157,12 @@ where self.emit((tok_start, Tok::Newline, tok_end)); } } - ' ' => { + ' ' | '\t' => { // Skip whitespaces self.next_char(); + while self.chr0 == Some(' ') || self.chr0 == Some('\t') { + self.next_char(); + } } _ => { let c = self.next_char(); From 1fddce461f12871fb89345357b4931d55cf65840 Mon Sep 17 00:00:00 2001 From: Windel Bouwman Date: Thu, 25 Jul 2019 23:54:31 +0200 Subject: [PATCH 2/2] Implement review comments and improve parsing a bit. --- compiler/src/compile.rs | 60 ++++++++++++++--------- compiler/src/symboltable.rs | 17 ++----- examples/parse_folder.rs | 4 +- parser/src/ast.rs | 16 ++---- parser/src/lexer.rs | 12 ++++- parser/src/parser.rs | 2 + parser/src/python.lalrpop | 40 ++++++--------- vm/src/stdlib/ast.rs | 97 +++++++++++++++++++++---------------- 8 files changed, 129 insertions(+), 119 deletions(-) diff --git a/compiler/src/compile.rs b/compiler/src/compile.rs index b06262c60f..3fa6bdbe89 100644 --- a/compiler/src/compile.rs +++ b/compiler/src/compile.rs @@ -385,35 +385,47 @@ impl Compiler { } self.set_label(end_label); } - With { items, body } => { - let end_label = self.new_label(); - for item in items { - self.compile_expression(&item.context_expr)?; - self.emit(Instruction::SetupWith { end: end_label }); - match &item.optional_vars { - Some(var) => { - self.compile_store(var)?; - } - None => { - self.emit(Instruction::Pop); + With { + is_async, + items, + body, + } => { + if *is_async { + unimplemented!("async with"); + } else { + let end_label = self.new_label(); + for item in items { + self.compile_expression(&item.context_expr)?; + self.emit(Instruction::SetupWith { end: end_label }); + match &item.optional_vars { + Some(var) => { + self.compile_store(var)?; + } + None => { + self.emit(Instruction::Pop); + } } } - } - self.compile_statements(body)?; - for _ in 0..items.len() { - self.emit(Instruction::CleanupWith { end: end_label }); + self.compile_statements(body)?; + for _ in 0..items.len() { + self.emit(Instruction::CleanupWith { end: end_label }); + } + self.set_label(end_label); } - self.set_label(end_label); } For { + is_async, target, iter, body, orelse, - } => self.compile_for(target, iter, body, orelse)?, - AsyncFor { .. } => { - unimplemented!("async for"); + } => { + if *is_async { + unimplemented!("async for"); + } else { + self.compile_for(target, iter, body, orelse)? + } } Raise { exception, cause } => match exception { Some(value) => { @@ -439,14 +451,18 @@ impl Compiler { finalbody, } => self.compile_try_statement(body, handlers, orelse, finalbody)?, FunctionDef { + is_async, name, args, body, decorator_list, returns, - } => self.compile_function_def(name, args, body, decorator_list, returns)?, - AsyncFunctionDef { .. } => { - unimplemented!("async def"); + } => { + if *is_async { + unimplemented!("async def"); + } else { + self.compile_function_def(name, args, body, decorator_list, returns)? + } } ClassDef { name, diff --git a/compiler/src/symboltable.rs b/compiler/src/symboltable.rs index d80755d336..b12e0f92cb 100644 --- a/compiler/src/symboltable.rs +++ b/compiler/src/symboltable.rs @@ -240,13 +240,7 @@ impl SymbolTableBuilder { args, decorator_list, returns, - } - | AsyncFunctionDef { - name, - body, - args, - decorator_list, - returns, + .. } => { self.scan_expressions(decorator_list)?; self.register_name(name, SymbolRole::Assigned)?; @@ -289,12 +283,7 @@ impl SymbolTableBuilder { iter, body, orelse, - } - | AsyncFor { - target, - iter, - body, - orelse, + .. } => { self.scan_expression(target)?; self.scan_expression(iter)?; @@ -346,7 +335,7 @@ impl SymbolTableBuilder { self.scan_expression(target)?; self.scan_expression(value)?; } - With { items, body } => { + With { items, body, .. } => { for item in items { self.scan_expression(&item.context_expr)?; if let Some(expression) = &item.optional_vars { diff --git a/examples/parse_folder.rs b/examples/parse_folder.rs index 63c75a161c..9bd3d05aed 100644 --- a/examples/parse_folder.rs +++ b/examples/parse_folder.rs @@ -18,7 +18,7 @@ use std::path::Path; fn main() { env_logger::init(); - let app = App::new("RustPython") + let app = App::new("parse_folders") .version(crate_version!()) .author(crate_authors!()) .about("Walks over all .py files in a folder, and parses them.") @@ -53,7 +53,7 @@ fn parse_folder(path: &Path) -> std::io::Result> { res.extend(x); } - if metadata.is_file() && path.extension().map(|s| s.to_str().unwrap()) == Some("py") { + if metadata.is_file() && path.extension().and_then(|s| s.to_str()) == Some("py") { match parse_python_file(&path) { Ok(x) => res.push(x), Err(y) => error!("Erreur in file {:?} {:?}", path, y), diff --git a/parser/src/ast.rs b/parser/src/ast.rs index 85cd7d8b26..b063841ee3 100644 --- a/parser/src/ast.rs +++ b/parser/src/ast.rs @@ -94,16 +94,12 @@ pub enum StatementType { orelse: Option>, }, With { + is_async: bool, items: Vec, body: Vec, }, For { - target: Expression, - iter: Expression, - body: Vec, - orelse: Option>, - }, - AsyncFor { + is_async: bool, target: Expression, iter: Expression, body: Vec, @@ -127,13 +123,7 @@ pub enum StatementType { decorator_list: Vec, }, FunctionDef { - name: String, - args: Parameters, - body: Vec, - decorator_list: Vec, - returns: Option, - }, - AsyncFunctionDef { + is_async: bool, name: String, args: Parameters, body: Vec, diff --git a/parser/src/lexer.rs b/parser/src/lexer.rs index 5d3b89de02..6c12e58682 100644 --- a/parser/src/lexer.rs +++ b/parser/src/lexer.rs @@ -694,6 +694,13 @@ where spaces = 0; tabs = 0; } + Some('\x0C') => { + // Form feed character! + // Reset indentation for the Emacs user. + self.next_char(); + spaces = 0; + tabs = 0; + } Some('\n') => { // Empty line! self.next_char(); @@ -1157,10 +1164,11 @@ where self.emit((tok_start, Tok::Newline, tok_end)); } } - ' ' | '\t' => { + ' ' | '\t' | '\x0C' => { // Skip whitespaces self.next_char(); - while self.chr0 == Some(' ') || self.chr0 == Some('\t') { + while self.chr0 == Some(' ') || self.chr0 == Some('\t') || self.chr0 == Some('\x0C') + { self.next_char(); } } diff --git a/parser/src/parser.rs b/parser/src/parser.rs index 60d61be22c..b0e3ec5ab5 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -306,6 +306,7 @@ mod tests { ast::Statement { location: ast::Location::new(2, 2), node: ast::StatementType::FunctionDef { + is_async: false, name: String::from("__init__"), args: ast::Parameters { args: vec![ast::Parameter { @@ -329,6 +330,7 @@ mod tests { ast::Statement { location: ast::Location::new(4, 2), node: ast::StatementType::FunctionDef { + is_async: false, name: String::from("method_with_default"), args: ast::Parameters { args: vec![ diff --git a/parser/src/python.lalrpop b/parser/src/python.lalrpop index 3ea502f7c6..8eb44ff2f7 100644 --- a/parser/src/python.lalrpop +++ b/parser/src/python.lalrpop @@ -334,14 +334,11 @@ WhileStatement: ast::Statement = { ForStatement: ast::Statement = { "for" "in" ":" => { + let is_async = is_async.is_some(); let orelse = s2.map(|s| s.2); ast::Statement { location, - node: if is_async.is_some() { - ast::StatementType::AsyncFor { target, iter, body, orelse } - } else { - ast::StatementType::For { target, iter, body, orelse } - }, + node: ast::StatementType::For { is_async, target, iter, body, orelse }, } }, }; @@ -380,10 +377,11 @@ ExceptClause: ast::ExceptHandler = { }; WithStatement: ast::Statement = { - "with" > ":" => { + "with" > ":" => { + let is_async = is_async.is_some(); ast::Statement { location, - node: ast::StatementType::With { items: items, body: s }, + node: ast::StatementType::With { is_async, items, body }, } }, }; @@ -396,26 +394,18 @@ WithItem: ast::WithItem = { }; FuncDef: ast::Statement = { - "def" " Test)?> ":" => { + "def" " Test)?> ":" => { + let is_async = is_async.is_some(); ast::Statement { location, - node: if is_async.is_some() { - ast::StatementType::AsyncFunctionDef { - name: i, - args: a, - body: s, - decorator_list: d, - returns: r.map(|x| x.1), - } - } else { - ast::StatementType::FunctionDef { - name: i, - args: a, - body: s, - decorator_list: d, - returns: r.map(|x| x.1), - } - } + node: ast::StatementType::FunctionDef { + is_async, + name, + args: a, + body, + decorator_list: d, + returns: r.map(|x| x.1), + } } }, }; diff --git a/vm/src/stdlib/ast.rs b/vm/src/stdlib/ast.rs index 9687edbd52..ac41ca0fa2 100644 --- a/vm/src/stdlib/ast.rs +++ b/vm/src/stdlib/ast.rs @@ -82,31 +82,31 @@ fn statement_to_ast(vm: &VirtualMachine, statement: &ast::Statement) -> PyResult decorator_list => expressions_to_ast(vm, decorator_list)?, }), FunctionDef { + is_async, name, args, body, decorator_list, returns, - } => node!(vm, FunctionDef, { - name => vm.ctx.new_str(name.to_string()), - args => parameters_to_ast(vm, args)?, - body => statements_to_ast(vm, body)?, - decorator_list => expressions_to_ast(vm, decorator_list)?, - returns => optional_expression_to_ast(vm, returns)? - }), - AsyncFunctionDef { - name, - args, - body, - decorator_list, - returns, - } => node!(vm, AsyncFunctionDef, { - name => vm.ctx.new_str(name.to_string()), - args => parameters_to_ast(vm, args)?, - body => statements_to_ast(vm, body)?, - decorator_list => expressions_to_ast(vm, decorator_list)?, - returns => optional_expression_to_ast(vm, returns)? - }), + } => { + if *is_async { + node!(vm, AsyncFunctionDef, { + name => vm.ctx.new_str(name.to_string()), + args => parameters_to_ast(vm, args)?, + body => statements_to_ast(vm, body)?, + decorator_list => expressions_to_ast(vm, decorator_list)?, + returns => optional_expression_to_ast(vm, returns)? + }) + } else { + node!(vm, FunctionDef, { + name => vm.ctx.new_str(name.to_string()), + args => parameters_to_ast(vm, args)?, + body => statements_to_ast(vm, body)?, + decorator_list => expressions_to_ast(vm, decorator_list)?, + returns => optional_expression_to_ast(vm, returns)? + }) + } + } Continue => node!(vm, Continue), Break => node!(vm, Break), Pass => node!(vm, Pass), @@ -131,36 +131,50 @@ fn statement_to_ast(vm: &VirtualMachine, statement: &ast::Statement) -> PyResult orelse => optional_statements_to_ast(vm, orelse)? }), For { + is_async, target, iter, body, orelse, - } => node!(vm, For, { - target => expression_to_ast(vm, target)?, - iter => expression_to_ast(vm, iter)?, - body => statements_to_ast(vm, body)?, - orelse => optional_statements_to_ast(vm, orelse)? - }), - AsyncFor { - target, - iter, - body, - orelse, - } => node!(vm, AsyncFor, { - target => expression_to_ast(vm, target)?, - iter => expression_to_ast(vm, iter)?, - body => statements_to_ast(vm, body)?, - orelse => optional_statements_to_ast(vm, orelse)? - }), + } => { + if *is_async { + node!(vm, AsyncFor, { + target => expression_to_ast(vm, target)?, + iter => expression_to_ast(vm, iter)?, + body => statements_to_ast(vm, body)?, + orelse => optional_statements_to_ast(vm, orelse)? + }) + } else { + node!(vm, For, { + target => expression_to_ast(vm, target)?, + iter => expression_to_ast(vm, iter)?, + body => statements_to_ast(vm, body)?, + orelse => optional_statements_to_ast(vm, orelse)? + }) + } + } While { test, body, orelse } => node!(vm, While, { test => expression_to_ast(vm, test)?, body => statements_to_ast(vm, body)?, orelse => optional_statements_to_ast(vm, orelse)? }), - With { items, body } => node!(vm, With, { - items => map_ast(with_item_to_ast, vm, items)?, - body => statements_to_ast(vm, body)? - }), + With { + is_async, + items, + body, + } => { + if *is_async { + node!(vm, AsyncWith, { + items => map_ast(with_item_to_ast, vm, items)?, + body => statements_to_ast(vm, body)? + }) + } else { + node!(vm, With, { + items => map_ast(with_item_to_ast, vm, items)?, + body => statements_to_ast(vm, body)? + }) + } + } Try { body, handlers, @@ -597,6 +611,7 @@ pub fn make_module(vm: &VirtualMachine) -> PyObjectRef { "AugAssign" => py_class!(ctx, "AugAssign", ast_base.clone(), {}), "AsyncFor" => py_class!(ctx, "AsyncFor", ast_base.clone(), {}), "AsyncFunctionDef" => py_class!(ctx, "AsyncFunctionDef", ast_base.clone(), {}), + "AsyncWith" => py_class!(ctx, "AsyncWith", ast_base.clone(), {}), "Assert" => py_class!(ctx, "Assert", ast_base.clone(), {}), "Attribute" => py_class!(ctx, "Attribute", ast_base.clone(), {}), "Await" => py_class!(ctx, "Await", ast_base.clone(), {}),