Skip to content

Add an example usecase of the parser. #1177

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 38 additions & 22 deletions compiler/src/compile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -385,35 +385,47 @@ impl Compiler {
}
self.set_label(end_label);
}
With { items, body } => {
let end_label = self.new_label();
for item in items {
self.compile_expression(&item.context_expr)?;
self.emit(Instruction::SetupWith { end: end_label });
match &item.optional_vars {
Some(var) => {
self.compile_store(var)?;
}
None => {
self.emit(Instruction::Pop);
With {
is_async,
items,
body,
} => {
if *is_async {
unimplemented!("async with");
} else {
let end_label = self.new_label();
for item in items {
self.compile_expression(&item.context_expr)?;
self.emit(Instruction::SetupWith { end: end_label });
match &item.optional_vars {
Some(var) => {
self.compile_store(var)?;
}
None => {
self.emit(Instruction::Pop);
}
}
}
}

self.compile_statements(body)?;
for _ in 0..items.len() {
self.emit(Instruction::CleanupWith { end: end_label });
self.compile_statements(body)?;
for _ in 0..items.len() {
self.emit(Instruction::CleanupWith { end: end_label });
}
self.set_label(end_label);
}
self.set_label(end_label);
}
For {
is_async,
target,
iter,
body,
orelse,
} => self.compile_for(target, iter, body, orelse)?,
AsyncFor { .. } => {
unimplemented!("async for");
} => {
if *is_async {
unimplemented!("async for");
} else {
self.compile_for(target, iter, body, orelse)?
}
}
Raise { exception, cause } => match exception {
Some(value) => {
Expand All @@ -439,14 +451,18 @@ impl Compiler {
finalbody,
} => self.compile_try_statement(body, handlers, orelse, finalbody)?,
FunctionDef {
is_async,
name,
args,
body,
decorator_list,
returns,
} => self.compile_function_def(name, args, body, decorator_list, returns)?,
AsyncFunctionDef { .. } => {
unimplemented!("async def");
} => {
if *is_async {
unimplemented!("async def");
} else {
self.compile_function_def(name, args, body, decorator_list, returns)?
}
}
ClassDef {
name,
Expand Down
17 changes: 3 additions & 14 deletions compiler/src/symboltable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,13 +240,7 @@ impl SymbolTableBuilder {
args,
decorator_list,
returns,
}
| AsyncFunctionDef {
name,
body,
args,
decorator_list,
returns,
..
} => {
self.scan_expressions(decorator_list)?;
self.register_name(name, SymbolRole::Assigned)?;
Expand Down Expand Up @@ -289,12 +283,7 @@ impl SymbolTableBuilder {
iter,
body,
orelse,
}
| AsyncFor {
target,
iter,
body,
orelse,
..
} => {
self.scan_expression(target)?;
self.scan_expression(iter)?;
Expand Down Expand Up @@ -346,7 +335,7 @@ impl SymbolTableBuilder {
self.scan_expression(target)?;
self.scan_expression(value)?;
}
With { items, body } => {
With { items, body, .. } => {
for item in items {
self.scan_expression(&item.context_expr)?;
if let Some(expression) = &item.optional_vars {
Expand Down
70 changes: 70 additions & 0 deletions examples/parse_folder.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/// This is an example usage of the rustpython_parser crate.
/// This program crawls over a directory of python files and
/// tries to parse them into an abstract syntax tree (AST).
///
/// example usage:
/// $ RUST_LOG=info cargo run --release --example parse_folder /usr/lib/python3.7

#[macro_use]
extern crate clap;
extern crate env_logger;
#[macro_use]
extern crate log;

use clap::{App, Arg};

use rustpython_parser::{ast, parser};
use std::path::Path;

fn main() {
env_logger::init();
let app = App::new("parse_folders")
.version(crate_version!())
.author(crate_authors!())
.about("Walks over all .py files in a folder, and parses them.")
.arg(
Arg::with_name("folder")
.help("Folder to scan")
.required(true),
);
let matches = app.get_matches();

let folder = Path::new(matches.value_of("folder").unwrap());
if folder.exists() && folder.is_dir() {
println!("Parsing folder of python code: {:?}", folder);
let res = parse_folder(&folder).unwrap();
println!("Processed {:?} files", res.len());
Copy link
Member

@coolreader18 coolreader18 Jul 25, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should it print the parsed files or something?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have some ideas for metrics:

  • parsed lines per second
  • print the amount of functions / classes

For now, I just use it to try and scan my cpython lib folder

} else {
println!("{:?} is not a folder.", folder);
}
}

fn parse_folder(path: &Path) -> std::io::Result<Vec<ast::Program>> {
let mut res = vec![];
info!("Parsing folder of python code: {:?}", path);
for entry in path.read_dir()? {
debug!("Entry: {:?}", entry);
let entry = entry?;
let metadata = entry.metadata()?;

let path = entry.path();
if metadata.is_dir() {
let x = parse_folder(&path)?;
res.extend(x);
}

if metadata.is_file() && path.extension().and_then(|s| s.to_str()) == Some("py") {
match parse_python_file(&path) {
Ok(x) => res.push(x),
Err(y) => error!("Erreur in file {:?} {:?}", path, y),
}
}
}
Ok(res)
}

/// Reads the file at `filename` and parses its contents as a Python program.
///
/// # Errors
///
/// Returns the stringified error if the file cannot be read as UTF-8 text
/// or if `parser::parse_program` rejects the source.
fn parse_python_file(filename: &Path) -> Result<ast::Program, String> {
    info!("Parsing file {:?}", filename);
    match std::fs::read_to_string(filename) {
        Ok(text) => parser::parse_program(&text).map_err(|parse_err| parse_err.to_string()),
        Err(io_err) => Err(io_err.to_string()),
    }
}
16 changes: 3 additions & 13 deletions parser/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,16 +94,12 @@ pub enum StatementType {
orelse: Option<Vec<Statement>>,
},
With {
is_async: bool,
items: Vec<WithItem>,
body: Vec<Statement>,
},
For {
target: Expression,
iter: Expression,
body: Vec<Statement>,
orelse: Option<Vec<Statement>>,
},
AsyncFor {
is_async: bool,
target: Expression,
iter: Expression,
body: Vec<Statement>,
Expand All @@ -127,13 +123,7 @@ pub enum StatementType {
decorator_list: Vec<Expression>,
},
FunctionDef {
name: String,
args: Parameters,
body: Vec<Statement>,
decorator_list: Vec<Expression>,
returns: Option<Expression>,
},
AsyncFunctionDef {
is_async: bool,
name: String,
args: Parameters,
body: Vec<Statement>,
Expand Down
13 changes: 12 additions & 1 deletion parser/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,13 @@ where
spaces = 0;
tabs = 0;
}
Some('\x0C') => {
// Form feed character!
// Reset indentation for the Emacs user.
self.next_char();
spaces = 0;
tabs = 0;
}
Some('\n') => {
// Empty line!
self.next_char();
Expand Down Expand Up @@ -1157,9 +1164,13 @@ where
self.emit((tok_start, Tok::Newline, tok_end));
}
}
' ' => {
' ' | '\t' | '\x0C' => {
// Skip whitespaces
self.next_char();
while self.chr0 == Some(' ') || self.chr0 == Some('\t') || self.chr0 == Some('\x0C')
{
self.next_char();
}
}
_ => {
let c = self.next_char();
Expand Down
2 changes: 2 additions & 0 deletions parser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,7 @@ mod tests {
ast::Statement {
location: ast::Location::new(2, 2),
node: ast::StatementType::FunctionDef {
is_async: false,
name: String::from("__init__"),
args: ast::Parameters {
args: vec![ast::Parameter {
Expand All @@ -329,6 +330,7 @@ mod tests {
ast::Statement {
location: ast::Location::new(4, 2),
node: ast::StatementType::FunctionDef {
is_async: false,
name: String::from("method_with_default"),
args: ast::Parameters {
args: vec![
Expand Down
40 changes: 15 additions & 25 deletions parser/src/python.lalrpop
Original file line number Diff line number Diff line change
Expand Up @@ -334,14 +334,11 @@ WhileStatement: ast::Statement = {

ForStatement: ast::Statement = {
<location:@L> <is_async:"async"?> "for" <target:ExpressionList> "in" <iter:TestList> ":" <body:Suite> <s2:("else" ":" Suite)?> => {
let is_async = is_async.is_some();
let orelse = s2.map(|s| s.2);
ast::Statement {
location,
node: if is_async.is_some() {
ast::StatementType::AsyncFor { target, iter, body, orelse }
} else {
ast::StatementType::For { target, iter, body, orelse }
},
node: ast::StatementType::For { is_async, target, iter, body, orelse },
}
},
};
Expand Down Expand Up @@ -380,10 +377,11 @@ ExceptClause: ast::ExceptHandler = {
};

WithStatement: ast::Statement = {
<location:@L> "with" <items:OneOrMore<WithItem>> ":" <s:Suite> => {
<location:@L> <is_async:"async"?> "with" <items:OneOrMore<WithItem>> ":" <body:Suite> => {
let is_async = is_async.is_some();
ast::Statement {
location,
node: ast::StatementType::With { items: items, body: s },
node: ast::StatementType::With { is_async, items, body },
}
},
};
Expand All @@ -396,26 +394,18 @@ WithItem: ast::WithItem = {
};

FuncDef: ast::Statement = {
<d:Decorator*> <location:@L> <is_async:"async"?> "def" <i:Identifier> <a:Parameters> <r:("->" Test)?> ":" <s:Suite> => {
<d:Decorator*> <location:@L> <is_async:"async"?> "def" <name:Identifier> <a:Parameters> <r:("->" Test)?> ":" <body:Suite> => {
let is_async = is_async.is_some();
ast::Statement {
location,
node: if is_async.is_some() {
ast::StatementType::AsyncFunctionDef {
name: i,
args: a,
body: s,
decorator_list: d,
returns: r.map(|x| x.1),
}
} else {
ast::StatementType::FunctionDef {
name: i,
args: a,
body: s,
decorator_list: d,
returns: r.map(|x| x.1),
}
}
node: ast::StatementType::FunctionDef {
is_async,
name,
args: a,
body,
decorator_list: d,
returns: r.map(|x| x.1),
}
}
},
};
Expand Down
Loading