From c1a5e31c3ca0213514a602238c0daf202cc47590 Mon Sep 17 00:00:00 2001 From: Windel Bouwman Date: Tue, 5 Mar 2019 20:24:48 +0100 Subject: [PATCH 1/2] Extend re module. --- tests/snippets/test_re.py | 6 +- vm/src/stdlib/re.rs | 206 ++++++++++++++++++++++++++++++++------ 2 files changed, 183 insertions(+), 29 deletions(-) diff --git a/tests/snippets/test_re.py b/tests/snippets/test_re.py index 24a2ee3b4a..d9914b38d5 100644 --- a/tests/snippets/test_re.py +++ b/tests/snippets/test_re.py @@ -4,5 +4,9 @@ haystack = "Hello world" needle = 'ello' -print(re.search(needle, haystack)) +mo = re.search(needle, haystack) +print(mo) +assert isinstance(mo, re.Match) +assert mo.start() == 1 +assert mo.end() == 5 diff --git a/vm/src/stdlib/re.rs b/vm/src/stdlib/re.rs index b262ff76ae..b257bd8183 100644 --- a/vm/src/stdlib/re.rs +++ b/vm/src/stdlib/re.rs @@ -5,19 +5,59 @@ * system. */ -extern crate regex; -use self::regex::Regex; +// extern crate regex; +use crate::import; +use regex::{Match, Regex}; +use std::path::PathBuf; use crate::obj::objstr; -use crate::pyobject::{PyContext, PyFuncArgs, PyObjectRef, PyResult, TypeProtocol}; +use crate::pyobject::{ + PyContext, PyFuncArgs, PyObject, PyObjectPayload, PyObjectRef, PyResult, TypeProtocol, +}; use crate::VirtualMachine; +/// Create the python `re` module with all its members. 
+pub fn mk_module(ctx: &PyContext) -> PyObjectRef { + let match_type = py_class!(ctx, "Match", ctx.object(), { + "start" => ctx.new_rustfunc(match_start), + "end" => ctx.new_rustfunc(match_end) + }); + + let pattern_type = py_class!(ctx, "Pattern", ctx.object(), { + "match" => ctx.new_rustfunc(pattern_match), + "search" => ctx.new_rustfunc(pattern_search) + }); + + py_module!(ctx, "re", { + "compile" => ctx.new_rustfunc(re_compile), + "Match" => match_type, + "match" => ctx.new_rustfunc(re_match), + "Pattern" => pattern_type, + "search" => ctx.new_rustfunc(re_search) + }) +} + +/// Implement re.match +/// See also: +/// https://docs.python.org/3/library/re.html#re.match fn re_match(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult { - // TODO: - error!("TODO: implement match"); - re_search(vm, args) + arg_check!( + vm, + args, + required = [ + (pattern, Some(vm.ctx.str_type())), + (string, Some(vm.ctx.str_type())) + ] + ); + let regex = make_regex(vm, pattern)?; + let search_text = objstr::get_value(string); + + do_match(vm, &regex, search_text) } +/// Implement re.search +/// See also: +/// https://docs.python.org/3/library/re.html#re.search fn re_search(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult { arg_check!( vm, @@ -28,33 +68,143 @@ fn re_search(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult { ] ); - let pattern_str = objstr::get_value(&pattern); - let search_text = objstr::get_value(&string); + // let pattern_str = objstr::get_value(&pattern); + let regex = make_regex(vm, pattern)?; + let search_text = objstr::get_value(string); + + do_search(vm, &regex, search_text) +} + +fn do_match(vm: &mut VirtualMachine, regex: &Regex, search_text: String) -> PyResult { + // TODO: implement match! 
+ do_search(vm, regex, search_text) +} + +fn do_search(vm: &mut VirtualMachine, regex: &Regex, search_text: String) -> PyResult { + match regex.find(&search_text) { + None => Ok(vm.get_none()), + Some(result) => create_match(vm, &result), + } +} + +fn make_regex(vm: &mut VirtualMachine, pattern: &PyObjectRef) -> PyResult<Regex> { + let pattern_str = objstr::get_value(pattern); match Regex::new(&pattern_str) { - Ok(regex) => { - // Now use regex to search: - match regex.find(&search_text) { - None => Ok(vm.get_none()), - Some(result) => { - // Return match object: - // TODO: implement match object - // TODO: how to refer to match object defined in this - // module? - Ok(vm.ctx.new_str(result.as_str().to_string())) - } - } - } + Ok(regex) => Ok(regex), Err(err) => Err(vm.new_value_error(format!("Error in regex: {:?}", err))), } } -pub fn mk_module(ctx: &PyContext) -> PyObjectRef { - let match_type = py_class!(ctx, "Match", ctx.object(), {}); +/// Inner data for a match object. +struct PyMatch { + start: usize, + end: usize, +} - py_module!(ctx, "re", { - "Match" => match_type, - "match" => ctx.new_rustfunc(re_match), - "search" => ctx.new_rustfunc(re_search) - }) +/// Take a found regular expression and convert it to proper match object. 
+fn create_match(vm: &mut VirtualMachine, match_value: &Match) -> PyResult { + // Return match object: + // TODO: implement match object + // TODO: how to refer to match object defined in this + let module = import::import_module(vm, PathBuf::default(), "re").unwrap(); + let match_class = vm.ctx.get_attr(&module, "Match").unwrap(); + + // let mo = vm.invoke(match_class, PyFuncArgs::default())?; + // let txt = vm.ctx.new_str(result.as_str().to_string()); + // vm.ctx.set_attr(&mo, "str", txt); + let match_value = PyMatch { + start: match_value.start(), + end: match_value.end(), + }; + + Ok(PyObject::new( + PyObjectPayload::AnyRustValue { + value: Box::new(match_value), + }, + match_class.clone(), + )) +} + +/// Compile a regular expression into a Pattern object. +/// See also: +/// https://docs.python.org/3/library/re.html#re.compile +fn re_compile(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult { + arg_check!( + vm, + args, + required = [(pattern, Some(vm.ctx.str_type()))] // TODO: flags=0 + ); + + let regex = make_regex(vm, pattern)?; + // TODO: retrieval of this module is awkward: + let module = import::import_module(vm, PathBuf::default(), "re").unwrap(); + let pattern_class = vm.ctx.get_attr(&module, "Pattern").unwrap(); + + Ok(PyObject::new( + PyObjectPayload::AnyRustValue { + value: Box::new(regex), + }, + pattern_class.clone(), + )) +} + +fn pattern_match(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult { + arg_check!( + vm, + args, + required = [(zelf, None), (text, Some(vm.ctx.str_type()))] + ); + + let regex = get_regex(zelf); + let search_text = objstr::get_value(text); + do_match(vm, &regex, search_text) +} + +fn pattern_search(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult { + arg_check!( + vm, + args, + required = [(zelf, None), (text, Some(vm.ctx.str_type()))] + ); + + let regex = get_regex(zelf); + let search_text = objstr::get_value(text); + do_search(vm, &regex, search_text) +} + +/// Returns start of match +/// see: 
https://docs.python.org/3/library/re.html#re.Match.start +fn match_start(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult { + arg_check!(vm, args, required = [(zelf, None)]); + // TODO: implement groups + let m = get_match(zelf); + Ok(vm.new_int(m.start)) +} + +fn match_end(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult { + arg_check!(vm, args, required = [(zelf, None)]); + // TODO: implement groups + let m = get_match(zelf); + Ok(vm.new_int(m.end)) +} + +/// Retrieve inner rust regex from python object: +fn get_regex<'a>(obj: &'a PyObjectRef) -> &'a Regex { + if let PyObjectPayload::AnyRustValue { ref value } = obj.payload { + if let Some(regex) = value.downcast_ref::<Regex>() { + return regex; + } + } + panic!("Inner error getting regex {:?}", obj); +} + +/// Retrieve inner rust match from python object: +fn get_match<'a>(obj: &'a PyObjectRef) -> &'a PyMatch { + if let PyObjectPayload::AnyRustValue { ref value } = obj.payload { + if let Some(value) = value.downcast_ref::<PyMatch>() { + return value; + } + } + panic!("Inner error getting match {:?}", obj); } From f3791a386748504808d82ddffc109b44c1a18b52 Mon Sep 17 00:00:00 2001 From: Windel Bouwman Date: Wed, 6 Mar 2019 11:25:16 +0100 Subject: [PATCH 2/2] Fix test script for python3.6 --- tests/snippets/test_re.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/snippets/test_re.py b/tests/snippets/test_re.py index d9914b38d5..2463f380b5 100644 --- a/tests/snippets/test_re.py +++ b/tests/snippets/test_re.py @@ -7,6 +7,7 @@ mo = re.search(needle, haystack) print(mo) -assert isinstance(mo, re.Match) +# Does not work on python 3.6: +# assert isinstance(mo, re.Match) assert mo.start() == 1 assert mo.end() == 5