Skip to content

Commit 03b4199

Browse files
authored
Merge pull request RustPython#591 from RustPython/re_module
Re module
2 parents 11d0de2 + f3791a3 commit 03b4199

File tree

2 files changed

+184
-29
lines changed

2 files changed

+184
-29
lines changed

tests/snippets/test_re.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,10 @@
44
haystack = "Hello world"
55
needle = 'ello'
66

7-
print(re.search(needle, haystack))
7+
mo = re.search(needle, haystack)
8+
print(mo)
89

10+
# Does not work on python 3.6:
11+
# assert isinstance(mo, re.Match)
12+
assert mo.start() == 1
13+
assert mo.end() == 5

vm/src/stdlib/re.rs

+178-28
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,59 @@
55
* system.
66
*/
77

8-
extern crate regex;
9-
use self::regex::Regex;
8+
// extern crate regex;
9+
use crate::import;
10+
use regex::{Match, Regex};
11+
use std::path::PathBuf;
1012

1113
use crate::obj::objstr;
12-
use crate::pyobject::{PyContext, PyFuncArgs, PyObjectRef, PyResult, TypeProtocol};
14+
use crate::pyobject::{
15+
PyContext, PyFuncArgs, PyObject, PyObjectPayload, PyObjectRef, PyResult, TypeProtocol,
16+
};
1317
use crate::VirtualMachine;
1418

19+
/// Create the python `re` module with all its members.
20+
pub fn mk_module(ctx: &PyContext) -> PyObjectRef {
21+
let match_type = py_class!(ctx, "Match", ctx.object(), {
22+
"start" => ctx.new_rustfunc(match_start),
23+
"end" => ctx.new_rustfunc(match_end)
24+
});
25+
26+
let pattern_type = py_class!(ctx, "Pattern", ctx.object(), {
27+
"match" => ctx.new_rustfunc(pattern_match),
28+
"search" => ctx.new_rustfunc(pattern_search)
29+
});
30+
31+
py_module!(ctx, "re", {
32+
"compile" => ctx.new_rustfunc(re_compile),
33+
"Match" => match_type,
34+
"match" => ctx.new_rustfunc(re_match),
35+
"Pattern" => pattern_type,
36+
"search" => ctx.new_rustfunc(re_search)
37+
})
38+
}
39+
40+
/// Implement re.match
41+
/// See also:
42+
/// https://docs.python.org/3/library/re.html#re.match
1543
fn re_match(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
16-
// TODO:
17-
error!("TODO: implement match");
18-
re_search(vm, args)
44+
arg_check!(
45+
vm,
46+
args,
47+
required = [
48+
(pattern, Some(vm.ctx.str_type())),
49+
(string, Some(vm.ctx.str_type()))
50+
]
51+
);
52+
let regex = make_regex(vm, pattern)?;
53+
let search_text = objstr::get_value(string);
54+
55+
do_match(vm, &regex, search_text)
1956
}
2057

58+
/// Implement re.search
59+
/// See also:
60+
/// https://docs.python.org/3/library/re.html#re.search
2161
fn re_search(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
2262
arg_check!(
2363
vm,
@@ -28,33 +68,143 @@ fn re_search(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
2868
]
2969
);
3070

31-
let pattern_str = objstr::get_value(&pattern);
32-
let search_text = objstr::get_value(&string);
71+
// let pattern_str = objstr::get_value(&pattern);
72+
let regex = make_regex(vm, pattern)?;
73+
let search_text = objstr::get_value(string);
74+
75+
do_search(vm, &regex, search_text)
76+
}
77+
78+
fn do_match(vm: &mut VirtualMachine, regex: &Regex, search_text: String) -> PyResult {
79+
// TODO: implement match!
80+
do_search(vm, regex, search_text)
81+
}
82+
83+
fn do_search(vm: &mut VirtualMachine, regex: &Regex, search_text: String) -> PyResult {
84+
match regex.find(&search_text) {
85+
None => Ok(vm.get_none()),
86+
Some(result) => create_match(vm, &result),
87+
}
88+
}
89+
90+
/// Compile the Python string `pattern` into a Rust `Regex`.
///
/// A pattern the regex engine rejects is turned into a Python value error
/// whose message embeds the engine's debug representation of the failure.
fn make_regex(vm: &mut VirtualMachine, pattern: &PyObjectRef) -> PyResult<Regex> {
    let source = objstr::get_value(pattern);

    Regex::new(&source)
        .map_err(|err| vm.new_value_error(format!("Error in regex: {:?}", err)))
}
5198

52-
pub fn mk_module(ctx: &PyContext) -> PyObjectRef {
53-
let match_type = py_class!(ctx, "Match", ctx.object(), {});
99+
/// Inner data for a match object.
///
/// Stores the `start` and `end` offsets exactly as reported by the regex
/// engine's `Match`. The common value traits are derived so the payload can
/// be printed, copied and compared cheaply.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct PyMatch {
    /// Offset at which the match begins.
    start: usize,
    /// Offset just past the last matched position.
    end: usize,
}
54104

55-
py_module!(ctx, "re", {
56-
"Match" => match_type,
57-
"match" => ctx.new_rustfunc(re_match),
58-
"search" => ctx.new_rustfunc(re_search)
59-
})
105+
/// Take a found regular expression and convert it to proper match object.
106+
fn create_match(vm: &mut VirtualMachine, match_value: &Match) -> PyResult {
107+
// Return match object:
108+
// TODO: implement match object
109+
// TODO: how to refer to match object defined in this
110+
let module = import::import_module(vm, PathBuf::default(), "re").unwrap();
111+
let match_class = vm.ctx.get_attr(&module, "Match").unwrap();
112+
113+
// let mo = vm.invoke(match_class, PyFuncArgs::default())?;
114+
// let txt = vm.ctx.new_str(result.as_str().to_string());
115+
// vm.ctx.set_attr(&mo, "str", txt);
116+
let match_value = PyMatch {
117+
start: match_value.start(),
118+
end: match_value.end(),
119+
};
120+
121+
Ok(PyObject::new(
122+
PyObjectPayload::AnyRustValue {
123+
value: Box::new(match_value),
124+
},
125+
match_class.clone(),
126+
))
127+
}
128+
129+
/// Compile a regular expression into a Pattern object.
130+
/// See also:
131+
/// https://docs.python.org/3/library/re.html#re.compile
132+
fn re_compile(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
133+
arg_check!(
134+
vm,
135+
args,
136+
required = [(pattern, Some(vm.ctx.str_type()))] // TODO: flags=0
137+
);
138+
139+
let regex = make_regex(vm, pattern)?;
140+
// TODO: retrieval of this module is akward:
141+
let module = import::import_module(vm, PathBuf::default(), "re").unwrap();
142+
let pattern_class = vm.ctx.get_attr(&module, "Pattern").unwrap();
143+
144+
Ok(PyObject::new(
145+
PyObjectPayload::AnyRustValue {
146+
value: Box::new(regex),
147+
},
148+
pattern_class.clone(),
149+
))
150+
}
151+
152+
fn pattern_match(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
153+
arg_check!(
154+
vm,
155+
args,
156+
required = [(zelf, None), (text, Some(vm.ctx.str_type()))]
157+
);
158+
159+
let regex = get_regex(zelf);
160+
let search_text = objstr::get_value(text);
161+
do_match(vm, &regex, search_text)
162+
}
163+
164+
fn pattern_search(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
165+
arg_check!(
166+
vm,
167+
args,
168+
required = [(zelf, None), (text, Some(vm.ctx.str_type()))]
169+
);
170+
171+
let regex = get_regex(zelf);
172+
let search_text = objstr::get_value(text);
173+
do_search(vm, &regex, search_text)
174+
}
175+
176+
/// Returns start of match
177+
/// see: https://docs.python.org/3/library/re.html#re.Match.start
178+
fn match_start(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
179+
arg_check!(vm, args, required = [(zelf, None)]);
180+
// TODO: implement groups
181+
let m = get_match(zelf);
182+
Ok(vm.new_int(m.start))
183+
}
184+
185+
/// `Match.end()`: the stored end offset of the match, as a Python int.
///
/// Group arguments are not supported yet.
fn match_end(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
    arg_check!(vm, args, required = [(zelf, None)]);
    // TODO: implement groups
    let end = get_match(zelf).end;
    Ok(vm.new_int(end))
}
191+
192+
/// Retrieve inner rust regex from python object:
193+
fn get_regex<'a>(obj: &'a PyObjectRef) -> &'a Regex {
194+
if let PyObjectPayload::AnyRustValue { ref value } = obj.payload {
195+
if let Some(regex) = value.downcast_ref::<Regex>() {
196+
return regex;
197+
}
198+
}
199+
panic!("Inner error getting regex {:?}", obj);
200+
}
201+
202+
/// Retrieve inner rust match from python object:
203+
fn get_match<'a>(obj: &'a PyObjectRef) -> &'a PyMatch {
204+
if let PyObjectPayload::AnyRustValue { ref value } = obj.payload {
205+
if let Some(value) = value.downcast_ref::<PyMatch>() {
206+
return value;
207+
}
208+
}
209+
panic!("Inner error getting match {:?}", obj);
60210
}

0 commit comments

Comments
 (0)