Skip to content

Commit 02cec85

Browse files
authored
Merge pull request #5202 from youknowone/sre-engine
Import sre-engine repository to main RustPython
2 parents df363c0 + 1dd9a2f commit 02cec85

File tree

13 files changed

+2388
-6
lines changed

13 files changed

+2388
-6
lines changed

Cargo.lock

Lines changed: 34 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ include = ["LICENSE", "Cargo.toml", "src/**/*.rs"]
1313
resolver = "2"
1414
members = [
1515
"compiler", "compiler/core", "compiler/codegen",
16-
".", "common", "derive", "jit", "vm", "pylib", "stdlib", "wasm/lib", "derive-impl",
16+
".", "common", "derive", "jit", "vm", "vm/sre_engine", "pylib", "stdlib", "wasm/lib", "derive-impl",
1717
]
1818

1919
[workspace.dependencies]
@@ -27,6 +27,7 @@ rustpython-jit = { path = "jit", version = "0.3.0" }
2727
rustpython-vm = { path = "vm", default-features = false, version = "0.3.0" }
2828
rustpython-pylib = { path = "pylib", version = "0.3.0" }
2929
rustpython-stdlib = { path = "stdlib", default-features = false, version = "0.3.0" }
30+
rustpython-sre_engine = { path = "vm/sre_engine", version = "0.6.0" }
3031
rustpython-doc = { git = "https://github.com/RustPython/__doc__", tag = "0.3.0", version = "0.3.0" }
3132

3233
rustpython-literal = { git = "https://github.com/RustPython/Parser.git", rev = "29c4728dbedc7e69cc2560b9b34058bbba9b1303" }
@@ -64,7 +65,7 @@ malachite-base = "0.4.4"
6465
num-complex = "0.4.0"
6566
num-integer = "0.1.44"
6667
num-traits = "0.2"
67-
num_enum = "0.5.7"
68+
num_enum = "0.7"
6869
once_cell = "1.18"
6970
parking_lot = "0.12.1"
7071
paste = "1.0.7"

vm/sre_engine/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
/target
2+
Cargo.lock

vm/sre_engine/.vscode/launch.json

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,21 @@
1+
{
2+
"version": "0.2.0",
3+
"configurations": [
4+
{
5+
"type": "lldb",
6+
"request": "launch",
7+
"name": "Debug Unit Test",
8+
"cargo": {
9+
"args": [
10+
"test",
11+
"--no-run"
12+
],
13+
"filter": {
14+
"kind": "test"
15+
}
16+
},
17+
"args": [],
18+
"cwd": "${workspaceFolder}"
19+
}
20+
]
21+
}

vm/sre_engine/Cargo.toml

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,15 @@
1+
[package]
2+
name = "rustpython-sre_engine"
3+
version = "0.6.0"
4+
authors = ["Kangzhi Shi <shikangzhi@gmail.com>", "RustPython Team"]
5+
description = "A low-level implementation of Python's SRE regex engine"
6+
repository = "https://github.com/RustPython/RustPython"
7+
license = "MIT"
8+
edition = "2021"
9+
keywords = ["regex"]
10+
include = ["LICENSE", "src/**/*.rs"]
11+
12+
[dependencies]
13+
num_enum = { workspace = true }
14+
bitflags = { workspace = true }
15+
optional = "0.5"

vm/sre_engine/LICENSE

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2020 RustPython Team
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

vm/sre_engine/benches/benches.rs

Lines changed: 111 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,111 @@
1+
#![feature(test)]
2+
3+
extern crate test;
4+
use test::Bencher;
5+
6+
use sre_engine::{Request, State, StrDrive};
7+
8+
struct Pattern {
9+
code: &'static [u32],
10+
}
11+
12+
impl Pattern {
13+
fn state<'a, S: StrDrive>(&self, string: S) -> (Request<'a, S>, State) {
14+
self.state_range(string, 0..usize::MAX)
15+
}
16+
17+
fn state_range<'a, S: StrDrive>(
18+
&self,
19+
string: S,
20+
range: std::ops::Range<usize>,
21+
) -> (Request<'a, S>, State) {
22+
let req = Request::new(string, range.start, range.end, self.code, false);
23+
let state = State::default();
24+
(req, state)
25+
}
26+
}
27+
28+
#[bench]
29+
fn benchmarks(b: &mut Bencher) {
30+
// # test common prefix
31+
// pattern p1 = re.compile('Python|Perl') # , 'Perl'), # Alternation
32+
// START GENERATED by generate_tests.py
33+
#[rustfmt::skip] let p1 = Pattern { code: &[14, 8, 1, 4, 6, 1, 1, 80, 0, 16, 80, 7, 13, 16, 121, 16, 116, 16, 104, 16, 111, 16, 110, 15, 11, 9, 16, 101, 16, 114, 16, 108, 15, 2, 0, 1] };
34+
// END GENERATED
35+
// pattern p2 = re.compile('(Python|Perl)') #, 'Perl'), # Grouped alternation
36+
// START GENERATED by generate_tests.py
37+
#[rustfmt::skip] let p2 = Pattern { code: &[14, 8, 1, 4, 6, 1, 0, 80, 0, 17, 0, 16, 80, 7, 13, 16, 121, 16, 116, 16, 104, 16, 111, 16, 110, 15, 11, 9, 16, 101, 16, 114, 16, 108, 15, 2, 0, 17, 1, 1] };
38+
// END GENERATED
39+
// pattern p3 = re.compile('Python|Perl|Tcl') #, 'Perl'), # Alternation
40+
// START GENERATED by generate_tests.py
41+
#[rustfmt::skip] let p3 = Pattern { code: &[14, 9, 4, 3, 6, 16, 80, 16, 84, 0, 7, 15, 16, 80, 16, 121, 16, 116, 16, 104, 16, 111, 16, 110, 15, 22, 11, 16, 80, 16, 101, 16, 114, 16, 108, 15, 11, 9, 16, 84, 16, 99, 16, 108, 15, 2, 0, 1] };
42+
// END GENERATED
43+
// pattern p4 = re.compile('(Python|Perl|Tcl)') #, 'Perl'), # Grouped alternation
44+
// START GENERATED by generate_tests.py
45+
#[rustfmt::skip] let p4 = Pattern { code: &[14, 9, 4, 3, 6, 16, 80, 16, 84, 0, 17, 0, 7, 15, 16, 80, 16, 121, 16, 116, 16, 104, 16, 111, 16, 110, 15, 22, 11, 16, 80, 16, 101, 16, 114, 16, 108, 15, 11, 9, 16, 84, 16, 99, 16, 108, 15, 2, 0, 17, 1, 1] };
46+
// END GENERATED
47+
// pattern p5 = re.compile('(Python)\\1') #, 'PythonPython'), # Backreference
48+
// START GENERATED by generate_tests.py
49+
#[rustfmt::skip] let p5 = Pattern { code: &[14, 18, 1, 12, 12, 6, 0, 80, 121, 116, 104, 111, 110, 0, 0, 0, 0, 0, 0, 17, 0, 16, 80, 16, 121, 16, 116, 16, 104, 16, 111, 16, 110, 17, 1, 11, 0, 1] };
50+
// END GENERATED
51+
// pattern p6 = re.compile('([0a-z][a-z0-9]*,)+') #, 'a5,b7,c9,'), # Disable the fastmap optimization
52+
// START GENERATED by generate_tests.py
53+
#[rustfmt::skip] let p6 = Pattern { code: &[14, 4, 0, 2, 4294967295, 23, 31, 1, 4294967295, 17, 0, 13, 7, 16, 48, 22, 97, 122, 0, 24, 13, 0, 4294967295, 13, 8, 22, 97, 122, 22, 48, 57, 0, 1, 16, 44, 17, 1, 18, 1] };
54+
// END GENERATED
55+
// pattern p7 = re.compile('([a-z][a-z0-9]*,)+') #, 'a5,b7,c9,'), # A few sets
56+
// START GENERATED by generate_tests.py
57+
#[rustfmt::skip] let p7 = Pattern { code: &[14, 4, 0, 2, 4294967295, 23, 29, 1, 4294967295, 17, 0, 13, 5, 22, 97, 122, 0, 24, 13, 0, 4294967295, 13, 8, 22, 97, 122, 22, 48, 57, 0, 1, 16, 44, 17, 1, 18, 1] };
58+
// END GENERATED
59+
// pattern p8 = re.compile('Python') #, 'Python'), # Simple text literal
60+
// START GENERATED by generate_tests.py
61+
#[rustfmt::skip] let p8 = Pattern { code: &[14, 18, 3, 6, 6, 6, 6, 80, 121, 116, 104, 111, 110, 0, 0, 0, 0, 0, 0, 16, 80, 16, 121, 16, 116, 16, 104, 16, 111, 16, 110, 1] };
62+
// END GENERATED
63+
// pattern p9 = re.compile('.*Python') #, 'Python'), # Bad text literal
64+
// START GENERATED by generate_tests.py
65+
#[rustfmt::skip] let p9 = Pattern { code: &[14, 4, 0, 6, 4294967295, 24, 5, 0, 4294967295, 2, 1, 16, 80, 16, 121, 16, 116, 16, 104, 16, 111, 16, 110, 1] };
66+
// END GENERATED
67+
// pattern p10 = re.compile('.*Python.*') #, 'Python'), # Worse text literal
68+
// START GENERATED by generate_tests.py
69+
#[rustfmt::skip] let p10 = Pattern { code: &[14, 4, 0, 6, 4294967295, 24, 5, 0, 4294967295, 2, 1, 16, 80, 16, 121, 16, 116, 16, 104, 16, 111, 16, 110, 24, 5, 0, 4294967295, 2, 1, 1] };
70+
// END GENERATED
71+
// pattern p11 = re.compile('.*(Python)') #, 'Python'), # Bad text literal with grouping
72+
// START GENERATED by generate_tests.py
73+
#[rustfmt::skip] let p11 = Pattern { code: &[14, 4, 0, 6, 4294967295, 24, 5, 0, 4294967295, 2, 1, 17, 0, 16, 80, 16, 121, 16, 116, 16, 104, 16, 111, 16, 110, 17, 1, 1] };
74+
// END GENERATED
75+
76+
let tests = [
77+
(p1, "Perl"),
78+
(p2, "Perl"),
79+
(p3, "Perl"),
80+
(p4, "Perl"),
81+
(p5, "PythonPython"),
82+
(p6, "a5,b7,c9,"),
83+
(p7, "a5,b7,c9,"),
84+
(p8, "Python"),
85+
(p9, "Python"),
86+
(p10, "Python"),
87+
(p11, "Python"),
88+
];
89+
90+
b.iter(move || {
91+
for (p, s) in &tests {
92+
let (req, mut state) = p.state(s.clone());
93+
assert!(state.search(req));
94+
let (req, mut state) = p.state(s.clone());
95+
assert!(state.pymatch(&req));
96+
let (mut req, mut state) = p.state(s.clone());
97+
req.match_all = true;
98+
assert!(state.pymatch(&req));
99+
let s2 = format!("{}{}{}", " ".repeat(10000), s, " ".repeat(10000));
100+
let (req, mut state) = p.state_range(s2.as_str(), 0..usize::MAX);
101+
assert!(state.search(req));
102+
let (req, mut state) = p.state_range(s2.as_str(), 10000..usize::MAX);
103+
assert!(state.pymatch(&req));
104+
let (req, mut state) = p.state_range(s2.as_str(), 10000..10000 + s.len());
105+
assert!(state.pymatch(&req));
106+
let (mut req, mut state) = p.state_range(s2.as_str(), 10000..10000 + s.len());
107+
req.match_all = true;
108+
assert!(state.pymatch(&req));
109+
}
110+
})
111+
}

vm/sre_engine/generate_tests.py

Lines changed: 47 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,47 @@
1+
import os
2+
from pathlib import Path
3+
import re
4+
import sre_constants
5+
import sre_compile
6+
import sre_parse
7+
import json
8+
from itertools import chain
9+
# Read SRE_MAGIC out of the Rust sources and refuse to run when it differs
# from the interpreter's `sre_constants.MAGIC`: the generated opcode arrays
# come from this interpreter's compiler, so the two must agree.
# The path is relative to the current working directory.
with open("src/constants.rs") as constants_file:
    m = re.search(r"const SRE_MAGIC: usize = (\d+);", constants_file.read())
if m is None:
    # Without this check a missing constant surfaces as an opaque
    # AttributeError on `None.group`.
    raise RuntimeError(
        "could not find `const SRE_MAGIC: usize = <n>;` in src/constants.rs"
    )
sre_engine_magic = int(m.group(1))
del m, constants_file

assert sre_constants.MAGIC == sre_engine_magic, (
    f"SRE_MAGIC mismatch: interpreter has {sre_constants.MAGIC}, "
    f"src/constants.rs has {sre_engine_magic}"
)
15+
class CompiledPattern:
    """Stand-in for the `re` module when `// pattern ...` comments are evaluated.

    `compile` mirrors the call shape of `re.compile` but stops before the
    native pattern object is built, keeping the raw opcode list instead.

    Attributes set on each instance:
        pattern: the pattern source passed to `compile`.
        code: the opcode list returned by `sre_compile._code`.
        flags: `re.RegexFlag` combining the requested flags with those the
            parser recorded on its state.
    """

    @classmethod
    def compile(cls, pattern, flags=0):
        """Parse and compile `pattern`, returning a populated instance."""
        # Hand `flags` to the parser as well as to the code generator, the way
        # CPython's own compile step does. Parsing depends on them: without
        # this, re.VERBOSE whitespace is parsed as literals and re.ASCII does
        # not stop the parser from tagging a str pattern as UNICODE.
        p = sre_parse.parse(pattern, flags)
        code = sre_compile._code(p, flags)
        self = cls()
        self.pattern = pattern
        self.code = code
        self.flags = re.RegexFlag(flags | p.state.flags)
        return self


# Mirror every flag name (`I`, `IGNORECASE`, ...) onto the class so that
# expressions such as `re.compile('x', re.I)` still resolve when `re` is
# bound to CompiledPattern.
for k, v in re.RegexFlag.__members__.items():
    setattr(CompiledPattern, k, v)
29+
30+
# matches `// pattern {varname} = re.compile(...)`
#
# Groups: 1 = the whole comment line, 2 = its indentation, 3 = the variable
# name, 4 = the Python expression to evaluate.
#
# The optional tail swallows a previously generated region so it can be
# replaced, but only when `// START GENERATED by generate_tests.py` is the very
# next line. An unanchored `.+?END GENERATED` under re.S would run from a
# comment that has no generated region yet to the next END GENERATED anywhere
# later in the file, deleting everything in between.
# The indentation is limited to spaces and tabs so it cannot absorb preceding
# blank lines.
pattern_pattern = re.compile(
    r"^(([ \t]*)\/\/\s*pattern\s+(\w+)\s+=\s+(.+?))$"
    r"(?:\n[ \t]*\/\/ START GENERATED by generate_tests\.py$.*?\/\/ END GENERATED)?",
    re.M | re.S,
)


def replace_compiled(m):
    """Return the replacement text for one `pattern_pattern` match.

    The result is the original comment line followed by a freshly generated
    region declaring `let {varname} = Pattern { code: &[...] };`.
    """
    line, indent, varname, pattern = m.groups()
    # NOTE(review): `eval` runs whatever expression the comment contains, with
    # `re` bound to CompiledPattern. Only run this script on trusted sources.
    pattern = eval(pattern, {"re": CompiledPattern})
    pattern = f"Pattern {{ code: &{json.dumps(pattern.code)} }}"
    return f'''{line}
{indent}// START GENERATED by generate_tests.py
{indent}#[rustfmt::skip] let {varname} = {pattern};
{indent}// END GENERATED'''
41+
# Regenerate the compiled-pattern regions of every `.rs` file directly inside
# `tests/` and `benches/` (paths relative to the current working directory).
# Both directories are opened before any file is touched.
with os.scandir("tests") as tests_dir, os.scandir("benches") as benches_dir:
    for entry in chain(tests_dir, benches_dir):
        source_path = Path(entry.path)
        if source_path.suffix != ".rs":
            continue
        original_text = source_path.read_text()
        source_path.write_text(pattern_pattern.sub(replace_compiled, original_text))

0 commit comments

Comments
 (0)