Skip to content

Commit f502c73

Browse files
committed
Bring SRE up to date with Python 2.1
1 parent 5cf87aa commit f502c73

File tree

7 files changed

+410
-170
lines changed

7 files changed

+410
-170
lines changed

Lib/sre.py

+65-16
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#
44
# re-compatible interface for the sre matching engine
55
#
6-
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
6+
# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
77
#
88
# This version of the SRE library can be redistributed under CNRI's
99
# Python 1.6 license. For any other use, please contact Secret Labs
@@ -14,60 +14,92 @@
1414
# other compatibility work.
1515
#
1616

17-
# FIXME: change all FIXME's to XXX ;-)
18-
1917
import sre_compile
2018
import sre_parse
2119

20+
# public symbols
21+
__all__ = [ "match", "search", "sub", "subn", "split", "findall",
22+
"compile", "purge", "template", "escape", "I", "L", "M", "S", "X",
23+
"U", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
24+
"UNICODE", "error" ]
25+
26+
__version__ = "2.1b2"
27+
28+
# this module works under 1.5.2 and later. don't use string methods
2229
import string
2330

2431
# flags
25-
I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE
26-
L = LOCALE = sre_compile.SRE_FLAG_LOCALE
27-
M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE
28-
S = DOTALL = sre_compile.SRE_FLAG_DOTALL
29-
X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE
32+
I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case
33+
L = LOCALE = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale
34+
U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode locale
35+
M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE # make anchors look for newline
36+
S = DOTALL = sre_compile.SRE_FLAG_DOTALL # make dot match newline
37+
X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE # ignore whitespace and comments
3038

31-
# sre extensions (may or may not be in 1.6/2.0 final)
32-
T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE
33-
U = UNICODE = sre_compile.SRE_FLAG_UNICODE
39+
# sre extensions (experimental, don't rely on these)
40+
T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE # disable backtracking
41+
DEBUG = sre_compile.SRE_FLAG_DEBUG # dump pattern after compilation
3442

3543
# sre exception
3644
error = sre_compile.error
3745

3846
# --------------------------------------------------------------------
3947
# public interface
4048

41-
# FIXME: add docstrings
42-
4349
def match(pattern, string, flags=0):
50+
"""Try to apply the pattern at the start of the string, returning
51+
a match object, or None if no match was found."""
4452
return _compile(pattern, flags).match(string)
4553

4654
def search(pattern, string, flags=0):
55+
"""Scan through string looking for a match to the pattern, returning
56+
a match object, or None if no match was found."""
4757
return _compile(pattern, flags).search(string)
4858

4959
def sub(pattern, repl, string, count=0):
60+
"""Return the string obtained by replacing the leftmost
61+
non-overlapping occurrences of the pattern in string by the
62+
replacement repl"""
5063
return _compile(pattern, 0).sub(repl, string, count)
5164

5265
def subn(pattern, repl, string, count=0):
66+
"""Return a 2-tuple containing (new_string, number).
67+
new_string is the string obtained by replacing the leftmost
68+
non-overlapping occurrences of the pattern in the source
69+
string by the replacement repl. number is the number of
70+
substitutions that were made."""
5371
return _compile(pattern, 0).subn(repl, string, count)
5472

5573
def split(pattern, string, maxsplit=0):
74+
"""Split the source string by the occurrences of the pattern,
75+
returning a list containing the resulting substrings."""
5676
return _compile(pattern, 0).split(string, maxsplit)
5777

5878
def findall(pattern, string, maxsplit=0):
79+
"""Return a list of all non-overlapping matches in the string.
80+
81+
If one or more groups are present in the pattern, return a
82+
list of groups; this will be a list of tuples if the pattern
83+
has more than one group.
84+
85+
Empty matches are included in the result."""
5986
return _compile(pattern, 0).findall(string, maxsplit)
6087

6188
def compile(pattern, flags=0):
89+
"Compile a regular expression pattern, returning a pattern object."
6290
return _compile(pattern, flags)
6391

6492
def purge():
93+
"Clear the regular expression cache"
6594
_cache.clear()
95+
_cache_repl.clear()
6696

6797
def template(pattern, flags=0):
98+
"Compile a template pattern, returning a pattern object"
6899
return _compile(pattern, flags|T)
69100

70101
def escape(pattern):
102+
"Escape all non-alphanumeric characters in pattern."
71103
s = list(pattern)
72104
for i in range(len(pattern)):
73105
c = pattern[i]
@@ -82,6 +114,8 @@ def escape(pattern):
82114
# internals
83115

84116
_cache = {}
117+
_cache_repl = {}
118+
85119
_MAXCACHE = 100
86120

87121
def _join(seq, sep):
@@ -105,6 +139,21 @@ def _compile(*key):
105139
_cache[key] = p
106140
return p
107141

142+
def _compile_repl(*key):
143+
# internal: compile replacement pattern
144+
p = _cache_repl.get(key)
145+
if p is not None:
146+
return p
147+
repl, pattern = key
148+
try:
149+
p = sre_parse.parse_template(repl, pattern)
150+
except error, v:
151+
raise error, v # invalid expression
152+
if len(_cache_repl) >= _MAXCACHE:
153+
_cache_repl.clear()
154+
_cache_repl[key] = p
155+
return p
156+
108157
def _expand(pattern, match, template):
109158
# internal: match.expand implementation hook
110159
template = sre_parse.parse_template(template, pattern)
@@ -119,7 +168,7 @@ def _subn(pattern, template, string, count=0):
119168
if callable(template):
120169
filter = template
121170
else:
122-
template = sre_parse.parse_template(template, pattern)
171+
template = _compile_repl(template, pattern)
123172
def filter(match, template=template):
124173
return sre_parse.expand_template(template, match)
125174
n = i = 0
@@ -158,7 +207,7 @@ def _split(pattern, string, maxsplit=0):
158207
continue
159208
append(string[i:b])
160209
if g and b != e:
161-
extend(m.groups())
210+
extend(list(m.groups()))
162211
i = e
163212
n = n + 1
164213
append(string[i:])
@@ -204,7 +253,7 @@ def scan(self, string):
204253
break
205254
action = self.lexicon[m.lastindex][1]
206255
if callable(action):
207-
self.match = match
256+
self.match = m
208257
action = action(self, m.group())
209258
if action is not None:
210259
append(action)

Lib/sre_compile.py

+16-11
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#
44
# convert template to internal format
55
#
6-
# Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
6+
# Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
77
#
88
# See the sre.py file for information on usage and redistribution.
99
#
@@ -12,6 +12,8 @@
1212

1313
from sre_constants import *
1414

15+
assert _sre.MAGIC == MAGIC, "SRE module mismatch"
16+
1517
MAXCODE = 65535
1618

1719
def _compile(code, pattern, flags):
@@ -21,9 +23,10 @@ def _compile(code, pattern, flags):
2123
if op in (LITERAL, NOT_LITERAL):
2224
if flags & SRE_FLAG_IGNORECASE:
2325
emit(OPCODES[OP_IGNORE[op]])
26+
emit(_sre.getlower(av, flags))
2427
else:
2528
emit(OPCODES[op])
26-
emit(av)
29+
emit(av)
2730
elif op is IN:
2831
if flags & SRE_FLAG_IGNORECASE:
2932
emit(OPCODES[OP_IGNORE[op]])
@@ -102,9 +105,12 @@ def fixup(literal, flags=flags):
102105
elif op is AT:
103106
emit(OPCODES[op])
104107
if flags & SRE_FLAG_MULTILINE:
105-
emit(ATCODES[AT_MULTILINE.get(av, av)])
106-
else:
107-
emit(ATCODES[av])
108+
av = AT_MULTILINE.get(av, av)
109+
if flags & SRE_FLAG_LOCALE:
110+
av = AT_LOCALE.get(av, av)
111+
elif flags & SRE_FLAG_UNICODE:
112+
av = AT_UNICODE.get(av, av)
113+
emit(ATCODES[av])
108114
elif op is BRANCH:
109115
emit(OPCODES[op])
110116
tail = []
@@ -121,11 +127,10 @@ def fixup(literal, flags=flags):
121127
elif op is CATEGORY:
122128
emit(OPCODES[op])
123129
if flags & SRE_FLAG_LOCALE:
124-
emit(CHCODES[CH_LOCALE[av]])
130+
av = CH_LOCALE[av]
125131
elif flags & SRE_FLAG_UNICODE:
126-
emit(CHCODES[CH_UNICODE[av]])
127-
else:
128-
emit(CHCODES[av])
132+
av = CH_UNICODE[av]
133+
emit(CHCODES[av])
129134
elif op is GROUPREF:
130135
if flags & SRE_FLAG_IGNORECASE:
131136
emit(OPCODES[OP_IGNORE[op]])
@@ -176,7 +181,7 @@ def _optimize_charset(charset, fixup):
176181
for i in range(fixup(av[0]), fixup(av[1])+1):
177182
charmap[i] = 1
178183
elif op is CATEGORY:
179-
# FIXME: could append to charmap tail
184+
# XXX: could append to charmap tail
180185
return charset # cannot compress
181186
except IndexError:
182187
# character set contains unicode characters
@@ -364,7 +369,7 @@ def compile(p, flags=0):
364369

365370
# print code
366371

367-
# FIXME: <fl> get rid of this limitation!
372+
# XXX: <fl> get rid of this limitation!
368373
assert p.pattern.groups <= 100,\
369374
"sorry, but this version only supports 100 named groups"
370375

Lib/sre_constants.py

+32-4
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,20 @@
44
# various symbols used by the regular expression engine.
55
# run this script to update the _sre include files!
66
#
7-
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
7+
# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
88
#
99
# See the sre.py file for information on usage and redistribution.
1010
#
1111

12+
# update when constants are added or removed
13+
14+
MAGIC = 20010320
15+
16+
# max code word in this release
17+
1218
MAXREPEAT = 65535
1319

20+
# SRE standard exception (access as sre.error)
1421
# should this really be here?
1522

1623
class error(Exception):
@@ -54,10 +61,16 @@ class error(Exception):
5461
# positions
5562
AT_BEGINNING = "at_beginning"
5663
AT_BEGINNING_LINE = "at_beginning_line"
64+
AT_BEGINNING_STRING = "at_beginning_string"
5765
AT_BOUNDARY = "at_boundary"
5866
AT_NON_BOUNDARY = "at_non_boundary"
5967
AT_END = "at_end"
6068
AT_END_LINE = "at_end_line"
69+
AT_END_STRING = "at_end_string"
70+
AT_LOC_BOUNDARY = "at_loc_boundary"
71+
AT_LOC_NON_BOUNDARY = "at_loc_non_boundary"
72+
AT_UNI_BOUNDARY = "at_uni_boundary"
73+
AT_UNI_NON_BOUNDARY = "at_uni_non_boundary"
6174

6275
# categories
6376
CATEGORY_DIGIT = "category_digit"
@@ -109,8 +122,10 @@ class error(Exception):
109122
]
110123

111124
ATCODES = [
112-
AT_BEGINNING, AT_BEGINNING_LINE, AT_BOUNDARY,
113-
AT_NON_BOUNDARY, AT_END, AT_END_LINE
125+
AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
126+
AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING,
127+
AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY,
128+
AT_UNI_NON_BOUNDARY
114129
]
115130

116131
CHCODES = [
@@ -148,6 +163,16 @@ def makedict(list):
148163
AT_END: AT_END_LINE
149164
}
150165

166+
AT_LOCALE = {
167+
AT_BOUNDARY: AT_LOC_BOUNDARY,
168+
AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY
169+
}
170+
171+
AT_UNICODE = {
172+
AT_BOUNDARY: AT_UNI_BOUNDARY,
173+
AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY
174+
}
175+
151176
CH_LOCALE = {
152177
CATEGORY_DIGIT: CATEGORY_DIGIT,
153178
CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT,
@@ -178,6 +203,7 @@ def makedict(list):
178203
SRE_FLAG_DOTALL = 16 # treat target as a single string
179204
SRE_FLAG_UNICODE = 32 # use unicode locale
180205
SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments
206+
SRE_FLAG_DEBUG = 128 # debugging
181207

182208
# flags for INFO primitive
183209
SRE_INFO_PREFIX = 1 # has prefix
@@ -201,13 +227,15 @@ def dump(f, d, prefix):
201227
* NOTE: This file is generated by sre_constants.py. If you need
202228
* to change anything in here, edit sre_constants.py and run it.
203229
*
204-
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
230+
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
205231
*
206232
* See the _sre.c file for information on usage and redistribution.
207233
*/
208234
209235
""")
210236

237+
f.write("#define SRE_MAGIC %d\n" % MAGIC)
238+
211239
dump(f, OPCODES, "SRE_OP")
212240
dump(f, ATCODES, "SRE")
213241
dump(f, CHCODES, "SRE")

0 commit comments

Comments
 (0)