Skip to content

Commit 389a31f

Browse files
committed
Add Regex.sub() method, and asMatch and asGroupList parameters
1 parent 3497ee6 commit 389a31f

File tree

3 files changed

+82
-6
lines changed

3 files changed

+82
-6
lines changed

CHANGES

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,24 @@ Version 2.x.x - TBD
1515
Primary intent is more to be instructional than actually rigorous
1616
testing. Complex tests can still be added in the unitTests.py file.
1717

18+
- New features added to the Regex class:
19+
- optional asGroupList parameter, returns all the capture groups as
20+
a list
21+
- optional asMatch parameter, returns the raw re.match result
22+
- new sub(repl) method, which adds a parse action calling
23+
re.sub(pattern, repl, parsed_result). Simplifies creating
24+
Regex expressions to be used with transformString. Like re.sub,
25+
repl may be an ordinary string (similar to using pyparsing's
26+
replaceWith), or may contain references to capture groups by group
27+
number, or may be a callable that takes an re match object and
28+
returns a string.
29+
30+
For instance:
31+
expr = pp.Regex(r"([Hh]\d):\s*(.*)").sub(r"<\1>\2</\1>")
32+
expr.transformString("h1: This is the title")
33+
34+
will return
35+
<h1>This is the title</h1>
1836

1937
Version 2.2.1 - September, 2018
2038
-------------------------------

pyparsing.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ class names, and the use of '+', '|' and '^' operators.
7575
"""
7676

7777
__version__ = "2.2.2"
78-
__versionTime__ = "25 Sep 2018 04:18 UTC"
78+
__versionTime__ = "29 Sep 2018 15:58 UTC"
7979
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
8080

8181
import string
@@ -2776,7 +2776,7 @@ class Regex(Token):
27762776
roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
27772777
"""
27782778
compiledREtype = type(re.compile("[A-Z]"))
2779-
def __init__( self, pattern, flags=0):
2779+
def __init__( self, pattern, flags=0, asGroupList=False, asMatch=False):
27802780
"""The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags."""
27812781
super(Regex,self).__init__()
27822782

@@ -2809,6 +2809,8 @@ def __init__( self, pattern, flags=0):
28092809
self.errmsg = "Expected " + self.name
28102810
self.mayIndexError = False
28112811
self.mayReturnEmpty = True
2812+
self.asGroupList = asGroupList
2813+
self.asMatch = asMatch
28122814

28132815
def parseImpl( self, instring, loc, doActions=True ):
28142816
result = self.re.match(instring,loc)
@@ -2817,10 +2819,15 @@ def parseImpl( self, instring, loc, doActions=True ):
28172819

28182820
loc = result.end()
28192821
d = result.groupdict()
2820-
ret = ParseResults(result.group())
2821-
if d:
2822-
for k in d:
2823-
ret[k] = d[k]
2822+
if self.asMatch:
2823+
ret = result
2824+
elif self.asGroupList:
2825+
ret = result.groups()
2826+
else:
2827+
ret = ParseResults(result.group())
2828+
if d:
2829+
for k in d:
2830+
ret[k] = d[k]
28242831
return loc,ret
28252832

28262833
def __str__( self ):
@@ -2834,6 +2841,12 @@ def __str__( self ):
28342841

28352842
return self.strRepr
28362843

2844+
def sub(self, repl):
    """
    Return Regex with an attached parse action to transform the parsed
    result as if called using C{re.sub(expr, repl, string)}.

    Parameters:
     - repl - replacement handed to the C{re} module; either a string
       (which may contain group references such as C{\\1}) or a callable
       that takes a match object and returns the replacement string

    Raises C{SyntaxError} if this Regex was created with
    C{asGroupList=True}, or with C{asMatch=True} and a callable C{repl},
    since in those cases the parsed token is not a string that
    C{re.sub} could be applied to.
    """
    # With asGroupList the token is a tuple of groups; there is no matched
    # text left to substitute into, so fail here instead of at parse time.
    if self.asGroupList:
        raise SyntaxError("cannot use sub() with Regex(asGroupList=True)")

    if self.asMatch and callable(repl):
        raise SyntaxError("cannot use sub() with a callable with Regex(asMatch=True)")

    if self.asMatch:
        # The token is already the match object, so expand the template
        # against it rather than re-running the pattern.
        def pa(s, l, t):
            return t[0].expand(repl)
    else:
        def pa(s, l, t):
            return self.re.sub(repl, t[0])

    return self.addParseAction(pa)
28372850

28382851
class QuotedString(Token):
28392852
r"""

unitTests.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1701,6 +1701,51 @@ def testMatch (expression, instring, shouldPass, expectedString=None):
17011701

17021702
invRe = pyparsing.Regex('')
17031703

1704+
class RegexAsTypeTest(ParseTestCase):
    """Check the alternate result shapes of Regex: asGroupList and asMatch."""

    def runTest(self):
        import pyparsing as pp

        test_str = "sldkjfj 123 456 lsdfkj"
        # Every whitespace-separated word after the first one is captured
        # by a group in both patterns used below.
        expected_groups = tuple(test_str.split()[1:])
        expected_group_list = [expected_groups]

        print_("return as list of match groups")
        group_expr = pp.Regex(r"\w+ (\d+) (\d+) (\w+)", asGroupList=True)
        group_result = group_expr.parseString(test_str)
        print_(group_result.dump())
        print_(expected_group_list)
        assert group_result.asList() == expected_group_list, "incorrect group list returned by Regex"

        print_("return as re.match instance")
        match_expr = pp.Regex(r"\w+ (?P<num1>\d+) (?P<num2>\d+) (?P<last_word>\w+)", asMatch=True)
        match_result = match_expr.parseString(test_str)
        print_(match_result.dump())
        match_obj = match_result[0]
        print_(match_obj.groups())
        print_(expected_group_list)
        expected_named = {'num1': '123', 'num2': '456', 'last_word': 'lsdfkj'}
        assert match_obj.groupdict() == expected_named, 'invalid group dict from Regex(asMatch=True)'
        assert match_obj.groups() == expected_groups, "incorrect group list returned by Regex(asMatch)"
1726+
1727+
class RegexSubTest(ParseTestCase):
    """Exercise Regex.sub() with a plain string, a group-reference template and a callable."""

    def runTest(self):
        import pyparsing as pp

        # (banner, pattern, replacement, input text, expected output, failure message)
        cases = [
            ("test sub with string",
             r"<title>",
             "'Richard III'",
             "This is the title: <title>",
             "This is the title: 'Richard III'",
             "incorrect Regex.sub result with simple string"),
            ("test sub with re string",
             r"([Hh]\d):\s*(.*)",
             r"<\1>\2</\1>",
             "h1: This is the main heading\nh2: This is the sub-heading",
             '<h1>This is the main heading</h1>\n<h2>This is the sub-heading</h2>',
             "incorrect Regex.sub result with re string"),
            ("test sub with callable that return str",
             r"<(.*?)>",
             lambda m: m.group(1).upper(),
             "I want this in upcase: <what? what?>",
             'I want this in upcase: WHAT? WHAT?',
             "incorrect Regex.sub result with callable"),
        ]

        for banner, pattern, repl, source, expected, failure_msg in cases:
            print_(banner)
            expr = pp.Regex(pattern).sub(repl)
            result = expr.transformString(source)
            print_(result)
            assert result == expected, failure_msg
1748+
17041749
class CountedArrayTest(ParseTestCase):
17051750
def runTest(self):
17061751
from pyparsing import Word,nums,OneOrMore,countedArray

0 commit comments

Comments
 (0)