Skip to content

Commit 247d332

Browse files
committed
General code cleanup in QuotedString: use "|".join to build inner_pattern instead of string concatenation with a separator character; rename variables to PEP 8 snake_case (inspired by Issue #488)
1 parent 08f7b39 commit 247d332

File tree

4 files changed

+102
-68
lines changed

4 files changed

+102
-68
lines changed

CHANGES

+6
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,12 @@ help from Devin J. Pohly in structuring the code to enable this peaceful transit
1212

1313
Version 3.2.0 will also discontinue support for Python versions 3.6 and 3.7.
1414

15+
16+
Version 3.1.1 - (in development)
17+
--------------------------------
18+
- Some general internal code cleanup. (Instigated by Michal Čihař, Issue #488)
19+
20+
1521
Version 3.1.0 - June, 2023
1622
--------------------------
1723
- Added `tag_emitter.py` to examples. This example demonstrates how to insert

pyparsing/__init__.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,8 @@ def __repr__(self):
120120
return f"{__name__}.{type(self).__name__}({', '.join('{}={!r}'.format(*nv) for nv in zip(self._fields, self))})"
121121

122122

123-
__version_info__ = version_info(3, 1, 0, "final", 1)
124-
__version_time__ = "18 Jun 2023 14:05 UTC"
123+
__version_info__ = version_info(3, 1, 1, "final", 1)
124+
__version_time__ = "30 Jun 2023 05:39 UTC"
125125
__version__ = __version_info__.__version__
126126
__versionTime__ = __version_time__
127127
__author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"

pyparsing/core.py

+83-66
Original file line numberDiff line numberDiff line change
@@ -3224,97 +3224,100 @@ def __init__(
32243224
convertWhitespaceEscapes: bool = True,
32253225
):
32263226
super().__init__()
3227-
escChar = escChar or esc_char
3228-
escQuote = escQuote or esc_quote
3229-
unquoteResults = unquoteResults and unquote_results
3230-
endQuoteChar = endQuoteChar or end_quote_char
3231-
convertWhitespaceEscapes = (
3227+
esc_char = escChar or esc_char
3228+
esc_quote = escQuote or esc_quote
3229+
unquote_results = unquoteResults and unquote_results
3230+
end_quote_char = endQuoteChar or end_quote_char
3231+
convert_whitespace_escapes = (
32323232
convertWhitespaceEscapes and convert_whitespace_escapes
32333233
)
32343234
quote_char = quoteChar or quote_char
32353235

3236-
# remove white space from quote chars - wont work anyway
3236+
# remove white space from quote chars
32373237
quote_char = quote_char.strip()
32383238
if not quote_char:
32393239
raise ValueError("quote_char cannot be the empty string")
32403240

3241-
if endQuoteChar is None:
3242-
endQuoteChar = quote_char
3241+
if end_quote_char is None:
3242+
end_quote_char = quote_char
32433243
else:
3244-
endQuoteChar = endQuoteChar.strip()
3245-
if not endQuoteChar:
3244+
end_quote_char = end_quote_char.strip()
3245+
if not end_quote_char:
32463246
raise ValueError("end_quote_char cannot be the empty string")
32473247

3248-
self.quoteChar: str = quote_char
3249-
self.quoteCharLen: int = len(quote_char)
3250-
self.firstQuoteChar: str = quote_char[0]
3251-
self.endQuoteChar: str = endQuoteChar
3252-
self.endQuoteCharLen: int = len(endQuoteChar)
3253-
self.escChar: str = escChar or ""
3254-
self.escQuote: str = escQuote or ""
3255-
self.unquoteResults: bool = unquoteResults
3256-
self.convertWhitespaceEscapes: bool = convertWhitespaceEscapes
3248+
self.quote_char: str = quote_char
3249+
self.quote_char_len: int = len(quote_char)
3250+
self.first_quote_char: str = quote_char[0]
3251+
self.end_quote_char: str = end_quote_char
3252+
self.end_quote_char_len: int = len(end_quote_char)
3253+
self.esc_char: str = esc_char or ""
3254+
self.has_esc_char: bool = esc_char is not None
3255+
self.esc_quote: str = esc_quote or ""
3256+
self.unquote_results: bool = unquote_results
3257+
self.convert_whitespace_escapes: bool = convert_whitespace_escapes
32573258
self.multiline = multiline
3259+
self.re_flags = re.RegexFlag(0)
32583260

3259-
sep = ""
3260-
inner_pattern = ""
3261+
# fmt: off
3262+
# build up re pattern for the content between the quote delimiters
3263+
inner_pattern = []
32613264

3262-
if escQuote:
3263-
inner_pattern += rf"{sep}(?:{re.escape(escQuote)})"
3264-
sep = "|"
3265+
if esc_quote:
3266+
inner_pattern.append(rf"(?:{re.escape(esc_quote)})")
32653267

3266-
if escChar:
3267-
inner_pattern += rf"{sep}(?:{re.escape(escChar)}.)"
3268-
sep = "|"
3268+
if esc_char:
3269+
inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")
32693270

3270-
if len(self.endQuoteChar) > 1:
3271-
inner_pattern += (
3272-
f"{sep}(?:"
3271+
if len(self.end_quote_char) > 1:
3272+
inner_pattern.append(
3273+
"(?:"
32733274
+ "|".join(
3274-
f"(?:{re.escape(self.endQuoteChar[:i])}(?!{re.escape(self.endQuoteChar[i:])}))"
3275-
for i in range(len(self.endQuoteChar) - 1, 0, -1)
3275+
f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"
3276+
for i in range(len(self.end_quote_char) - 1, 0, -1)
32763277
)
32773278
+ ")"
32783279
)
3279-
sep = "|"
32803280

3281-
self.flags = re.RegexFlag(0)
3282-
3283-
if multiline:
3284-
self.flags = re.MULTILINE | re.DOTALL
3285-
inner_pattern += (
3286-
rf"{sep}(?:[^{_escape_regex_range_chars(self.endQuoteChar[0])}"
3287-
rf"{(_escape_regex_range_chars(escChar) if escChar is not None else '')}])"
3281+
if self.multiline:
3282+
self.re_flags |= re.MULTILINE | re.DOTALL
3283+
inner_pattern.append(
3284+
rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}"
3285+
rf"{(_escape_regex_range_chars(esc_char) if self.has_esc_char else '')}])"
32883286
)
32893287
else:
3290-
inner_pattern += (
3291-
rf"{sep}(?:[^{_escape_regex_range_chars(self.endQuoteChar[0])}\n\r"
3292-
rf"{(_escape_regex_range_chars(escChar) if escChar is not None else '')}])"
3288+
inner_pattern.append(
3289+
rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r"
3290+
rf"{(_escape_regex_range_chars(esc_char) if self.has_esc_char else '')}])"
32933291
)
32943292

32953293
self.pattern = "".join(
32963294
[
3297-
re.escape(self.quoteChar),
3295+
re.escape(self.quote_char),
32983296
"(?:",
3299-
inner_pattern,
3297+
'|'.join(inner_pattern),
33003298
")*",
3301-
re.escape(self.endQuoteChar),
3299+
re.escape(self.end_quote_char),
33023300
]
33033301
)
33043302

3305-
if self.unquoteResults:
3306-
if self.convertWhitespaceEscapes:
3303+
if self.unquote_results:
3304+
if self.convert_whitespace_escapes:
33073305
self.unquote_scan_re = re.compile(
3308-
rf"({'|'.join(re.escape(k) for k in self.ws_map)})|({re.escape(self.escChar)}.)|(\n|.)",
3309-
flags=self.flags,
3306+
rf"({'|'.join(re.escape(k) for k in self.ws_map)})"
3307+
rf"|({re.escape(self.esc_char)}.)"
3308+
rf"|(\n|.)",
3309+
flags=self.re_flags,
33103310
)
33113311
else:
33123312
self.unquote_scan_re = re.compile(
3313-
rf"({re.escape(self.escChar)}.)|(\n|.)", flags=self.flags
3313+
rf"({re.escape(self.esc_char)}.)"
3314+
rf"|(\n|.)",
3315+
flags=self.re_flags
33143316
)
3317+
# fmt: on
33153318

33163319
try:
3317-
self.re = re.compile(self.pattern, self.flags)
3320+
self.re = re.compile(self.pattern, self.re_flags)
33183321
self.reString = self.pattern
33193322
self.re_match = self.re.match
33203323
except re.error:
@@ -3325,46 +3328,60 @@ def __init__(
33253328
self.mayReturnEmpty = True
33263329

33273330
def _generateDefaultName(self) -> str:
3328-
if self.quoteChar == self.endQuoteChar and isinstance(self.quoteChar, str_type):
3329-
return f"string enclosed in {self.quoteChar!r}"
3331+
if self.quote_char == self.end_quote_char and isinstance(
3332+
self.quote_char, str_type
3333+
):
3334+
return f"string enclosed in {self.quote_char!r}"
33303335

3331-
return f"quoted string, starting with {self.quoteChar} ending with {self.endQuoteChar}"
3336+
return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"
33323337

33333338
def parseImpl(self, instring, loc, doActions=True):
3339+
# check first character of opening quote to see if that is a match
3340+
# before doing the more complicated regex match
33343341
result = (
3335-
instring[loc] == self.firstQuoteChar
3342+
instring[loc] == self.first_quote_char
33363343
and self.re_match(instring, loc)
33373344
or None
33383345
)
33393346
if not result:
33403347
raise ParseException(instring, loc, self.errmsg, self)
33413348

3349+
# get ending loc and matched string from regex matching result
33423350
loc = result.end()
33433351
ret = result.group()
33443352

3345-
if self.unquoteResults:
3353+
if self.unquote_results:
33463354
# strip off quotes
3347-
ret = ret[self.quoteCharLen : -self.endQuoteCharLen]
3355+
ret = ret[self.quote_char_len : -self.end_quote_char_len]
33483356

33493357
if isinstance(ret, str_type):
3350-
if self.convertWhitespaceEscapes:
3358+
# fmt: off
3359+
if self.convert_whitespace_escapes:
3360+
# as we iterate over matches in the input string,
3361+
# collect from whichever match group of the unquote_scan_re
3362+
# regex matches (only 1 group will match at any given time)
33513363
ret = "".join(
3352-
self.ws_map[match.group(1)]
3353-
if match.group(1)
3354-
else match.group(2)[-1]
3355-
if match.group(2)
3364+
# match group 1 matches \t, \n, etc.
3365+
self.ws_map[match.group(1)] if match.group(1)
3366+
# match group 2 matches escaped characters
3367+
else match.group(2)[-1] if match.group(2)
3368+
# match group 3 matches any character
33563369
else match.group(3)
33573370
for match in self.unquote_scan_re.finditer(ret)
33583371
)
33593372
else:
33603373
ret = "".join(
3361-
match.group(1)[-1] if match.group(1) else match.group(2)
3374+
# match group 1 matches escaped characters
3375+
match.group(1)[-1] if match.group(1)
3376+
# match group 2 matches any character
3377+
else match.group(2)
33623378
for match in self.unquote_scan_re.finditer(ret)
33633379
)
3380+
# fmt: on
33643381

33653382
# replace escaped quotes
3366-
if self.escQuote:
3367-
ret = ret.replace(self.escQuote, self.endQuoteChar)
3383+
if self.esc_quote:
3384+
ret = ret.replace(self.esc_quote, self.end_quote_char)
33683385

33693386
return loc, ret
33703387

tests/test_unit.py

+11
Original file line numberDiff line numberDiff line change
@@ -3448,6 +3448,17 @@ def testParseResultsExtendWithParseResults(self):
34483448
result1, expected, msg="issue with ParseResults.extend(ParseResults)"
34493449
)
34503450

3451+
def testQuotedStringLoc(self):
3452+
expr = pp.QuotedString("'")
3453+
expr.add_parse_action(lambda t: t[0].upper())
3454+
3455+
test_string = "Using 'quotes' for 'sarcasm' or 'emphasis' is not good 'style'."
3456+
transformed = expr.transform_string(test_string)
3457+
print(test_string)
3458+
print(transformed)
3459+
expected = re.sub(r"'([^']+)'", lambda match: match[1].upper(), test_string)
3460+
self.assertEqual(expected, transformed)
3461+
34513462
def testParseResultsWithNestedNames(self):
34523463
from pyparsing import (
34533464
Dict,

0 commit comments

Comments
 (0)