@@ -3224,97 +3224,100 @@ def __init__(
3224
3224
convertWhitespaceEscapes : bool = True ,
3225
3225
):
3226
3226
super ().__init__ ()
3227
- escChar = escChar or esc_char
3228
- escQuote = escQuote or esc_quote
3229
- unquoteResults = unquoteResults and unquote_results
3230
- endQuoteChar = endQuoteChar or end_quote_char
3231
- convertWhitespaceEscapes = (
3227
+ esc_char = escChar or esc_char
3228
+ esc_quote = escQuote or esc_quote
3229
+ unquote_results = unquoteResults and unquote_results
3230
+ end_quote_char = endQuoteChar or end_quote_char
3231
+ convert_whitespace_escapes = (
3232
3232
convertWhitespaceEscapes and convert_whitespace_escapes
3233
3233
)
3234
3234
quote_char = quoteChar or quote_char
3235
3235
3236
- # remove white space from quote chars - wont work anyway
3236
+ # remove white space from quote chars
3237
3237
quote_char = quote_char .strip ()
3238
3238
if not quote_char :
3239
3239
raise ValueError ("quote_char cannot be the empty string" )
3240
3240
3241
- if endQuoteChar is None :
3242
- endQuoteChar = quote_char
3241
+ if end_quote_char is None :
3242
+ end_quote_char = quote_char
3243
3243
else :
3244
- endQuoteChar = endQuoteChar .strip ()
3245
- if not endQuoteChar :
3244
+ end_quote_char = end_quote_char .strip ()
3245
+ if not end_quote_char :
3246
3246
raise ValueError ("end_quote_char cannot be the empty string" )
3247
3247
3248
- self .quoteChar : str = quote_char
3249
- self .quoteCharLen : int = len (quote_char )
3250
- self .firstQuoteChar : str = quote_char [0 ]
3251
- self .endQuoteChar : str = endQuoteChar
3252
- self .endQuoteCharLen : int = len (endQuoteChar )
3253
- self .escChar : str = escChar or ""
3254
- self .escQuote : str = escQuote or ""
3255
- self .unquoteResults : bool = unquoteResults
3256
- self .convertWhitespaceEscapes : bool = convertWhitespaceEscapes
3248
+ self .quote_char : str = quote_char
3249
+ self .quote_char_len : int = len (quote_char )
3250
+ self .first_quote_char : str = quote_char [0 ]
3251
+ self .end_quote_char : str = end_quote_char
3252
+ self .end_quote_char_len : int = len (end_quote_char )
3253
+ self .esc_char : str = esc_char or ""
3254
+ self .has_esc_char : bool = esc_char is not None
3255
+ self .esc_quote : str = esc_quote or ""
3256
+ self .unquote_results : bool = unquote_results
3257
+ self .convert_whitespace_escapes : bool = convert_whitespace_escapes
3257
3258
self .multiline = multiline
3259
+ self .re_flags = re .RegexFlag (0 )
3258
3260
3259
- sep = ""
3260
- inner_pattern = ""
3261
+ # fmt: off
3262
+ # build up re pattern for the content between the quote delimiters
3263
+ inner_pattern = []
3261
3264
3262
- if escQuote :
3263
- inner_pattern += rf"{ sep } (?:{ re .escape (escQuote )} )"
3264
- sep = "|"
3265
+ if esc_quote :
3266
+ inner_pattern .append (rf"(?:{ re .escape (esc_quote )} )" )
3265
3267
3266
- if escChar :
3267
- inner_pattern += rf"{ sep } (?:{ re .escape (escChar )} .)"
3268
- sep = "|"
3268
+ if esc_char :
3269
+ inner_pattern .append (rf"(?:{ re .escape (esc_char )} .)" )
3269
3270
3270
- if len (self .endQuoteChar ) > 1 :
3271
- inner_pattern += (
3272
- f" { sep } (?:"
3271
+ if len (self .end_quote_char ) > 1 :
3272
+ inner_pattern . append (
3273
+ " (?:"
3273
3274
+ "|" .join (
3274
- f"(?:{ re .escape (self .endQuoteChar [:i ])} (?!{ re .escape (self .endQuoteChar [i :])} ))"
3275
- for i in range (len (self .endQuoteChar ) - 1 , 0 , - 1 )
3275
+ f"(?:{ re .escape (self .end_quote_char [:i ])} (?!{ re .escape (self .end_quote_char [i :])} ))"
3276
+ for i in range (len (self .end_quote_char ) - 1 , 0 , - 1 )
3276
3277
)
3277
3278
+ ")"
3278
3279
)
3279
- sep = "|"
3280
3280
3281
- self .flags = re .RegexFlag (0 )
3282
-
3283
- if multiline :
3284
- self .flags = re .MULTILINE | re .DOTALL
3285
- inner_pattern += (
3286
- rf"{ sep } (?:[^{ _escape_regex_range_chars (self .endQuoteChar [0 ])} "
3287
- rf"{ (_escape_regex_range_chars (escChar ) if escChar is not None else '' )} ])"
3281
+ if self .multiline :
3282
+ self .re_flags |= re .MULTILINE | re .DOTALL
3283
+ inner_pattern .append (
3284
+ rf"(?:[^{ _escape_regex_range_chars (self .end_quote_char [0 ])} "
3285
+ rf"{ (_escape_regex_range_chars (esc_char ) if self .has_esc_char else '' )} ])"
3288
3286
)
3289
3287
else :
3290
- inner_pattern += (
3291
- rf"{ sep } (?:[^{ _escape_regex_range_chars (self .endQuoteChar [0 ])} \n\r"
3292
- rf"{ (_escape_regex_range_chars (escChar ) if escChar is not None else '' )} ])"
3288
+ inner_pattern . append (
3289
+ rf"(?:[^{ _escape_regex_range_chars (self .end_quote_char [0 ])} \n\r"
3290
+ rf"{ (_escape_regex_range_chars (esc_char ) if self . has_esc_char else '' )} ])"
3293
3291
)
3294
3292
3295
3293
self .pattern = "" .join (
3296
3294
[
3297
- re .escape (self .quoteChar ),
3295
+ re .escape (self .quote_char ),
3298
3296
"(?:" ,
3299
- inner_pattern ,
3297
+ '|' . join ( inner_pattern ) ,
3300
3298
")*" ,
3301
- re .escape (self .endQuoteChar ),
3299
+ re .escape (self .end_quote_char ),
3302
3300
]
3303
3301
)
3304
3302
3305
- if self .unquoteResults :
3306
- if self .convertWhitespaceEscapes :
3303
+ if self .unquote_results :
3304
+ if self .convert_whitespace_escapes :
3307
3305
self .unquote_scan_re = re .compile (
3308
- rf"({ '|' .join (re .escape (k ) for k in self .ws_map )} )|({ re .escape (self .escChar )} .)|(\n|.)" ,
3309
- flags = self .flags ,
3306
+ rf"({ '|' .join (re .escape (k ) for k in self .ws_map )} )"
3307
+ rf"|({ re .escape (self .esc_char )} .)"
3308
+ rf"|(\n|.)" ,
3309
+ flags = self .re_flags ,
3310
3310
)
3311
3311
else :
3312
3312
self .unquote_scan_re = re .compile (
3313
- rf"({ re .escape (self .escChar )} .)|(\n|.)" , flags = self .flags
3313
+ rf"({ re .escape (self .esc_char )} .)"
3314
+ rf"|(\n|.)" ,
3315
+ flags = self .re_flags
3314
3316
)
3317
+ # fmt: on
3315
3318
3316
3319
try :
3317
- self .re = re .compile (self .pattern , self .flags )
3320
+ self .re = re .compile (self .pattern , self .re_flags )
3318
3321
self .reString = self .pattern
3319
3322
self .re_match = self .re .match
3320
3323
except re .error :
@@ -3325,46 +3328,60 @@ def __init__(
3325
3328
self .mayReturnEmpty = True
3326
3329
3327
3330
def _generateDefaultName (self ) -> str :
3328
- if self .quoteChar == self .endQuoteChar and isinstance (self .quoteChar , str_type ):
3329
- return f"string enclosed in { self .quoteChar !r} "
3331
+ if self .quote_char == self .end_quote_char and isinstance (
3332
+ self .quote_char , str_type
3333
+ ):
3334
+ return f"string enclosed in { self .quote_char !r} "
3330
3335
3331
- return f"quoted string, starting with { self .quoteChar } ending with { self .endQuoteChar } "
3336
+ return f"quoted string, starting with { self .quote_char } ending with { self .end_quote_char } "
3332
3337
3333
3338
def parseImpl (self , instring , loc , doActions = True ):
3339
+ # check first character of opening quote to see if that is a match
3340
+ # before doing the more complicated regex match
3334
3341
result = (
3335
- instring [loc ] == self .firstQuoteChar
3342
+ instring [loc ] == self .first_quote_char
3336
3343
and self .re_match (instring , loc )
3337
3344
or None
3338
3345
)
3339
3346
if not result :
3340
3347
raise ParseException (instring , loc , self .errmsg , self )
3341
3348
3349
+ # get ending loc and matched string from regex matching result
3342
3350
loc = result .end ()
3343
3351
ret = result .group ()
3344
3352
3345
- if self .unquoteResults :
3353
+ if self .unquote_results :
3346
3354
# strip off quotes
3347
- ret = ret [self .quoteCharLen : - self .endQuoteCharLen ]
3355
+ ret = ret [self .quote_char_len : - self .end_quote_char_len ]
3348
3356
3349
3357
if isinstance (ret , str_type ):
3350
- if self .convertWhitespaceEscapes :
3358
+ # fmt: off
3359
+ if self .convert_whitespace_escapes :
3360
+ # as we iterate over matches in the input string,
3361
+ # collect from whichever match group of the unquote_scan_re
3362
+ # regex matches (only 1 group will match at any given time)
3351
3363
ret = "" .join (
3352
- self .ws_map [match .group (1 )]
3353
- if match .group (1 )
3354
- else match .group (2 )[- 1 ]
3355
- if match .group (2 )
3364
+ # match group 1 matches \t, \n, etc.
3365
+ self .ws_map [match .group (1 )] if match .group (1 )
3366
+ # match group 2 matches escaped characters
3367
+ else match .group (2 )[- 1 ] if match .group (2 )
3368
+ # match group 3 matches any character
3356
3369
else match .group (3 )
3357
3370
for match in self .unquote_scan_re .finditer (ret )
3358
3371
)
3359
3372
else :
3360
3373
ret = "" .join (
3361
- match .group (1 )[- 1 ] if match .group (1 ) else match .group (2 )
3374
+ # match group 1 matches escaped characters
3375
+ match .group (1 )[- 1 ] if match .group (1 )
3376
+ # match group 2 matches any character
3377
+ else match .group (2 )
3362
3378
for match in self .unquote_scan_re .finditer (ret )
3363
3379
)
3380
+ # fmt: on
3364
3381
3365
3382
# replace escaped quotes
3366
- if self .escQuote :
3367
- ret = ret .replace (self .escQuote , self .endQuoteChar )
3383
+ if self .esc_quote :
3384
+ ret = ret .replace (self .esc_quote , self .end_quote_char )
3368
3385
3369
3386
return loc , ret
3370
3387
0 commit comments