@@ -306,7 +306,7 @@ def characterErrorsUCS2(self, data):
306
306
skip = False
307
307
self .errors .append ("invalid-codepoint" )
308
308
309
- def charsUntil (self , characters , opposite = False ):
309
+ def charsUntilRe (self , characters , opposite = False ):
310
310
""" Returns a string of characters from the stream up to but not
311
311
including any character in 'characters' or EOF. 'characters' must be
312
312
a container that supports the 'in' method and iteration over its
@@ -353,6 +353,43 @@ def charsUntil(self, characters, opposite=False):
353
353
r = "" .join (rv )
354
354
return r
355
355
356
def charsUntilNoRe(self, characters, opposite=False):
    """Consume and return a run of characters from the buffered stream.

    Regex-free counterpart of ``charsUntilRe``: membership is tested
    against a ``frozenset`` instead of a compiled pattern.

    With ``opposite`` false (the default), characters are consumed up to,
    but not including, the first character that is in ``characters``.
    With ``opposite`` true, characters are consumed for as long as they
    *are* in ``characters``.

    The scan continues across chunk boundaries: whenever the current
    chunk is exhausted, ``self.readChunk()`` is called, and a falsy
    result ends the scan (presumably meaning end of input -- confirm
    against ``readChunk``).

    :arg characters: iterable of single characters forming the set.
    :arg opposite: invert the membership test, as described above.
    :returns: the consumed text; ``""`` if the buffer was already
        exhausted and ``readChunk()`` reported nothing more.

    ``self.chunkOffset`` is advanced past everything that is returned.
    """
    chars = frozenset(characters)
    # One flag lets a single loop serve both modes: a character is
    # consumed while its membership in ``chars`` equals this flag.
    consume_members = bool(opposite)

    # Refill an exhausted buffer before scanning.
    if self.chunkOffset >= self.chunkSize and not self.readChunk():
        return ""

    pieces = []
    while True:
        # Re-read the buffer state on every pass: readChunk() is expected
        # to replace it.  Locals also avoid repeated attribute lookups in
        # the per-character loop below.
        chunk = self.chunk
        start = self.chunkOffset
        size = self.chunkSize

        end = start
        while end < size and (chunk[end] in chars) == consume_members:
            end += 1

        pieces.append(chunk[start:end])
        self.chunkOffset = end

        # Stop at the first non-matching character inside this chunk, or
        # when the chunk ran out and no further chunk is available.
        if end < size or not self.readChunk():
            break

    # "".join always returns a str, so no None check is needed.
    return "".join(pieces)
392
+
356
393
def unget (self , char ):
357
394
# Only one character is allowed to be ungotten at once - it must
358
395
# be consumed again before any further call to unget
0 commit comments