6
6
from constants import encodings
7
7
from utils import MethodDispatcher
8
8
9
+ #Non-unicode versions of constants for use in the pre-parser
10
+ spaceCharactersBytes = [str (item ) for item in spaceCharacters ]
11
+ asciiLettersBytes = [str (item ) for item in asciiLetters ]
12
+ asciiUppercaseBytes = [str (item ) for item in asciiUppercase ]
13
+
9
14
try :
10
15
from collections import deque
11
16
except ImportError :
@@ -357,7 +362,7 @@ def getCurrentByte(self):
357
362
358
363
currentByte = property (getCurrentByte )
359
364
360
- def skip (self , chars = spaceCharacters ):
365
+ def skip (self , chars = spaceCharactersBytes ):
361
366
"""Skip past a list of characters"""
362
367
while self .currentByte in chars :
363
368
self .position += 1
@@ -432,7 +437,7 @@ def handleComment(self):
432
437
return self .data .jumpTo ("-->" )
433
438
434
439
def handleMeta (self ):
435
- if self .data .currentByte not in spaceCharacters :
440
+ if self .data .currentByte not in spaceCharactersBytes :
436
441
#If we have <meta not followed by a space, just keep going
437
442
return True
438
443
#We have a valid meta element we want to search for attributes
@@ -462,7 +467,7 @@ def handlePossibleEndTag(self):
462
467
return self .handlePossibleTag (True )
463
468
464
469
def handlePossibleTag (self , endTag ):
465
- if self .data .currentByte not in asciiLetters :
470
+ if self .data .currentByte not in asciiLettersBytes :
466
471
#If the next byte is not an ascii letter either ignore this
467
472
#fragment (possible start tag case) or treat it according to
468
473
#handleOther
@@ -471,7 +476,7 @@ def handlePossibleTag(self, endTag):
471
476
self .handleOther ()
472
477
return True
473
478
474
- self .data .findNext (list (spaceCharacters ) + ["<" , ">" ])
479
+ self .data .findNext (list (spaceCharactersBytes ) + ["<" , ">" ])
475
480
if self .data .currentByte == "<" :
476
481
#return to the first step in the overall "two step" algorithm
477
482
#reprocessing the < byte
@@ -489,7 +494,7 @@ def handleOther(self):
489
494
def getAttribute (self ):
490
495
"""Return a name,value pair for the next attribute in the stream,
491
496
if one is found, or None"""
492
- self .data .skip (list (spaceCharacters )+ ["/" ])
497
+ self .data .skip (list (spaceCharactersBytes )+ ["/" ])
493
498
if self .data .currentByte == "<" :
494
499
self .data .position -= 1
495
500
return None
@@ -502,12 +507,12 @@ def getAttribute(self):
502
507
while True :
503
508
if self .data .currentByte == "=" and attrName :
504
509
break
505
- elif self .data .currentByte in spaceCharacters :
510
+ elif self .data .currentByte in spaceCharactersBytes :
506
511
spaceFound = True
507
512
break
508
513
elif self .data .currentByte in ("/" , "<" , ">" ):
509
514
return "" .join (attrName ), ""
510
- elif self .data .currentByte in asciiUppercase :
515
+ elif self .data .currentByte in asciiUppercaseBytes :
511
516
attrName .extend (self .data .currentByte .lower ())
512
517
else :
513
518
attrName .extend (self .data .currentByte )
@@ -536,23 +541,23 @@ def getAttribute(self):
536
541
self .data .position += 1
537
542
return "" .join (attrName ), "" .join (attrValue )
538
543
#11.4
539
- elif self .data .currentByte in asciiUppercase :
544
+ elif self .data .currentByte in asciiUppercaseBytes :
540
545
attrValue .extend (self .data .currentByte .lower ())
541
546
#11.5
542
547
else :
543
548
attrValue .extend (self .data .currentByte )
544
549
elif self .data .currentByte in (">" , "<" ):
545
550
return "" .join (attrName ), ""
546
- elif self .data .currentByte in asciiUppercase :
551
+ elif self .data .currentByte in asciiUppercaseBytes :
547
552
attrValue .extend (self .data .currentByte .lower ())
548
553
else :
549
554
attrValue .extend (self .data .currentByte )
550
555
while True :
551
556
self .data .position += 1
552
557
if self .data .currentByte in (
553
- list (spaceCharacters ) + [">" , "<" ]):
558
+ list (spaceCharactersBytes ) + [">" , "<" ]):
554
559
return "" .join (attrName ), "" .join (attrValue )
555
- elif self .data .currentByte in asciiUppercase :
560
+ elif self .data .currentByte in asciiUppercaseBytes :
556
561
attrValue .extend (self .data .currentByte .lower ())
557
562
else :
558
563
attrValue .extend (self .data .currentByte )
@@ -588,7 +593,7 @@ def parse(self):
588
593
#Unquoted value
589
594
oldPosition = self .data .position
590
595
try :
591
- self .data .findNext (spaceCharacters )
596
+ self .data .findNext (spaceCharactersBytes )
592
597
return self .data [oldPosition :self .data .position ]
593
598
except StopIteration :
594
599
#Return the whole remaining value
0 commit comments