@@ -471,7 +471,7 @@ class EncodingBytes(str):
471
471
If the position is ever greater than the string length then an exception is
472
472
raised"""
473
473
def __new__ (self , value ):
474
- return str .__new__ (self , value )
474
+ return str .__new__ (self , value . lower () )
475
475
476
476
def __init__ (self , value ):
477
477
self ._position = - 1
@@ -539,14 +539,12 @@ def skipUntil(self, chars):
539
539
self ._position = p
540
540
return None
541
541
542
- def matchBytes (self , bytes , lower = False ):
542
+ def matchBytes (self , bytes ):
543
543
"""Look for a sequence of bytes at the start of a string. If the bytes
544
544
are found return True and advance the position to the byte after the
545
545
match. Otherwise return False and leave the position alone"""
546
546
p = self .position
547
547
data = self [p :p + len (bytes )]
548
- if lower :
549
- data = data .lower ()
550
548
rv = data .startswith (bytes )
551
549
if rv :
552
550
self .position += len (bytes )
@@ -557,6 +555,9 @@ def jumpTo(self, bytes):
557
555
a match is found advance the position to the last byte of the match"""
558
556
newPosition = self [self .position :].find (bytes )
559
557
if newPosition > - 1 :
558
+ # XXX: This is ugly, but I can't see a nicer way to fix this.
559
+ if self ._position == - 1 :
560
+ self ._position = 0
560
561
self ._position += (newPosition + len (bytes )- 1 )
561
562
return True
562
563
else :
@@ -581,7 +582,7 @@ def getEncoding(self):
581
582
for byte in self .data :
582
583
keepParsing = True
583
584
for key , method in methodDispatch :
584
- if self .data .matchBytes (key , lower = True ):
585
+ if self .data .matchBytes (key ):
585
586
try :
586
587
keepParsing = method ()
587
588
break
@@ -659,72 +660,75 @@ def getAttribute(self):
659
660
"""Return a name,value pair for the next attribute in the stream,
660
661
if one is found, or None"""
661
662
data = self .data
663
+ # Step 1 (skip chars)
662
664
c = data .skip (spaceCharactersBytes | frozenset ("/" ))
663
- if c == "<" :
664
- data .previous ()
665
- return None
666
- elif c == ">" or c is None :
665
+ # Step 2
666
+ if c in (">" , None ):
667
667
return None
668
+ # Step 3
668
669
attrName = []
669
670
attrValue = []
670
- spaceFound = False
671
- #Step 5 attribute name
671
+ #Step 4 attribute name
672
672
while True :
673
673
if c == "=" and attrName :
674
674
break
675
675
elif c in spaceCharactersBytes :
676
- spaceFound = True
676
+ #Step 6!
677
+ c = data .skip ()
678
+ c = data .next ()
677
679
break
678
- elif c in ("/" , "<" , " >" ):
680
+ elif c in ("/" , ">" ):
679
681
return "" .join (attrName ), ""
680
682
elif c in asciiUppercaseBytes :
681
683
attrName .append (c .lower ())
684
+ elif c == None :
685
+ return None
682
686
else :
683
687
attrName .append (c )
684
- #Step 6
688
+ #Step 5
685
689
c = data .next ()
686
690
#Step 7
687
- if spaceFound :
688
- c = data .skip ()
689
- #Step 8
690
- if c != "=" :
691
- data .previous ()
692
- return "" .join (attrName ), ""
693
- #XXX need to advance position in both spaces and value case
694
- #Step 9
691
+ if c != "=" :
692
+ data .previous ()
693
+ return "" .join (attrName ), ""
694
+ #Step 8
695
695
data .next ()
696
- #Step 10
696
+ #Step 9
697
697
c = data .skip ()
698
- #Step 11
698
+ #Step 10
699
699
if c in ("'" , '"' ):
700
- #11 .1
700
+ #10 .1
701
701
quoteChar = c
702
702
while True :
703
- #11.3
703
+ #10.2
704
704
c = data .next ()
705
+ #10.3
705
706
if c == quoteChar :
706
707
data .next ()
707
708
return "" .join (attrName ), "" .join (attrValue )
708
- #11 .4
709
+ #10 .4
709
710
elif c in asciiUppercaseBytes :
710
711
attrValue .append (c .lower ())
711
- #11 .5
712
+ #10 .5
712
713
else :
713
714
attrValue .append (c )
714
- elif c in ( ">" , "<" ) :
715
+ elif c == ">" :
715
716
return "" .join (attrName ), ""
716
717
elif c in asciiUppercaseBytes :
717
718
attrValue .append (c .lower ())
718
719
elif c is None :
719
720
return None
720
721
else :
721
722
attrValue .append (c )
723
+ # Step 11
722
724
while True :
723
725
c = data .next ()
724
726
if c in spacesAngleBrackets :
725
727
return "" .join (attrName ), "" .join (attrValue )
726
728
elif c in asciiUppercaseBytes :
727
729
attrValue .append (c .lower ())
730
+ elif c is None :
731
+ return None
728
732
else :
729
733
attrValue .append (c )
730
734
@@ -734,10 +738,6 @@ def __init__(self, data):
734
738
self .data = data
735
739
def parse (self ):
736
740
try :
737
- #Skip to the first ";"
738
- self .data .jumpTo (";" )
739
- self .data .position += 1
740
- self .data .skip ()
741
741
#Check if the attr name is charset
742
742
#otherwise return
743
743
self .data .jumpTo ("charset" )
@@ -753,8 +753,10 @@ def parse(self):
753
753
quoteMark = self .data .currentByte
754
754
self .data .position += 1
755
755
oldPosition = self .data .position
756
- self .data .jumpTo (quoteMark )
757
- return self .data [oldPosition :self .data .position ]
756
+ if self .data .jumpTo (quoteMark ):
757
+ return self .data [oldPosition :self .data .position ]
758
+ else :
759
+ return None
758
760
else :
759
761
#Unquoted value
760
762
oldPosition = self .data .position
0 commit comments