@@ -83,9 +83,9 @@ def detectEncoding(self):
83
83
84
84
#If there is no BOM need to look for meta elements with encoding
85
85
#information
86
- # encoding = self.detectEncodingMeta()
87
- # if encoding is not None:
88
- # return encoding
86
+ encoding = self .detectEncodingMeta ()
87
+ if encoding is not None :
88
+ return encoding
89
89
90
90
#Guess with chardet, if available
91
91
try :
@@ -345,6 +345,7 @@ def getAttribute(self):
345
345
attrParser = AttrParser (self .data [self .position :])
346
346
attr = attrParser .parse ()
347
347
self .position += attrParser .position
348
+ #print attr, attrParser.position, self.data[self.position]
348
349
return attr
349
350
350
351
def isValidEncoding (self , encoding ):
@@ -426,6 +427,8 @@ def parse(self):
426
427
class AttrParser (FragmentParser ):
427
428
def parse (self ):
428
429
self .skip (list (spaceCharacters )+ ["/" ])
430
+ if self .position == len (self .fragment ):
431
+ return None
429
432
if self .fragment [self .position ] == "<" :
430
433
self .position -= 1
431
434
return None
@@ -434,41 +437,63 @@ def parse(self):
434
437
attrName = []
435
438
attrValue = []
436
439
spaceFound = False
440
+ #Step 5 attribute name
437
441
while True :
438
- if self .fragment [self .position ] == "=" and attrName :
442
+ if self .position == len (self .fragment ):
443
+ return "" .join (attrName ), ""
444
+ elif self .fragment [self .position ] == "=" and attrName :
439
445
break
440
446
elif self .fragment [self .position ] in spaceCharacters :
441
447
spaceFound = True
442
448
break
443
449
elif self .fragment [self .position ] in ("/" , "<" , ">" ):
444
- self .position -= 1
450
+ # self.position -= 1
445
451
return "" .join (attrName ), ""
446
452
elif self .fragment [self .position ] in asciiUppercase :
447
453
attrName .extend (self .fragment [self .position ].lower ())
448
454
else :
449
455
attrName .extend (self .fragment [self .position ])
456
+ #Step 6
450
457
self .position += 1
458
+ #Step 7
451
459
if spaceFound :
452
460
self .skip ()
461
+ if self .position == len (self .fragment ):
462
+ return "" .join (attrName ), ""
463
+ #Step 8
453
464
if self .fragment [self .position ] != "=" :
454
- self .position -= 1
465
+ # self.position -= 1
455
466
return "" .join (attrName ), ""
456
467
#XXX need to advance position in both spaces and value case
468
+ #Step 9
457
469
self .position += 1
470
+ #Step 10
458
471
self .skip ()
472
+ #XXX Need to exit if we go past the end of the fragment
473
+ if self .position == len (self .fragment ):
474
+ return "" .join (attrName ), ""
475
+ #Step 11
459
476
if self .fragment [self .position ] in ("'" , '"' ):
477
+ #11.1
460
478
quoteChar = self .fragment [self .position ]
461
- self .position += 1
462
479
while True :
463
- if self .fragment [self .position ] == quoteChar :
480
+ #11.2
481
+ self .position += 1
482
+ if self .position == len (self .fragment ):
483
+ return "" .join (attrName ), "" .join (attrValue )
484
+ #11.3
485
+ elif self .fragment [self .position ] == quoteChar :
486
+ #XXX Not in spec
487
+ self .position += 1
464
488
return "" .join (attrName ), "" .join (attrValue )
489
+ #11.4
465
490
elif self .fragment [self .position ] in asciiUppercase :
466
491
attrValue .extend (self .fragment [self .position ].lower ())
492
+ #11.5
467
493
else :
468
494
attrValue .extend (self .fragment [self .position ])
469
- self .position += 1
470
495
elif self .fragment [self .position ] in (">" , '<' ):
471
- self .position -= 1
496
+ # self.position -= 1
472
497
return "" .join (attrName ), ""
473
498
elif self .fragment [self .position ] in asciiUppercase :
474
499
attrValue .extend (self .fragment [self .position ].lower ())
@@ -477,10 +502,13 @@ def parse(self):
477
502
#XXX I think this next bit is right but there is a bug in the spec
478
503
while True :
479
504
self .position += 1
480
- if self .fragment [self .position ] in (
505
+ if self .position == len (self .fragment ):
506
+ return "" .join (attrName ), "" .join (attrValue )
507
+ elif self .fragment [self .position ] in (
481
508
list (spaceCharacters ) + [">" , '<' ]):
482
- self .position -= 1
483
- return "" .join (attrName ), ""
509
+ #XXX this is wrong
510
+ #self.position -= 1
511
+ return "" .join (attrName ), "" .join (attrValue )
484
512
elif self .fragment [self .position ] in asciiUppercase :
485
513
attrValue .extend (self .fragment [self .position ].lower ())
486
514
else :
0 commit comments