22
22
import iso639codes
23
23
import rfc3987
24
24
import rfc2046
25
- from html5lib .constants import E , spaceCharacters , digits
25
+ from html5lib .constants import E , spaceCharacters , digits , tokenTypes
26
26
from html5lib import tokenizer
27
27
import gettext
28
28
_ = gettext .gettext
@@ -267,8 +267,9 @@ def __init__(self, stream, encoding, parseMeta, **kwargs):
267
267
self .IDsWeHaveKnownAndLoved = []
268
268
269
269
def __iter__ (self ):
270
+ types = dict ((v ,k ) for k ,v in tokenTypes .iteritems ())
270
271
for token in _base .Filter .__iter__ (self ):
271
- fakeToken = {"type" : token .get ("type" , "-" ),
272
+ fakeToken = {"type" : types . get ( token .get ("type" , "-" ) , "-" ),
272
273
"name" : token .get ("name" , "-" ).capitalize ()}
273
274
method = getattr (self , "validate%(type)s%(name)s" % fakeToken , None )
274
275
if method :
@@ -301,23 +302,23 @@ def validateStartTagInput(self, token):
301
302
attrDict = dict ([(name .lower (), value ) for name , value in token .get ("data" , [])])
302
303
inputType = attrDict .get ("type" , "text" )
303
304
if inputType not in inputTypeAllowedAttributeMap .keys ():
304
- yield {"type" : "ParseError" ,
305
+ yield {"type" : tokenTypes [ "ParseError" ] ,
305
306
"data" : "unknown-input-type" ,
306
307
"datavars" : {"attrValue" : inputType }}
307
308
allowedAttributes = inputTypeAllowedAttributeMap .get (inputType , [])
308
309
for attrName , attrValue in attrDict .items ():
309
310
if attrName not in allowedAttributeMap ['input' ]:
310
- yield {"type" : "ParseError" ,
311
+ yield {"type" : tokenTypes [ "ParseError" ] ,
311
312
"data" : "unknown-attribute" ,
312
313
"datavars" : {"tagName" : "input" ,
313
314
"attributeName" : attrName }}
314
315
elif attrName not in allowedAttributes :
315
- yield {"type" : "ParseError" ,
316
+ yield {"type" : tokenTypes [ "ParseError" ] ,
316
317
"data" : "attribute-not-allowed-on-this-input-type" ,
317
318
"datavars" : {"attributeName" : attrName ,
318
319
"inputType" : inputType }}
319
320
if attrName in inputTypeDeprecatedAttributeMap .get (inputType , []):
320
- yield {"type" : "ParseError" ,
321
+ yield {"type" : tokenTypes [ "ParseError" ] ,
321
322
"data" : "deprecated-attribute" ,
322
323
"datavars" : {"attributeName" : attrName ,
323
324
"inputType" : inputType }}
@@ -330,7 +331,7 @@ def checkUnknownStartTag(self, token):
330
331
# check for recognized tag name
331
332
name = token .get ("name" , "" ).lower ()
332
333
if name not in allowedAttributeMap .keys ():
333
- yield {"type" : "ParseError" ,
334
+ yield {"type" : tokenTypes [ "ParseError" ] ,
334
335
"data" : "unknown-start-tag" ,
335
336
"datavars" : {"tagName" : name }}
336
337
@@ -342,7 +343,7 @@ def checkStartTagRequiredAttributes(self, token):
342
343
in token .get ("data" , [])]
343
344
for attrName in requiredAttributeMap [name ]:
344
345
if attrName not in attrsPresent :
345
- yield {"type" : "ParseError" ,
346
+ yield {"type" : tokenTypes [ "ParseError" ] ,
346
347
"data" : "missing-required-attribute" ,
347
348
"datavars" : {"tagName" : name ,
348
349
"attributeName" : attrName }}
@@ -353,7 +354,7 @@ def checkStartTagUnknownAttributes(self, token):
353
354
allowedAttributes = globalAttributes | allowedAttributeMap .get (name , frozenset (()))
354
355
for attrName , attrValue in token .get ("data" , []):
355
356
if attrName .lower () not in allowedAttributes :
356
- yield {"type" : "ParseError" ,
357
+ yield {"type" : tokenTypes [ "ParseError" ] ,
357
358
"data" : "unknown-attribute" ,
358
359
"datavars" : {"tagName" : name ,
359
360
"attributeName" : attrName }}
@@ -365,40 +366,40 @@ def checkStartTagUnknownAttributes(self, token):
365
366
# def checkURI(self, token, tagName, attrName, attrValue):
366
367
# isValid, errorCode = rfc3987.isValidURI(attrValue)
367
368
# if not isValid:
368
- # yield {"type": "ParseError",
369
+ # yield {"type": tokenTypes[ "ParseError"] ,
369
370
# "data": errorCode,
370
371
# "datavars": {"tagName": tagName,
371
372
# "attributeName": attrName}}
372
- # yield {"type": "ParseError",
373
+ # yield {"type": tokenTypes[ "ParseError"] ,
373
374
# "data": "invalid-attribute-value",
374
375
# "datavars": {"tagName": tagName,
375
376
# "attributeName": attrName}}
376
377
377
378
def checkIRI(self, token, tagName, attrName, attrValue):
    """Yield ParseError tokens when attrValue is not a valid IRI.

    Validity is decided by rfc3987.isValidIRI, which returns an
    (isValid, errorCode) pair.  For an invalid value two error tokens
    are produced, in this order: one carrying the specific errorCode,
    then the generic "invalid-attribute-value".  A valid value yields
    nothing.  The token argument is not read here.
    """
    isValid, errorCode = rfc3987.isValidIRI(attrValue)
    if isValid:
        return
    # Specific diagnosis first, generic marker second.
    for problem in (errorCode, "invalid-attribute-value"):
        yield {"type": tokenTypes["ParseError"],
               "data": problem,
               "datavars": {"tagName": tagName,
                            "attributeName": attrName}}
388
389
389
390
def checkID (self , token , tagName , attrName , attrValue ):
390
391
if not attrValue :
391
- yield {"type" : "ParseError" ,
392
+ yield {"type" : tokenTypes [ "ParseError" ] ,
392
393
"data" : "attribute-value-can-not-be-blank" ,
393
394
"datavars" : {"tagName" : tagName ,
394
395
"attributeName" : attrName }}
395
396
for c in attrValue :
396
397
if c in spaceCharacters :
397
- yield {"type" : "ParseError" ,
398
+ yield {"type" : tokenTypes [ "ParseError" ] ,
398
399
"data" : "space-in-id" ,
399
400
"datavars" : {"tagName" : tagName ,
400
401
"attributeName" : attrName }}
401
- yield {"type" : "ParseError" ,
402
+ yield {"type" : tokenTypes [ "ParseError" ] ,
402
403
"data" : "invalid-attribute-value" ,
403
404
"datavars" : {"tagName" : tagName ,
404
405
"attributeName" : attrName }}
@@ -427,7 +428,7 @@ def checkTokenList(self, tagName, attrName, attrValue):
427
428
valueDict = {}
428
429
for currentValue in valueList :
429
430
if valueDict .has_key (currentValue ):
430
- yield {"type" : "ParseError" ,
431
+ yield {"type" : tokenTypes [ "ParseError" ] ,
431
432
"data" : "duplicate-value-in-token-list" ,
432
433
"datavars" : {"tagName" : tagName ,
433
434
"attributeName" : attrName ,
@@ -437,32 +438,32 @@ def checkTokenList(self, tagName, attrName, attrValue):
437
438
438
439
def checkEnumeratedValue(self, token, tagName, attrName, attrValue, enumeratedValues):
    """Yield ParseError tokens if attrValue is not one of enumeratedValues.

    A blank value is reported as "attribute-value-can-not-be-blank"
    (and nothing else) unless '' is itself an allowed value.  Otherwise
    the value is lower-cased before the membership test; a miss yields
    "invalid-enumerated-value" (with the allowed values attached as a
    tuple) followed by the generic "invalid-attribute-value".
    The token argument is not read here.
    """
    if not attrValue:
        if '' not in enumeratedValues:
            yield {"type": tokenTypes["ParseError"],
                   "data": "attribute-value-can-not-be-blank",
                   "datavars": {"tagName": tagName,
                                "attributeName": attrName}}
            return

    # Comparison against the allowed set is case-insensitive.
    normalized = attrValue.lower()
    if normalized in enumeratedValues:
        return

    yield {"type": tokenTypes["ParseError"],
           "data": "invalid-enumerated-value",
           "datavars": {"tagName": tagName,
                        "attributeName": attrName,
                        "enumeratedValues": tuple(enumeratedValues)}}
    yield {"type": tokenTypes["ParseError"],
           "data": "invalid-attribute-value",
           "datavars": {"tagName": tagName,
                        "attributeName": attrName}}
456
457
457
458
def checkBoolean(self, token, tagName, attrName, attrValue):
    """Yield ParseError tokens for a bad boolean attribute value.

    The only accepted values are the empty string and the attribute's
    own name (compared exactly, with no case folding).  Any other value
    yields "invalid-boolean-value" (with the accepted values attached
    as a tuple) followed by the generic "invalid-attribute-value".
    The token argument is not read here.
    """
    permitted = frozenset((attrName, ''))
    if attrValue not in permitted:
        # Specific error first ...
        yield {
            "type": tokenTypes["ParseError"],
            "data": "invalid-boolean-value",
            "datavars": {
                "tagName": tagName,
                "attributeName": attrName,
                "enumeratedValues": tuple(permitted),
            },
        }
        # ... then the generic one.
        yield {
            "type": tokenTypes["ParseError"],
            "data": "invalid-attribute-value",
            "datavars": {
                "tagName": tagName,
                "attributeName": attrName,
            },
        }
@@ -471,7 +472,7 @@ def checkInteger(self, token, tagName, attrName, attrValue):
471
472
sign = 1
472
473
numberString = ''
473
474
state = 'begin' # ('begin', 'initial-number', 'number', 'trailing-junk')
474
- error = {"type" : "ParseError" ,
475
+ error = {"type" : tokenTypes [ "ParseError" ] ,
475
476
"data" : "invalid-integer-value" ,
476
477
"datavars" : {"tagName" : tagName ,
477
478
"attributeName" : attrName ,
@@ -503,7 +504,7 @@ def checkInteger(self, token, tagName, attrName, attrValue):
503
504
elif state == 'trailing-junk' :
504
505
pass
505
506
if not numberString :
506
- yield {"type" : "ParseError" ,
507
+ yield {"type" : tokenTypes [ "ParseError" ] ,
507
508
"data" : "attribute-value-can-not-be-blank" ,
508
509
"datavars" : {"tagName" : tagName ,
509
510
"attributeName" : attrName }}
@@ -517,15 +518,15 @@ def checkBrowsingContext(self, token, tagName, attrName, attrValue):
517
518
if attrValue [0 ] != '_' : return
518
519
attrValue = attrValue .lower ()
519
520
if attrValue in frozenset (('_self' , '_parent' , '_top' , '_blank' )): return
520
- yield {"type" : "ParseError" ,
521
+ yield {"type" : tokenTypes [ "ParseError" ] ,
521
522
"data" : "invalid-browsing-context" ,
522
523
"datavars" : {"tagName" : tagName ,
523
524
"attributeName" : attrName }}
524
525
525
526
def checkLangCode (self , token , tagName , attrName , attrValue ):
526
527
if not attrValue : return # blank is OK
527
528
if not iso639codes .isValidLangCode (attrValue ):
528
- yield {"type" : "ParseError" ,
529
+ yield {"type" : tokenTypes [ "ParseError" ] ,
529
530
"data" : "invalid-lang-code" ,
530
531
"datavars" : {"tagName" : tagName ,
531
532
"attributeName" : attrName ,
@@ -534,13 +535,13 @@ def checkLangCode(self, token, tagName, attrName, attrValue):
534
535
def checkMIMEType (self , token , tagName , attrName , attrValue ):
535
536
# XXX needs tests
536
537
if not attrValue :
537
- yield {"type" : "ParseError" ,
538
+ yield {"type" : tokenTypes [ "ParseError" ] ,
538
539
"data" : "attribute-value-can-not-be-blank" ,
539
540
"datavars" : {"tagName" : tagName ,
540
541
"attributeName" : attrName }}
541
542
542
543
if not rfc2046 .isValidMIMEType (attrValue ):
543
- yield {"type" : "ParseError" ,
544
+ yield {"type" : tokenTypes [ "ParseError" ] ,
544
545
"data" : "invalid-mime-type" ,
545
546
"datavars" : {"tagName" : tagName ,
546
547
"attributeName" : attrName ,
@@ -556,7 +557,7 @@ def checkLinkRelation(self, token, tagName, attrName, attrValue):
556
557
allowedValues = (tagName == 'link' ) and linkRelValues or aRelValues
557
558
for currentValue in valueList :
558
559
if currentValue not in allowedValues :
559
- yield {"type" : "ParseError" ,
560
+ yield {"type" : tokenTypes [ "ParseError" ] ,
560
561
"data" : "invalid-rel" ,
561
562
"datavars" : {"tagName" : tagName ,
562
563
"attributeName" : attrName }}
@@ -593,7 +594,7 @@ def checkAttributeValues(self, token):
593
594
def validateAttributeValueClass (self , token , tagName , attrName , attrValue ):
594
595
for t in self .checkTokenList (tagName , attrName , attrValue ) or []:
595
596
yield t
596
- yield {"type" : "ParseError" ,
597
+ yield {"type" : tokenTypes [ "ParseError" ] ,
597
598
"data" : "invalid-attribute-value" ,
598
599
"datavars" : {"tagName" : tagName ,
599
600
"attributeName" : attrName }}
@@ -623,7 +624,7 @@ def validateAttributeValueId(self, token, tagName, attrName, attrValue):
623
624
for t in self .checkID (token , tagName , attrName , attrValue ) or []: yield t
624
625
if not attrValue : return
625
626
if attrValue in self .IDsWeHaveKnownAndLoved :
626
- yield {"type" : "ParseError" ,
627
+ yield {"type" : tokenTypes [ "ParseError" ] ,
627
628
"data" : "duplicate-id" ,
628
629
"datavars" : {"tagName" : tagName }}
629
630
self .IDsWeHaveKnownAndLoved .append (attrValue )
@@ -641,7 +642,7 @@ def validateAttributeValueTemplate(self, token, tagName, attrName, attrValue):
641
642
642
643
def validateAttributeValueHtmlXmlns (self , token , tagName , attrName , attrValue ):
643
644
if attrValue != "http://www.w3.org/1999/xhtml" :
644
- yield {"type" : "ParseError" ,
645
+ yield {"type" : tokenTypes [ "ParseError" ] ,
645
646
"data" : "invalid-root-namespace" ,
646
647
"datavars" : {"tagName" : tagName ,
647
648
"attributeName" : attrName }}
@@ -699,7 +700,7 @@ def eof(self):
699
700
# hooray for obscure side effects!
700
701
attrValue = attrsDict .get ("contextmenu" , "" )
701
702
if attrValue and (attrValue not in self .IDsWeHaveKnownAndLoved ):
702
- yield {"type" : "ParseError" ,
703
+ yield {"type" : tokenTypes [ "ParseError" ] ,
703
704
"data" : "id-does-not-exist" ,
704
705
"datavars" : {"tagName" : tagName ,
705
706
"attributeName" : "contextmenu" ,
@@ -710,6 +711,6 @@ def eof(self):
710
711
if not id : continue
711
712
if id == attrValue :
712
713
if refToken .get ("name" , "" ).lower () != "menu" :
713
- yield {"type" : "ParseError" ,
714
+ yield {"type" : tokenTypes [ "ParseError" ] ,
714
715
"data" : "contextmenu-must-point-to-menu" }
715
716
break
0 commit comments