Skip to content

Commit 88c2483

Browse files
author
Mark Pilgrim
committed
initial checkin of non-functional conformance checker
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40963
1 parent 6ee1c5a commit 88c2483

File tree

1 file changed

+40
-0
lines changed

1 file changed

+40
-0
lines changed

src/html5lib/filters/validator.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
"""HTML 5 conformance checker
2+
3+
Warning: this module is experimental, incomplete, and subject to removal at any time.
4+
5+
Usage:
6+
>>> from html5lib.html5parser import HTMLParser
7+
>>> from html5lib.filters.validator import HTMLConformanceChecker
8+
>>> p = HTMLParser(tokenizer=HTMLConformanceChecker)
9+
>>> p.parse('<!doctype html>\n<html foo=bar></html>')
10+
<<class 'html5lib.treebuilders.simpletree.Document'> None>
11+
>>> p.errors
12+
[((2, 14), 'unrecognized-attribute', {'attributeName': u'foo', 'tagName': u'html'})]
13+
"""
14+
15+
import _base
16+
from html5lib.constants import E
17+
from html5lib import tokenizer
18+
import gettext
19+
_ = gettext.gettext
20+
21+
# Register the message templates for the error codes this checker emits.
# The placeholders must match the keys of the "datavars" dict carried by the
# corresponding ParseError token ("tagName" and "attributeName" for
# "unrecognized-attribute"); a mismatched name would raise KeyError when the
# message is interpolated.
E.update({
    "unrecognized-attribute":
        _(u"Unrecognized attribute '%(attributeName)s' in <%(tagName)s>"),
})
25+
26+
class HTMLConformanceChecker(_base.Filter):
    """Token filter that reports unrecognized attributes on <html>.

    The constructor takes the same arguments it forwards to
    tokenizer.HTMLTokenizer, so the class can be handed to the parser in
    place of a tokenizer (see the module docstring for an example).
    """

    def __init__(self, stream, encoding, parseMeta, **kwargs):
        """Build an HTMLTokenizer over `stream` and filter its tokens.

        All arguments are passed through unchanged to
        tokenizer.HTMLTokenizer; the resulting tokenizer becomes the
        source handed to _base.Filter.
        """
        source = tokenizer.HTMLTokenizer(stream, encoding, parseMeta, **kwargs)
        _base.Filter.__init__(self, source)

    def __iter__(self):
        """Yield every underlying token, preceded by any conformance errors.

        For each StartTag token whose lowercased name is 'html', one
        ParseError token with code "unrecognized-attribute" is yielded
        per attribute whose lowercased name is not 'xmlns'. The original
        token is always yielded afterwards.
        """
        for token in _base.Filter.__iter__(self):
            tokenType = token["type"]
            if tokenType == "StartTag":
                tagName = token["name"].lower()
                if tagName == 'html':
                    # token["data"] is iterated as (name, value) pairs;
                    # only the attribute name matters for this check.
                    for attrName, attrValue in token["data"]:
                        if attrName.lower() == 'xmlns':
                            continue
                        yield {
                            "type": "ParseError",
                            "data": "unrecognized-attribute",
                            "datavars": {
                                "tagName": tagName,
                                "attributeName": attrName,
                            },
                        }

            # Errors (if any) are emitted before the token they refer to.
            yield token

0 commit comments

Comments
 (0)