initial checkin of non-functional conformance checker

Mark Pilgrim · Mark Pilgrim · commit 88c2483bae5a · 2007-08-29T03:48:15.000Z
--HG--
extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40963
diff --git a/src/html5lib/filters/validator.py b/src/html5lib/filters/validator.py
@@ -0,0 +1,40 @@
+"""HTML 5 conformance checker
+
+Warning: this module is experimental, incomplete, and subject to removal at any time.
+
+Usage:
+>>> from html5lib.html5parser import HTMLParser
+>>> from html5lib.filters.validator import HTMLConformanceChecker
+>>> p = HTMLParser(tokenizer=HTMLConformanceChecker)
+>>> p.parse('<!doctype html>\n<html foo=bar></html>')
+<<class 'html5lib.treebuilders.simpletree.Document'> None>
+>>> p.errors
+[((2, 14), 'unrecognized-attribute', {'attributeName': u'foo', 'tagName': u'html'})]
+"""
+
+import _base
+from html5lib.constants import E
+from html5lib import tokenizer
+import gettext
+_ = gettext.gettext
+
+E.update({  # placeholders must match the "datavars" keys of the emitted ParseError
+    "unrecognized-attribute":
+        _(u"Unrecognized attribute '%(attributeName)s' in <%(tagName)s>"),
+})
+
+class HTMLConformanceChecker(_base.Filter):
+    """Token filter reporting attributes on <html> start tags other than xmlns."""
+    def __init__(self, stream, encoding, parseMeta, **kwargs):
+        source = tokenizer.HTMLTokenizer(stream, encoding, parseMeta, **kwargs)
+        _base.Filter.__init__(self, source)
+
+    def __iter__(self):
+        for token in _base.Filter.__iter__(self):
+            if token["type"] == "StartTag" and token["name"].lower() == 'html':
+                tagName = token["name"].lower()
+                for attrName, _value in token["data"]:
+                    if attrName.lower() != 'xmlns':
+                        # Errors are yielded ahead of the tag that triggered them.
+                        yield {"type": "ParseError", "data": "unrecognized-attribute", "datavars": {"tagName": tagName, "attributeName": attrName}}
+            yield token