html5lib
diff --git a/html5lib/__init__.py b/html5lib/__init__.py
Lines changed: 7 additions & 6 deletions
diff --git a/html5lib/constants.py b/html5lib/constants.py
Lines changed: 543 additions & 542 deletions
diff --git a/html5lib/filters/_base.py b/html5lib/filters/_base.py
Lines changed: 3 additions & 0 deletions
diff --git a/html5lib/filters/inject_meta_charset.py b/html5lib/filters/inject_meta_charset.py
Lines changed: 27 additions & 24 deletions
diff --git a/html5lib/filters/lint.py b/html5lib/filters/lint.py
Lines changed: 35 additions & 33 deletions
@@ -1,4 +1,4 @@
-""" 
+u""" 
 HTML parsing library based on the WHATWG "HTML5"
 specification. The parser is designed to be compatible with existing
 HTML found in the wild and implements well-defined error recovery that
@@ -10,8 +10,9 @@
 f = open("my_document.html")
 tree = html5lib.parse(f) 
 """
-__version__ = "0.95-dev"
-from html5parser import HTMLParser, parse, parseFragment
-from treebuilders import getTreeBuilder
-from treewalkers import getTreeWalker
-from serializer import serialize
+from __future__ import absolute_import
+__version__ = u"0.95-dev"
+from .html5parser import HTMLParser, parse, parseFragment
+from .treebuilders import getTreeBuilder
+from .treewalkers import getTreeWalker
+from .serializer import serialize
@@ -2,9 +2,12 @@
 class Filter(object):
     def __init__(self, source):
         self.source = source
+    __init__.func_annotations = {}
 
     def __iter__(self):
         return iter(self.source)
+    __iter__.func_annotations = {}
 
     def __getattr__(self, name):
         return getattr(self.source, name)
+    __getattr__.func_annotations = {}
@@ -1,62 +1,65 @@
-import _base
+from __future__ import absolute_import
+from . import _base
 
 class Filter(_base.Filter):
     def __init__(self, source, encoding):
         _base.Filter.__init__(self, source)
         self.encoding = encoding
+    __init__.func_annotations = {}
 
     def __iter__(self):
-        state = "pre_head"
+        state = u"pre_head"
         meta_found = (self.encoding is None)
         pending = []
 
         for token in _base.Filter.__iter__(self):
-            type = token["type"]
-            if type == "StartTag":
-                if token["name"].lower() == u"head":
-                    state = "in_head"
+            type = token[u"type"]
+            if type == u"StartTag":
+                if token[u"name"].lower() == u"head":
+                    state = u"in_head"
 
-            elif type == "EmptyTag":
-                if token["name"].lower() == u"meta":
+            elif type == u"EmptyTag":
+                if token[u"name"].lower() == u"meta":
                    # replace charset with actual encoding
                    has_http_equiv_content_type = False
-                   for (namespace,name),value in token["data"].iteritems():
+                   for (namespace,name),value in token[u"data"].items():
                        if namespace != None:
                            continue
                        elif name.lower() == u'charset':
-                          token["data"][(namespace,name)] = self.encoding
+                          token[u"data"][(namespace,name)] = self.encoding
                           meta_found = True
                           break
                        elif name == u'http-equiv' and value.lower() == u'content-type':
                            has_http_equiv_content_type = True
                    else:
-                       if has_http_equiv_content_type and (None, u"content") in token["data"]:
-                           token["data"][(None, u"content")] = u'text/html; charset=%s' % self.encoding
+                       if has_http_equiv_content_type and (None, u"content") in token[u"data"]:
+                           token[u"data"][(None, u"content")] = u'text/html; charset=%s' % self.encoding
                            meta_found = True
 
-                elif token["name"].lower() == u"head" and not meta_found:
+                elif token[u"name"].lower() == u"head" and not meta_found:
                     # insert meta into empty head
-                    yield {"type": "StartTag", "name": u"head",
-                           "data": token["data"]}
-                    yield {"type": "EmptyTag", "name": u"meta",
-                           "data": {(None, u"charset"): self.encoding}}
-                    yield {"type": "EndTag", "name": u"head"}
+                    yield {u"type": u"StartTag", u"name": u"head",
+                           u"data": token[u"data"]}
+                    yield {u"type": u"EmptyTag", u"name": u"meta",
+                           u"data": {(None, u"charset"): self.encoding}}
+                    yield {u"type": u"EndTag", u"name": u"head"}
                     meta_found = True
                     continue
 
-            elif type == "EndTag":
-                if token["name"].lower() == u"head" and pending:
+            elif type == u"EndTag":
+                if token[u"name"].lower() == u"head" and pending:
                     # insert meta into head (if necessary) and flush pending queue
                     yield pending.pop(0)
                     if not meta_found:
-                        yield {"type": "EmptyTag", "name": u"meta",
-                               "data": {(None, u"charset"): self.encoding}}
+                        yield {u"type": u"EmptyTag", u"name": u"meta",
+                               u"data": {(None, u"charset"): self.encoding}}
                     while pending:
                         yield pending.pop(0)
                     meta_found = True
-                    state = "post_head"
+                    state = u"post_head"
 
-            if state == "in_head":
+            if state == u"in_head":
                 pending.append(token)
             else:
                 yield token
+    __iter__.func_annotations = {}
@@ -1,7 +1,8 @@
+from __future__ import absolute_import
 from gettext import gettext
 _ = gettext
 
-import _base
+from . import _base
 from html5lib.constants import cdataElements, rcdataElements, voidElements
 
 from html5lib.constants import spaceCharacters
@@ -12,39 +13,39 @@ class LintError(Exception): pass
 class Filter(_base.Filter):
     def __iter__(self):
         open_elements = []
-        contentModelFlag = "PCDATA"
+        contentModelFlag = u"PCDATA"
         for token in _base.Filter.__iter__(self):
-            type = token["type"]
-            if type in ("StartTag", "EmptyTag"):
-                name = token["name"]
-                if contentModelFlag != "PCDATA":
-                    raise LintError(_("StartTag not in PCDATA content model flag: %s") % name)
+            type = token[u"type"]
+            if type in (u"StartTag", u"EmptyTag"):
+                name = token[u"name"]
+                if contentModelFlag != u"PCDATA":
+                    raise LintError(_(u"StartTag not in PCDATA content model flag: %s") % name)
                 if not isinstance(name, unicode):
                     raise LintError(_(u"Tag name is not a string: %r") % name)
                 if not name:
                     raise LintError(_(u"Empty tag name"))
-                if type == "StartTag" and name in voidElements:
+                if type == u"StartTag" and name in voidElements:
                     raise LintError(_(u"Void element reported as StartTag token: %s") % name)
-                elif type == "EmptyTag" and name not in voidElements:
-                    raise LintError(_(u"Non-void element reported as EmptyTag token: %s") % token["name"])
-                if type == "StartTag":
+                elif type == u"EmptyTag" and name not in voidElements:
+                    raise LintError(_(u"Non-void element reported as EmptyTag token: %s") % token[u"name"])
+                if type == u"StartTag":
                     open_elements.append(name)
-                for name, value in token["data"]:
+                for name, value in token[u"data"]:
                     if not isinstance(name, unicode):
-                        raise LintError(_("Attribute name is not a string: %r") % name)
+                        raise LintError(_(u"Attribute name is not a string: %r") % name)
                     if not name:
                         raise LintError(_(u"Empty attribute name"))
                     if not isinstance(value, unicode):
-                        raise LintError(_("Attribute value is not a string: %r") % value)
+                        raise LintError(_(u"Attribute value is not a string: %r") % value)
                 if name in cdataElements:
-                    contentModelFlag = "CDATA"
+                    contentModelFlag = u"CDATA"
                 elif name in rcdataElements:
-                    contentModelFlag = "RCDATA"
-                elif name == "plaintext":
-                    contentModelFlag = "PLAINTEXT"
+                    contentModelFlag = u"RCDATA"
+                elif name == u"plaintext":
+                    contentModelFlag = u"PLAINTEXT"
 
-            elif type == "EndTag":
-                name = token["name"]
+            elif type == u"EndTag":
+                name = token[u"name"]
                 if not isinstance(name, unicode):
                     raise LintError(_(u"Tag name is not a string: %r") % name)
                 if not name:
@@ -54,35 +55,36 @@ def __iter__(self):
                 start_name = open_elements.pop()
                 if start_name != name:
                     raise LintError(_(u"EndTag (%s) does not match StartTag (%s)") % (name, start_name))
-                contentModelFlag = "PCDATA"
+                contentModelFlag = u"PCDATA"
 
-            elif type == "Comment":
-                if contentModelFlag != "PCDATA":
-                    raise LintError(_("Comment not in PCDATA content model flag"))
+            elif type == u"Comment":
+                if contentModelFlag != u"PCDATA":
+                    raise LintError(_(u"Comment not in PCDATA content model flag"))
 
-            elif type in ("Characters", "SpaceCharacters"):
-                data = token["data"]
+            elif type in (u"Characters", u"SpaceCharacters"):
+                data = token[u"data"]
                 if not isinstance(data, unicode):
-                    raise LintError(_("Attribute name is not a string: %r") % data)
+                    raise LintError(_(u"Attribute name is not a string: %r") % data)
                 if not data:
                     raise LintError(_(u"%s token with empty data") % type)
-                if type == "SpaceCharacters":
+                if type == u"SpaceCharacters":
                     data = data.strip(spaceCharacters)
                     if data:
                         raise LintError(_(u"Non-space character(s) found in SpaceCharacters token: ") % data)
 
-            elif type == "Doctype":
-                name = token["name"]
-                if contentModelFlag != "PCDATA":
-                    raise LintError(_("Doctype not in PCDATA content model flag: %s") % name)
+            elif type == u"Doctype":
+                name = token[u"name"]
+                if contentModelFlag != u"PCDATA":
+                    raise LintError(_(u"Doctype not in PCDATA content model flag: %s") % name)
                 if not isinstance(name, unicode):
                     raise LintError(_(u"Tag name is not a string: %r") % name)
                 # XXX: what to do with token["data"] ?
 
-            elif type in ("ParseError", "SerializeError"):
+            elif type in (u"ParseError", u"SerializeError"):
                 pass
 
             else:
                 raise LintError(_(u"Unknown token type: %s") % type)
 
             yield token
+    __iter__.func_annotations = {}