Skip to content

Commit 2f1d6e0

Browse files
committed
Upgrade Python syntax with pyupgrade --py3-plus
1 parent 637e90b commit 2f1d6e0

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

56 files changed

+197
-308
lines changed

debug-info.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from __future__ import print_function, unicode_literals
2-
31
import platform
42
import sys
53

doc/conf.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
#!/usr/bin/env python3
2-
# -*- coding: utf-8 -*-
32
#
43
# html5lib documentation build configuration file, created by
54
# sphinx-quickstart on Wed May 8 00:04:49 2013.
@@ -92,7 +91,7 @@
9291
]
9392

9493

95-
class CExtMock(object):
94+
class CExtMock:
9695
"""Required for autodoc on readthedocs.org where you cannot build C extensions."""
9796
def __init__(self, *args, **kwargs):
9897
pass

html5lib/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
* :func:`~.serializer.serialize`
2121
"""
2222

23-
from __future__ import absolute_import, division, unicode_literals
2423

2524
from .html5parser import HTMLParser, parse, parseFragment
2625
from .treebuilders import getTreeBuilder

html5lib/_ihatexml.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from __future__ import absolute_import, division, unicode_literals
2-
31
import re
42
import warnings
53

@@ -184,7 +182,7 @@ def escapeRegexp(string):
184182
nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]")
185183

186184

187-
class InfosetFilter(object):
185+
class InfosetFilter:
188186
replacementRegexp = re.compile(r"U[\dA-F]{5,5}")
189187

190188
def __init__(self,

html5lib/_inputstream.py

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from __future__ import absolute_import, division, unicode_literals
2-
31
from six import text_type
42
from six.moves import http_client, urllib
53

@@ -14,9 +12,9 @@
1412
from . import _utils
1513

1614
# Non-unicode versions of constants for use in the pre-parser
17-
spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
18-
asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
19-
asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase])
15+
spaceCharactersBytes = frozenset(item.encode("ascii") for item in spaceCharacters)
16+
asciiLettersBytes = frozenset(item.encode("ascii") for item in asciiLetters)
17+
asciiUppercaseBytes = frozenset(item.encode("ascii") for item in asciiUppercase)
2018
spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"])
2119

2220

@@ -48,7 +46,7 @@
4846
charsUntilRegEx = {}
4947

5048

51-
class BufferedStream(object):
49+
class BufferedStream:
5250
"""Buffering for streams that do not have buffering of their own
5351
5452
The buffer is implemented as a list of chunks on the assumption that
@@ -86,7 +84,7 @@ def read(self, bytes):
8684
return self._readFromBuffer(bytes)
8785

8886
def _bufferedBytes(self):
89-
return sum([len(item) for item in self.buffer])
87+
return sum(len(item) for item in self.buffer)
9088

9189
def _readStream(self, bytes):
9290
data = self.stream.read(bytes)
@@ -131,9 +129,9 @@ def HTMLInputStream(source, **kwargs):
131129
isinstance(source.fp, http_client.HTTPResponse))):
132130
isUnicode = False
133131
elif hasattr(source, "read"):
134-
isUnicode = isinstance(source.read(0), text_type)
132+
isUnicode = isinstance(source.read(0), str)
135133
else:
136-
isUnicode = isinstance(source, text_type)
134+
isUnicode = isinstance(source, str)
137135

138136
if isUnicode:
139137
encodings = [x for x in kwargs if x.endswith("_encoding")]
@@ -145,7 +143,7 @@ def HTMLInputStream(source, **kwargs):
145143
return HTMLBinaryInputStream(source, **kwargs)
146144

147145

148-
class HTMLUnicodeInputStream(object):
146+
class HTMLUnicodeInputStream:
149147
"""Provides a unicode stream of characters to the HTMLTokenizer.
150148
151149
This class takes care of character encoding and removing or replacing
@@ -325,7 +323,7 @@ def charsUntil(self, characters, opposite=False):
325323
if __debug__:
326324
for c in characters:
327325
assert(ord(c) < 128)
328-
regex = "".join(["\\x%02x" % ord(c) for c in characters])
326+
regex = "".join("\\x%02x" % ord(c) for c in characters)
329327
if not opposite:
330328
regex = "^%s" % regex
331329
chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex)
@@ -524,7 +522,7 @@ def changeEncoding(self, newEncoding):
524522
self.rawStream.seek(0)
525523
self.charEncoding = (newEncoding, "certain")
526524
self.reset()
527-
raise _ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))
525+
raise _ReparseException("Encoding changed from {} to {}".format(self.charEncoding[0], newEncoding))
528526

529527
def detectBOM(self):
530528
"""Attempts to detect at BOM at the start of the stream. If
@@ -673,7 +671,7 @@ def jumpTo(self, bytes):
673671
return True
674672

675673

676-
class EncodingParser(object):
674+
class EncodingParser:
677675
"""Mini parser for detecting character encoding from meta elements"""
678676

679677
def __init__(self, data):
@@ -861,7 +859,7 @@ def getAttribute(self):
861859
attrValue.append(c)
862860

863861

864-
class ContentAttrParser(object):
862+
class ContentAttrParser:
865863
def __init__(self, data):
866864
assert isinstance(data, bytes)
867865
self.data = data

html5lib/_tokenizer.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from __future__ import absolute_import, division, unicode_literals
2-
31
from six import unichr as chr
42

53
from collections import deque, OrderedDict
@@ -24,7 +22,7 @@
2422
attributeMap = OrderedDict
2523

2624

27-
class HTMLTokenizer(object):
25+
class HTMLTokenizer:
2826
""" This class takes care of tokenizing HTML.
2927
3028
* self.currentToken
@@ -50,7 +48,7 @@ def __init__(self, stream, parser=None, **kwargs):
5048

5149
# The current token being created
5250
self.currentToken = None
53-
super(HTMLTokenizer, self).__init__()
51+
super().__init__()
5452

5553
def __iter__(self):
5654
""" This is where the magic happens.

html5lib/_trie/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from __future__ import absolute_import, division, unicode_literals
2-
31
from .py import Trie
42

53
__all__ = ["Trie"]

html5lib/_trie/_base.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from __future__ import absolute_import, division, unicode_literals
2-
31
from collections.abc import Mapping
42

53

@@ -8,7 +6,7 @@ class Trie(Mapping):
86

97
def keys(self, prefix=None):
108
# pylint:disable=arguments-differ
11-
keys = super(Trie, self).keys()
9+
keys = super().keys()
1210

1311
if prefix is None:
1412
return set(keys)

html5lib/_trie/py.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
from __future__ import absolute_import, division, unicode_literals
21
from six import text_type
32

43
from bisect import bisect_left
@@ -8,7 +7,7 @@
87

98
class Trie(ABCTrie):
109
def __init__(self, data):
11-
if not all(isinstance(x, text_type) for x in data.keys()):
10+
if not all(isinstance(x, str) for x in data.keys()):
1211
raise TypeError("All keys must be strings")
1312

1413
self._data = data

html5lib/_utils.py

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,10 @@
1-
from __future__ import absolute_import, division, unicode_literals
2-
31
from types import ModuleType
42

53
from collections.abc import Mapping
64

75
from six import text_type, PY3
86

9-
if PY3:
10-
import xml.etree.ElementTree as default_etree
11-
else:
12-
try:
13-
import xml.etree.cElementTree as default_etree
14-
except ImportError:
15-
import xml.etree.ElementTree as default_etree
7+
import xml.etree.ElementTree as default_etree
168

179

1810
__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
@@ -28,10 +20,10 @@
2820
# escapes.
2921
try:
3022
_x = eval('"\\uD800"') # pylint:disable=eval-used
31-
if not isinstance(_x, text_type):
23+
if not isinstance(_x, str):
3224
# We need this with u"" because of http://bugs.jython.org/issue2039
3325
_x = eval('u"\\uD800"') # pylint:disable=eval-used
34-
assert isinstance(_x, text_type)
26+
assert isinstance(_x, str)
3527
except Exception:
3628
supports_lone_surrogates = False
3729
else:

html5lib/constants.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from __future__ import absolute_import, division, unicode_literals
2-
31
import string
42

53
EOF = None

html5lib/filters/alphabeticalattributes.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from __future__ import absolute_import, division, unicode_literals
2-
31
from . import base
42

53
from collections import OrderedDict

html5lib/filters/base.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
from __future__ import absolute_import, division, unicode_literals
2-
3-
4-
class Filter(object):
1+
class Filter:
52
def __init__(self, source):
63
self.source = source
74

html5lib/filters/inject_meta_charset.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from __future__ import absolute_import, division, unicode_literals
2-
31
from . import base
42

53

html5lib/filters/lint.py

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from __future__ import absolute_import, division, unicode_literals
2-
31
from six import text_type
42

53
from . import base
@@ -23,7 +21,7 @@ def __init__(self, source, require_matching_tags=True):
2321
:arg require_matching_tags: whether or not to require matching tags
2422
2523
"""
26-
super(Filter, self).__init__(source)
24+
super().__init__(source)
2725
self.require_matching_tags = require_matching_tags
2826

2927
def __iter__(self):
@@ -33,9 +31,9 @@ def __iter__(self):
3331
if type in ("StartTag", "EmptyTag"):
3432
namespace = token["namespace"]
3533
name = token["name"]
36-
assert namespace is None or isinstance(namespace, text_type)
34+
assert namespace is None or isinstance(namespace, str)
3735
assert namespace != ""
38-
assert isinstance(name, text_type)
36+
assert isinstance(name, str)
3937
assert name != ""
4038
assert isinstance(token["data"], dict)
4139
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
@@ -45,49 +43,49 @@ def __iter__(self):
4543
if type == "StartTag" and self.require_matching_tags:
4644
open_elements.append((namespace, name))
4745
for (namespace, name), value in token["data"].items():
48-
assert namespace is None or isinstance(namespace, text_type)
46+
assert namespace is None or isinstance(namespace, str)
4947
assert namespace != ""
50-
assert isinstance(name, text_type)
48+
assert isinstance(name, str)
5149
assert name != ""
52-
assert isinstance(value, text_type)
50+
assert isinstance(value, str)
5351

5452
elif type == "EndTag":
5553
namespace = token["namespace"]
5654
name = token["name"]
57-
assert namespace is None or isinstance(namespace, text_type)
55+
assert namespace is None or isinstance(namespace, str)
5856
assert namespace != ""
59-
assert isinstance(name, text_type)
57+
assert isinstance(name, str)
6058
assert name != ""
6159
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
62-
assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name}
60+
assert False, "Void element reported as EndTag token: {tag}".format(tag=name)
6361
elif self.require_matching_tags:
6462
start = open_elements.pop()
6563
assert start == (namespace, name)
6664

6765
elif type == "Comment":
6866
data = token["data"]
69-
assert isinstance(data, text_type)
67+
assert isinstance(data, str)
7068

7169
elif type in ("Characters", "SpaceCharacters"):
7270
data = token["data"]
73-
assert isinstance(data, text_type)
71+
assert isinstance(data, str)
7472
assert data != ""
7573
if type == "SpaceCharacters":
7674
assert data.strip(spaceCharacters) == ""
7775

7876
elif type == "Doctype":
7977
name = token["name"]
80-
assert name is None or isinstance(name, text_type)
81-
assert token["publicId"] is None or isinstance(name, text_type)
82-
assert token["systemId"] is None or isinstance(name, text_type)
78+
assert name is None or isinstance(name, str)
79+
assert token["publicId"] is None or isinstance(name, str)
80+
assert token["systemId"] is None or isinstance(name, str)
8381

8482
elif type == "Entity":
85-
assert isinstance(token["name"], text_type)
83+
assert isinstance(token["name"], str)
8684

8785
elif type == "SerializerError":
88-
assert isinstance(token["data"], text_type)
86+
assert isinstance(token["data"], str)
8987

9088
else:
91-
assert False, "Unknown token type: %(type)s" % {"type": type}
89+
assert False, "Unknown token type: {type}".format(type=type)
9290

9391
yield token

html5lib/filters/optionaltags.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from __future__ import absolute_import, division, unicode_literals
2-
31
from . import base
42

53

html5lib/filters/sanitizer.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
if Bleach is unsuitable for your needs.
77
88
"""
9-
from __future__ import absolute_import, division, unicode_literals
109

1110
import re
1211
import warnings
@@ -766,7 +765,7 @@ def __init__(self,
766765
hrefs--these are removed
767766
768767
"""
769-
super(Filter, self).__init__(source)
768+
super().__init__(source)
770769

771770
warnings.warn(_deprecation_msg, DeprecationWarning)
772771

@@ -874,8 +873,8 @@ def disallowed_token(self, token):
874873
assert token_type in ("StartTag", "EmptyTag")
875874
attrs = []
876875
for (ns, name), v in token["data"].items():
877-
attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v)))
878-
token["data"] = "<%s%s>" % (token["name"], ''.join(attrs))
876+
attrs.append(' {}="{}"'.format(name if ns is None else "{}:{}".format(prefixes[ns], name), escape(v)))
877+
token["data"] = "<{}{}>".format(token["name"], ''.join(attrs))
879878
else:
880879
token["data"] = "<%s>" % token["name"]
881880
if token.get("selfClosing"):

html5lib/filters/whitespace.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from __future__ import absolute_import, division, unicode_literals
2-
31
import re
42

53
from . import base

0 commit comments

Comments
 (0)