Skip to content

Commit 5d925be

Browse files
committed
Regenerate the Py2 code using awkwardduet 1.1a4.
This finally sorts out the unicode/str mess, so yay!
1 parent c1029a4 commit 5d925be

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

54 files changed

+4659
-3743
lines changed

html5lib/__init__.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""
1+
u"""
22
HTML parsing library based on the WHATWG "HTML5"
33
specification. The parser is designed to be compatible with existing
44
HTML found in the wild and implements well-defined error recovery that
@@ -10,8 +10,9 @@
1010
f = open("my_document.html")
1111
tree = html5lib.parse(f)
1212
"""
13-
__version__ = "0.95-dev"
14-
from html5parser import HTMLParser, parse, parseFragment
15-
from treebuilders import getTreeBuilder
16-
from treewalkers import getTreeWalker
17-
from serializer import serialize
13+
from __future__ import absolute_import
14+
__version__ = u"0.95-dev"
15+
from .html5parser import HTMLParser, parse, parseFragment
16+
from .treebuilders import getTreeBuilder
17+
from .treewalkers import getTreeWalker
18+
from .serializer import serialize

html5lib/constants.py

Lines changed: 543 additions & 542 deletions
Large diffs are not rendered by default.

html5lib/filters/_base.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,12 @@
22
class Filter(object):
33
def __init__(self, source):
44
self.source = source
5+
__init__.func_annotations = {}
56

67
def __iter__(self):
78
return iter(self.source)
9+
__iter__.func_annotations = {}
810

911
def __getattr__(self, name):
1012
return getattr(self.source, name)
13+
__getattr__.func_annotations = {}
html5lib/filters/inject_meta_charset.py

Lines changed: 27 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,62 +1,65 @@
1-
import _base
1+
from __future__ import absolute_import
2+
from . import _base
23

34
class Filter(_base.Filter):
45
def __init__(self, source, encoding):
56
_base.Filter.__init__(self, source)
67
self.encoding = encoding
8+
__init__.func_annotations = {}
79

810
def __iter__(self):
9-
state = "pre_head"
11+
state = u"pre_head"
1012
meta_found = (self.encoding is None)
1113
pending = []
1214

1315
for token in _base.Filter.__iter__(self):
14-
type = token["type"]
15-
if type == "StartTag":
16-
if token["name"].lower() == u"head":
17-
state = "in_head"
16+
type = token[u"type"]
17+
if type == u"StartTag":
18+
if token[u"name"].lower() == u"head":
19+
state = u"in_head"
1820

19-
elif type == "EmptyTag":
20-
if token["name"].lower() == u"meta":
21+
elif type == u"EmptyTag":
22+
if token[u"name"].lower() == u"meta":
2123
# replace charset with actual encoding
2224
has_http_equiv_content_type = False
23-
for (namespace,name),value in token["data"].iteritems():
25+
for (namespace,name),value in token[u"data"].items():
2426
if namespace != None:
2527
continue
2628
elif name.lower() == u'charset':
27-
token["data"][(namespace,name)] = self.encoding
29+
token[u"data"][(namespace,name)] = self.encoding
2830
meta_found = True
2931
break
3032
elif name == u'http-equiv' and value.lower() == u'content-type':
3133
has_http_equiv_content_type = True
3234
else:
33-
if has_http_equiv_content_type and (None, u"content") in token["data"]:
34-
token["data"][(None, u"content")] = u'text/html; charset=%s' % self.encoding
35+
if has_http_equiv_content_type and (None, u"content") in token[u"data"]:
36+
token[u"data"][(None, u"content")] = u'text/html; charset=%s' % self.encoding
3537
meta_found = True
3638

37-
elif token["name"].lower() == u"head" and not meta_found:
39+
elif token[u"name"].lower() == u"head" and not meta_found:
3840
# insert meta into empty head
39-
yield {"type": "StartTag", "name": u"head",
40-
"data": token["data"]}
41-
yield {"type": "EmptyTag", "name": u"meta",
42-
"data": {(None, u"charset"): self.encoding}}
43-
yield {"type": "EndTag", "name": u"head"}
41+
yield {u"type": u"StartTag", u"name": u"head",
42+
u"data": token[u"data"]}
43+
yield {u"type": u"EmptyTag", u"name": u"meta",
44+
u"data": {(None, u"charset"): self.encoding}}
45+
yield {u"type": u"EndTag", u"name": u"head"}
4446
meta_found = True
4547
continue
4648

47-
elif type == "EndTag":
48-
if token["name"].lower() == u"head" and pending:
49+
elif type == u"EndTag":
50+
if token[u"name"].lower() == u"head" and pending:
4951
# insert meta into head (if necessary) and flush pending queue
5052
yield pending.pop(0)
5153
if not meta_found:
52-
yield {"type": "EmptyTag", "name": u"meta",
53-
"data": {(None, u"charset"): self.encoding}}
54+
yield {u"type": u"EmptyTag", u"name": u"meta",
55+
u"data": {(None, u"charset"): self.encoding}}
5456
while pending:
5557
yield pending.pop(0)
5658
meta_found = True
57-
state = "post_head"
59+
state = u"post_head"
5860

59-
if state == "in_head":
61+
if state == u"in_head":
6062
pending.append(token)
6163
else:
6264
yield token
65+
__iter__.func_annotations = {}

html5lib/filters/lint.py

Lines changed: 35 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1+
from __future__ import absolute_import
12
from gettext import gettext
23
_ = gettext
34

4-
import _base
5+
from . import _base
56
from html5lib.constants import cdataElements, rcdataElements, voidElements
67

78
from html5lib.constants import spaceCharacters
@@ -12,39 +13,39 @@ class LintError(Exception): pass
1213
class Filter(_base.Filter):
1314
def __iter__(self):
1415
open_elements = []
15-
contentModelFlag = "PCDATA"
16+
contentModelFlag = u"PCDATA"
1617
for token in _base.Filter.__iter__(self):
17-
type = token["type"]
18-
if type in ("StartTag", "EmptyTag"):
19-
name = token["name"]
20-
if contentModelFlag != "PCDATA":
21-
raise LintError(_("StartTag not in PCDATA content model flag: %s") % name)
18+
type = token[u"type"]
19+
if type in (u"StartTag", u"EmptyTag"):
20+
name = token[u"name"]
21+
if contentModelFlag != u"PCDATA":
22+
raise LintError(_(u"StartTag not in PCDATA content model flag: %s") % name)
2223
if not isinstance(name, unicode):
2324
raise LintError(_(u"Tag name is not a string: %r") % name)
2425
if not name:
2526
raise LintError(_(u"Empty tag name"))
26-
if type == "StartTag" and name in voidElements:
27+
if type == u"StartTag" and name in voidElements:
2728
raise LintError(_(u"Void element reported as StartTag token: %s") % name)
28-
elif type == "EmptyTag" and name not in voidElements:
29-
raise LintError(_(u"Non-void element reported as EmptyTag token: %s") % token["name"])
30-
if type == "StartTag":
29+
elif type == u"EmptyTag" and name not in voidElements:
30+
raise LintError(_(u"Non-void element reported as EmptyTag token: %s") % token[u"name"])
31+
if type == u"StartTag":
3132
open_elements.append(name)
32-
for name, value in token["data"]:
33+
for name, value in token[u"data"]:
3334
if not isinstance(name, unicode):
34-
raise LintError(_("Attribute name is not a string: %r") % name)
35+
raise LintError(_(u"Attribute name is not a string: %r") % name)
3536
if not name:
3637
raise LintError(_(u"Empty attribute name"))
3738
if not isinstance(value, unicode):
38-
raise LintError(_("Attribute value is not a string: %r") % value)
39+
raise LintError(_(u"Attribute value is not a string: %r") % value)
3940
if name in cdataElements:
40-
contentModelFlag = "CDATA"
41+
contentModelFlag = u"CDATA"
4142
elif name in rcdataElements:
42-
contentModelFlag = "RCDATA"
43-
elif name == "plaintext":
44-
contentModelFlag = "PLAINTEXT"
43+
contentModelFlag = u"RCDATA"
44+
elif name == u"plaintext":
45+
contentModelFlag = u"PLAINTEXT"
4546

46-
elif type == "EndTag":
47-
name = token["name"]
47+
elif type == u"EndTag":
48+
name = token[u"name"]
4849
if not isinstance(name, unicode):
4950
raise LintError(_(u"Tag name is not a string: %r") % name)
5051
if not name:
@@ -54,35 +55,36 @@ def __iter__(self):
5455
start_name = open_elements.pop()
5556
if start_name != name:
5657
raise LintError(_(u"EndTag (%s) does not match StartTag (%s)") % (name, start_name))
57-
contentModelFlag = "PCDATA"
58+
contentModelFlag = u"PCDATA"
5859

59-
elif type == "Comment":
60-
if contentModelFlag != "PCDATA":
61-
raise LintError(_("Comment not in PCDATA content model flag"))
60+
elif type == u"Comment":
61+
if contentModelFlag != u"PCDATA":
62+
raise LintError(_(u"Comment not in PCDATA content model flag"))
6263

63-
elif type in ("Characters", "SpaceCharacters"):
64-
data = token["data"]
64+
elif type in (u"Characters", u"SpaceCharacters"):
65+
data = token[u"data"]
6566
if not isinstance(data, unicode):
66-
raise LintError(_("Attribute name is not a string: %r") % data)
67+
raise LintError(_(u"Attribute name is not a string: %r") % data)
6768
if not data:
6869
raise LintError(_(u"%s token with empty data") % type)
69-
if type == "SpaceCharacters":
70+
if type == u"SpaceCharacters":
7071
data = data.strip(spaceCharacters)
7172
if data:
7273
raise LintError(_(u"Non-space character(s) found in SpaceCharacters token: ") % data)
7374

74-
elif type == "Doctype":
75-
name = token["name"]
76-
if contentModelFlag != "PCDATA":
77-
raise LintError(_("Doctype not in PCDATA content model flag: %s") % name)
75+
elif type == u"Doctype":
76+
name = token[u"name"]
77+
if contentModelFlag != u"PCDATA":
78+
raise LintError(_(u"Doctype not in PCDATA content model flag: %s") % name)
7879
if not isinstance(name, unicode):
7980
raise LintError(_(u"Tag name is not a string: %r") % name)
8081
# XXX: what to do with token["data"] ?
8182

82-
elif type in ("ParseError", "SerializeError"):
83+
elif type in (u"ParseError", u"SerializeError"):
8384
pass
8485

8586
else:
8687
raise LintError(_(u"Unknown token type: %s") % type)
8788

8889
yield token
90+
__iter__.func_annotations = {}

0 commit comments

Comments
 (0)