Skip to content

Commit 8f7f9f0

Browse files
committed
Move the treewalker printer to the treewalker module
1 parent 9695fc8 commit 8f7f9f0

File tree

2 files changed

+83
-79
lines changed

2 files changed

+83
-79
lines changed

html5lib/tests/test_treewalkers.py

Lines changed: 2 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -139,83 +139,6 @@ def GenshiAdapter(tree):
139139
"adapter": GenshiAdapter,
140140
"walker": treewalkers.getTreeWalker("genshi")}
141141

142-
143-
def concatenateCharacterTokens(tokens):
144-
charactersToken = None
145-
for token in tokens:
146-
type = token["type"]
147-
if type in ("Characters", "SpaceCharacters"):
148-
if charactersToken is None:
149-
charactersToken = {"type": "Characters", "data": token["data"]}
150-
else:
151-
charactersToken["data"] += token["data"]
152-
else:
153-
if charactersToken is not None:
154-
yield charactersToken
155-
charactersToken = None
156-
yield token
157-
if charactersToken is not None:
158-
yield charactersToken
159-
160-
161-
def convertTokens(tokens):
162-
output = []
163-
indent = 0
164-
for token in concatenateCharacterTokens(tokens):
165-
type = token["type"]
166-
if type in ("StartTag", "EmptyTag"):
167-
if (token["namespace"] and
168-
token["namespace"] != constants.namespaces["html"]):
169-
if token["namespace"] in constants.prefixes:
170-
name = constants.prefixes[token["namespace"]]
171-
else:
172-
name = token["namespace"]
173-
name += " " + token["name"]
174-
else:
175-
name = token["name"]
176-
output.append("%s<%s>" % (" " * indent, name))
177-
indent += 2
178-
attrs = token["data"]
179-
if attrs:
180-
# TODO: Remove this if statement, attrs should always exist
181-
for (namespace, name), value in sorted(attrs.items()):
182-
if namespace:
183-
if namespace in constants.prefixes:
184-
outputname = constants.prefixes[namespace]
185-
else:
186-
outputname = namespace
187-
outputname += " " + name
188-
else:
189-
outputname = name
190-
output.append("%s%s=\"%s\"" % (" " * indent, outputname, value))
191-
if type == "EmptyTag":
192-
indent -= 2
193-
elif type == "EndTag":
194-
indent -= 2
195-
elif type == "Comment":
196-
output.append("%s<!-- %s -->" % (" " * indent, token["data"]))
197-
elif type == "Doctype":
198-
if token["name"]:
199-
if token["publicId"]:
200-
output.append("""%s<!DOCTYPE %s "%s" "%s">""" %
201-
(" " * indent, token["name"],
202-
token["publicId"],
203-
token["systemId"] and token["systemId"] or ""))
204-
elif token["systemId"]:
205-
output.append("""%s<!DOCTYPE %s "" "%s">""" %
206-
(" " * indent, token["name"],
207-
token["systemId"]))
208-
else:
209-
output.append("%s<!DOCTYPE %s>" % (" " * indent,
210-
token["name"]))
211-
else:
212-
output.append("%s<!DOCTYPE >" % (" " * indent,))
213-
elif type in ("Characters", "SpaceCharacters"):
214-
output.append("%s\"%s\"" % (" " * indent, token["data"]))
215-
else:
216-
pass # TODO: what to do with errors?
217-
return "\n".join(output)
218-
219142
import re
220143
attrlist = re.compile(r"^(\s+)\w+=.*(\n\1\w+=.*)+", re.M)
221144

@@ -265,7 +188,7 @@ def runTreewalkerTest(innerHTML, input, expected, errors, treeClass):
265188

266189
document = treeClass.get("adapter", lambda x: x)(document)
267190
try:
268-
output = convertTokens(treeClass["walker"](document))
191+
output = treewalkers.pprint(treeClass["walker"](document))
269192
output = attrlist.sub(sortattrs, output)
270193
expected = attrlist.sub(sortattrs, convertExpected(expected))
271194
diff = "".join(unified_diff([line + "\n" for line in expected.splitlines()],
@@ -323,7 +246,7 @@ def runTreewalkerEditTest(intext, expected, attrs_to_add, tree):
323246
set_attribute_on_first_child(document, nom, val, treeName)
324247

325248
document = treeClass.get("adapter", lambda x: x)(document)
326-
output = convertTokens(treeClass["walker"](document))
249+
output = treewalkers.pprint(treeClass["walker"](document))
327250
output = attrlist.sub(sortattrs, output)
328251
if not output in expected:
329252
raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))

html5lib/treewalkers/__init__.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,12 @@
1010

1111
from __future__ import absolute_import, division, unicode_literals
1212

13+
__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshistream", "lxmletree",
14+
"pulldom"]
15+
1316
import sys
1417

18+
from .. import constants
1519
from ..utils import default_etree
1620

1721
treeWalkerCache = {}
@@ -55,3 +59,80 @@ def getTreeWalker(treeType, implementation=None, **kwargs):
5559
# XXX: NEVER cache here, caching is done in the etree submodule
5660
return etree.getETreeModule(implementation, **kwargs).TreeWalker
5761
return treeWalkerCache.get(treeType)
62+
63+
64+
def concatenateCharacterTokens(tokens):
    """Merge each run of adjacent character tokens into a single token.

    Consecutive tokens whose ``"type"`` is ``"Characters"`` or
    ``"SpaceCharacters"`` are replaced by one new ``"Characters"`` token
    whose ``"data"`` is the concatenation of the data of the whole run.
    Every other token is yielded unchanged, in its original position.

    :arg tokens: iterable of token dicts; each has a ``"type"`` key and
        character tokens also have a ``"data"`` key

    :returns: generator of token dicts
    """
    # Data of the character tokens seen since the last non-character token.
    # Collected in a list and joined once so long runs are not rebuilt on
    # every append.
    pending = []
    for token in tokens:
        if token["type"] in ("Characters", "SpaceCharacters"):
            pending.append(token["data"])
            continue
        if pending:
            # Flush the finished run before the token that ended it.
            yield {"type": "Characters", "data": "".join(pending)}
            pending = []
        yield token
    if pending:
        # The stream ended in the middle of a run.
        yield {"type": "Characters", "data": "".join(pending)}
80+
81+
82+
def _qualifiedName(namespace, name):
    """Return ``name`` preceded by the printable form of ``namespace``.

    The namespace is printed as its entry in ``constants.prefixes`` when it
    has one, and as the raw namespace value otherwise, followed by a single
    space and then the name.
    """
    if namespace in constants.prefixes:
        prefix = constants.prefixes[namespace]
    else:
        prefix = namespace
    return prefix + " " + name


def _attributeSortKey(item):
    """Sort key for ``((namespace, name), value)`` attribute items.

    A falsy namespace (e.g. ``None``) is treated as the empty string so that
    attributes with and without a namespace can be ordered against each
    other; Python 3 refuses to compare ``None`` with a string.
    """
    (namespace, name), _ = item
    return (namespace or "", name)


def pprint(tokens):
    """Render a stream of tree walker tokens as an indented text tree.

    Adjacent character tokens are merged first.  Each start/empty tag,
    attribute, comment, doctype and text run then becomes one output line,
    indented two spaces per level of element nesting.  Attributes are listed
    under their element, sorted by namespace and then name.  Token types
    other than those are ignored.

    :arg tokens: iterable of token dicts as produced by a tree walker

    :returns: the rendered lines joined with newlines, as a single string
    """
    output = []
    indent = 0
    for token in concatenateCharacterTokens(tokens):
        tokenType = token["type"]
        if tokenType in ("StartTag", "EmptyTag"):
            namespace = token["namespace"]
            # Elements with no namespace, or in the HTML namespace, are
            # printed by bare name; anything else gets a namespace prefix.
            if namespace and namespace != constants.namespaces["html"]:
                name = _qualifiedName(namespace, token["name"])
            else:
                name = token["name"]
            output.append("%s<%s>" % (" " * indent, name))
            indent += 2
            attrs = token["data"]
            if attrs:
                # TODO: Remove this if statement, attrs should always exist
                for (attrNamespace, attrName), value in sorted(
                        attrs.items(), key=_attributeSortKey):
                    if attrNamespace:
                        outputname = _qualifiedName(attrNamespace, attrName)
                    else:
                        outputname = attrName
                    output.append("%s%s=\"%s\"" % (" " * indent, outputname, value))
            if tokenType == "EmptyTag":
                # An empty tag has no matching EndTag to undo the indent.
                indent -= 2
        elif tokenType == "EndTag":
            indent -= 2
        elif tokenType == "Comment":
            output.append("%s<!-- %s -->" % (" " * indent, token["data"]))
        elif tokenType == "Doctype":
            if token["name"]:
                if token["publicId"]:
                    output.append("""%s<!DOCTYPE %s "%s" "%s">""" %
                                  (" " * indent, token["name"],
                                   token["publicId"],
                                   token["systemId"] or ""))
                elif token["systemId"]:
                    output.append("""%s<!DOCTYPE %s "" "%s">""" %
                                  (" " * indent, token["name"],
                                   token["systemId"]))
                else:
                    output.append("%s<!DOCTYPE %s>" % (" " * indent,
                                                       token["name"]))
            else:
                output.append("%s<!DOCTYPE >" % (" " * indent,))
        elif tokenType in ("Characters", "SpaceCharacters"):
            output.append("%s\"%s\"" % (" " * indent, token["data"]))
        # TODO: what to do with errors?  Other token types are skipped.
    return "\n".join(output)

0 commit comments

Comments
 (0)