Skip to content

Commit 9041a5d

Browse files
committed
Allow DOCTYPEs to round-trip, keeping public/system identifiers.
1 parent 4751ac4 commit 9041a5d

File tree

2 files changed

+23
-2
lines changed

2 files changed

+23
-2
lines changed

src/html5lib/serializer/htmlserializer.py

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -103,7 +103,23 @@ def serialize(self, treewalker, encoding=None):
103103
for token in treewalker:
104104
type = token["type"]
105105
if type == "Doctype":
106-
doctype = u"<!DOCTYPE %s>" % token["name"]
106+
doctype = u"<!DOCTYPE %s" % token["name"]
107+
108+
if token["publicId"]:
109+
doctype += u' PUBLIC "%s"' % token["publicId"]
110+
elif token["systemId"]:
111+
doctype += u" SYSTEM"
112+
if token["systemId"]:
113+
if token["systemId"].find(u'"') >= 0:
114+
if token["systemId"].find(u"'") >= 0:
115+
self.serializeError(_("System identifer contains both single and double quote characters"))
116+
quote_char = u"'"
117+
else:
118+
quote_char = u'"'
119+
doctype += u" %s%s%s" % (quote_char, token["systemId"], quote_char)
120+
121+
doctype += u">"
122+
107123
if encoding:
108124
yield doctype.encode(encoding)
109125
else:

tests/test_serializer.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,12 @@ def __iter__(self):
3939
for token in self.text(token[1]):
4040
yield token
4141
elif type == "Doctype":
42-
yield self.doctype(token[1])
42+
if len(token) == 4:
43+
yield self.doctype(token[1], token[2], token[3])
44+
elif len(token) == 3:
45+
yield self.doctype(token[1], token[2])
46+
else:
47+
yield self.doctype(token[1])
4348
else:
4449
raise ValueError("Unknown token type: " + type)
4550

0 commit comments

Comments
 (0)