Skip to content

Commit f50fb36

Browse files
committed
Special-case fix for invalid doctypes. A more general solution is needed.
1 parent 561cc33 commit f50fb36

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

src/html5lib/treebuilders/etree_lxml.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -281,8 +281,9 @@ def insertDoctype(self, token):
281281
publicId = token["publicId"]
282282
systemId = token["systemId"]
283283

284-
if not name or ihatexml.nonXmlBMPRegexp.search(name):
284+
if not name or ihatexml.nonXmlBMPRegexp.search(name) or name[0] == '"':
285285
warnings.warn("lxml cannot represent null or non-xml doctype", DataLossWarning)
286+
286287
doctype = self.doctypeClass(name, publicId, systemId)
287288
self.doctype = doctype
288289

@@ -296,7 +297,7 @@ def insertRoot(self, token):
296297
#Therefore we need to use the built-in parser to create our initial
297298
#tree, after which we can add elements like normal
298299
docStr = ""
299-
if self.doctype and self.doctype.name:
300+
if self.doctype and self.doctype.name and not self.doctype.name.startswith('"'):
300301
docStr += "<!DOCTYPE %s"%self.doctype.name
301302
if (self.doctype.publicId is not None or
302303
self.doctype.systemId is not None):

0 commit comments

Comments
 (0)