Skip to content

Commit ca260de

Browse files
committed
Fix ISSUE-140: make sure BeautifulSoup (bs) gets the right DOCTYPE format
1 parent bb0e7b4 commit ca260de

File tree

2 files changed

+6
-5
lines changed

2 files changed

+6
-5
lines changed

src/html5lib/treebuilders/soup.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -151,12 +151,12 @@ def insertDoctype(self, token):
151151
systemId = token["systemId"]
152152

153153
if publicId:
154-
self.soup.insert(0, Declaration("%s PUBLIC \"%s\" \"%s\""%(name, publicId, systemId or "")))
154+
self.soup.insert(0, Declaration("DOCTYPE %s PUBLIC \"%s\" \"%s\""%(name, publicId, systemId or "")))
155155
elif systemId:
156-
self.soup.insert(0, Declaration("%s SYSTEM \"%s\""%
156+
self.soup.insert(0, Declaration("DOCTYPE %s SYSTEM \"%s\""%
157157
(name, systemId)))
158158
else:
159-
self.soup.insert(0, Declaration(name))
159+
self.soup.insert(0, Declaration("DOCTYPE %s"%name))
160160

161161
def elementClass(self, name, namespace):
162162
if namespace is not None:
@@ -188,7 +188,7 @@ def testSerializer(element):
188188
rv = []
189189
def serializeElement(element, indent=0):
190190
if isinstance(element, Declaration):
191-
doctype_regexp = r'(?P<name>[^\s]*)( PUBLIC "(?P<publicId>.*)" "(?P<systemId1>.*)"| SYSTEM "(?P<systemId2>.*)")?'
191+
doctype_regexp = r'DOCTYPE\s+(?P<name>[^\s]*)( PUBLIC "(?P<publicId>.*)" "(?P<systemId1>.*)"| SYSTEM "(?P<systemId2>.*)")?'
192192
m = re.compile(doctype_regexp).match(element.string)
193193
assert m is not None, "DOCTYPE did not match expected format"
194194
name = m.group('name')

src/html5lib/treewalkers/soup.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88

99
class TreeWalker(_base.NonRecursiveTreeWalker):
1010
doctype_regexp = re.compile(
11-
r'(?P<name>[^\s]*)(\s*PUBLIC\s*"(?P<publicId>.*)"\s*"(?P<systemId1>.*)"|\s*SYSTEM\s*"(?P<systemId2>.*)")?')
11+
r'DOCTYPE\s+(?P<name>[^\s]*)(\s*PUBLIC\s*"(?P<publicId>.*)"\s*"(?P<systemId1>.*)"|\s*SYSTEM\s*"(?P<systemId2>.*)")?')
1212
def getNodeDetails(self, node):
1313
if isinstance(node, BeautifulSoup): # Document or DocumentFragment
1414
return (_base.DOCUMENT,)
@@ -26,6 +26,7 @@ def getNodeDetails(self, node):
2626
#been modified at all
2727
#We could just feed to it a html5lib tokenizer, I guess...
2828
assert m is not None, "DOCTYPE did not match expected format"
29+
2930
name = m.group('name')
3031
publicId = m.group('publicId')
3132
if publicId is not None:

0 commit comments

Comments
 (0)