Skip to content

Commit 12a872f

Browse files
committed
Make beautifulsoup mostly work in a slightly hacky way (No namespace support)
1 parent dcaf1c7 commit 12a872f

File tree

2 files changed

+29
-11
lines changed

2 files changed

+29
-11
lines changed

src/html5lib/treebuilders/soup.py

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1+
import warnings
2+
13
from BeautifulSoup import BeautifulSoup, Tag, NavigableString, Comment, Declaration
24

35
import _base
6+
from html5lib.constants import namespaces, DataLossWarning
47

58
class AttrList(object):
69
def __init__(self, element):
@@ -22,10 +25,11 @@ def __contains__(self, name):
2225

2326

2427
class Element(_base.Node):
25-
def __init__(self, element, soup):
28+
def __init__(self, element, soup, namespace):
2629
_base.Node.__init__(self, element.name)
2730
self.element = element
2831
self.soup = soup
32+
self.namespace = namespace
2933

3034
def _nodeIndex(self, node, refNode):
3135
# Finds a node by identity rather than equality
@@ -99,19 +103,27 @@ def reparentChildren(self, newParent):
99103
child = self.element.contents[0]
100104
child.extract()
101105
if isinstance(child, Tag):
102-
newParent.appendChild(Element(child, self.soup))
106+
newParent.appendChild(Element(child, self.soup, namespaces["html"]))
103107
else:
104108
newParent.appendChild(TextNode(child, self.soup))
105109

106110
def cloneNode(self):
107-
node = Element(Tag(self.soup, self.element.name), self.soup)
111+
node = Element(Tag(self.soup, self.element.name), self.soup, self.namespace)
108112
for key,value in self.attributes:
109113
node.attributes[key] = value
110114
return node
111115

112116
def hasContent(self):
113117
return self.element.contents
114118

119+
def getNameTuple(self):
    """Return this node's ``(namespace, name)`` pair.

    When no namespace was recorded on the node (``self.namespace`` is
    None), the HTML namespace from ``namespaces`` is used instead.
    """
    # Identity test: only the None singleton means "no namespace"; an
    # equality test could be fooled by a value with a custom __eq__.
    if self.namespace is None:
        return namespaces["html"], self.name
    else:
        return self.namespace, self.name

# Read-only attribute view of getNameTuple().
nameTuple = property(getNameTuple)
126+
115127
class TextNode(Element):
116128
def __init__(self, element, soup):
117129
_base.Node.__init__(self, None)
@@ -124,27 +136,33 @@ def cloneNode(self):
124136
class TreeBuilder(_base.TreeBuilder):
125137
def documentClass(self):
126138
self.soup = BeautifulSoup("")
127-
return Element(self.soup, self.soup)
139+
return Element(self.soup, self.soup, None)
128140

129-
def insertDoctype(self, name, publicId, systemId):
141+
def insertDoctype(self, token):
142+
name = token["name"]
143+
publicId = token["publicId"]
144+
systemId = token["systemId"]
145+
130146
if publicId:
131-
self.soup.insert(0, Declaration("%s PUBLIC \"%s\" \"%s\""%(name, publicId, systemId)))
147+
self.soup.insert(0, Declaration("%s PUBLIC \"%s\" \"%s\""%(name, publicId, systemId or "")))
132148
elif systemId:
133149
self.soup.insert(0, Declaration("%s SYSTEM \"%s\""%
134150
(name, systemId)))
135151
else:
136152
self.soup.insert(0, Declaration(name))
137153

138-
def elementClass(self, name):
139-
return Element(Tag(self.soup, name), self.soup)
154+
def elementClass(self, name, namespace):
    """Create an Element wrapping a new BeautifulSoup Tag called *name*.

    A DataLossWarning is issued when *namespace* is neither None nor the
    HTML namespace. The namespace is still passed through to the returned
    Element, so it is recorded there even though the Tag itself is built
    from the bare name only.
    """
    if namespace not in (None, namespaces["html"]):
        warnings.warn("BeautifulSoup cannot represent elements in non-html namespace", DataLossWarning)
    return Element(Tag(self.soup, name), self.soup, namespace)
140158

141159
def commentClass(self, data):
142160
return TextNode(Comment(data), self.soup)
143161

144162
def fragmentClass(self):
145163
self.soup = BeautifulSoup("")
146164
self.soup.name = "[document_fragment]"
147-
return Element(self.soup, self.soup)
165+
return Element(self.soup, self.soup, None)
148166

149167
def appendChild(self, node):
150168
self.soup.insert(len(self.soup.contents), node.element)
@@ -169,7 +187,7 @@ def serializeElement(element, indent=0):
169187
name = m.group('name')
170188
publicId = m.group('publicId')
171189
if publicId is not None:
172-
systemId = m.group('systemId1')
190+
systemId = m.group('systemId1') or ""
173191
else:
174192
systemId = m.group('systemId2')
175193

tests/test_parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747

4848
try:
4949
import BeautifulSoup
50-
#treeTypes["beautifulsoup"] = treebuilders.getTreeBuilder("beautifulsoup", fullTree=True)
50+
treeTypes["beautifulsoup"] = treebuilders.getTreeBuilder("beautifulsoup", fullTree=True)
5151
except ImportError:
5252
pass
5353

0 commit comments

Comments
 (0)