Skip to content

Commit 226b37c

Browse files
committed
Fixed various BeautifulSoup bugs, including issue 70
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%401161
1 parent abf1605 commit 226b37c

File tree

1 file changed

+35 -12 lines changed

1 file changed

+35 -12 lines changed

src/html5lib/treebuilders/soup.py

Lines changed: 35 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -25,19 +25,35 @@ class Element(_base.Node):
2525
def __init__(self, element, soup):
2626
_base.Node.__init__(self, element.name)
2727
self.element = element
28-
self.soup=soup
28+
self.soup = soup
29+
30+
def _nodeIndex(self, node, refNode):
31+
# Finds a node by identity rather than equality
32+
for index in range(len(self.element.contents)):
33+
if id(self.element.contents[index]) == id(refNode.element):
34+
return index
35+
return None
2936

3037
def appendChild(self, node):
3138
if (node.element.__class__ == NavigableString and self.element.contents
3239
and self.element.contents[-1].__class__ == NavigableString):
33-
newNode = TextNode(NavigableString(
34-
self.element.contents[-1]+node.element), self.soup)
35-
self.element.contents[-1].extract()
36-
self.appendChild(newNode)
40+
# Concatenate new text onto old text node
41+
# (TODO: This has O(n^2) performance, for input like "a</a>a</a>a</a>...")
42+
newStr = NavigableString(self.element.contents[-1]+node.element)
43+
44+
# Remove the old text node
45+
# (Can't simply use .extract() by itself, because it fails if
46+
# an equal text node exists within the parent node)
47+
oldElement = self.element.contents[-1]
48+
del self.element.contents[-1]
49+
oldElement.parent = None
50+
oldElement.extract()
51+
52+
self.element.insert(len(self.element.contents), newStr)
3753
else:
3854
self.element.insert(len(self.element.contents), node.element)
3955
node.parent = self
40-
56+
4157
def getAttributes(self):
4258
return AttrList(self.element)
4359

@@ -56,18 +72,25 @@ def insertText(self, data, insertBefore=None):
5672
self.appendChild(text)
5773

5874
def insertBefore(self, node, refNode):
59-
index = self.element.contents.index(refNode.element)
75+
index = self._nodeIndex(node, refNode)
6076
if (node.element.__class__ == NavigableString and self.element.contents
6177
and self.element.contents[index-1].__class__ == NavigableString):
62-
newNode = TextNode(NavigableString(
63-
self.element.contents[index-1]+node.element), self.soup)
64-
self.element.contents[index-1].extract()
65-
self.insertBefore(newNode, refNode)
78+
# (See comments in appendChild)
79+
newStr = NavigableString(self.element.contents[index-1]+node.element)
80+
oldNode = self.element.contents[index-1]
81+
del self.element.contents[index-1]
82+
oldNode.parent = None
83+
oldNode.extract()
84+
85+
self.element.insert(index-1, newStr)
6686
else:
6787
self.element.insert(index, node.element)
6888
node.parent = self
6989

7090
def removeChild(self, node):
91+
index = self._nodeIndex(node.parent, node)
92+
del node.parent.element.contents[index]
93+
node.element.parent = None
7194
node.element.extract()
7295
node.parent = None
7396

@@ -93,7 +116,7 @@ class TextNode(Element):
93116
def __init__(self, element, soup):
94117
_base.Node.__init__(self, None)
95118
self.element = element
96-
self.soup=soup
119+
self.soup = soup
97120

98121
def cloneNode(self):
99122
raise NotImplementedError

0 commit comments

Comments
 (0)