Skip to content

Commit 8911496

Browse files
committed
Fail on namespaces with BS.
1 parent 47b430e commit 8911496

File tree

3 files changed: +13 -6 lines changed

3 files changed: +13 -6 lines changed

src/html5lib/treebuilders/soup.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -134,6 +134,11 @@ def cloneNode(self):
134134
raise NotImplementedError
135135

136136
class TreeBuilder(_base.TreeBuilder):
137+
def __init__(self, namespaceHTMLElements):
138+
if namespaceHTMLElements:
139+
warnings.warn("BeautifulSoup cannot represent elements in any namespace", DataLossWarning)
140+
_base.TreeBuilder.__init__(self, namespaceHTMLElements)
141+
137142
def documentClass(self):
138143
self.soup = BeautifulSoup("")
139144
return Element(self.soup, self.soup, None)
@@ -152,8 +157,8 @@ def insertDoctype(self, token):
152157
self.soup.insert(0, Declaration(name))
153158

154159
def elementClass(self, name, namespace):
155-
if namespace not in (None, namespaces["html"]):
156-
warnings.warn("BeautifulSoup cannot represent elemens in nn-html namespace", DataLossWarning)
160+
if namespace is not None:
161+
warnings.warn("BeautifulSoup cannot represent elements in any namespace", DataLossWarning)
157162
return Element(Tag(self.soup, name), self.soup, namespace)
158163

159164
def commentClass(self, data):

tests/test_parser.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -80,8 +80,11 @@ def runParserTest(self, innerHTML, input, expected, errors, treeClass,
8080
namespaceHTMLElements):
8181
#XXX - move this out into the setup function
8282
#concatenate all consecutive character tokens into a single token
83-
p = html5parser.HTMLParser(tree = treeClass,
84-
namespaceHTMLElements=namespaceHTMLElements)
83+
try:
84+
p = html5parser.HTMLParser(tree = treeClass,
85+
namespaceHTMLElements=namespaceHTMLElements)
86+
except constants.DataLossWarning:
87+
return
8588

8689
errors = [item.decode("utf-8") for item in errors]
8790

@@ -92,7 +95,6 @@ def runParserTest(self, innerHTML, input, expected, errors, treeClass,
9295
try:
9396
document = p.parse(StringIO.StringIO(input))
9497
except constants.DataLossWarning:
95-
sys.stderr.write("Test input causes known dataloss, skipping")
9698
return
9799
except:
98100
errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected,

tests/test_treewalkers.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -225,8 +225,8 @@ def sortattrs(x):
225225

226226
class TestCase(unittest.TestCase):
227227
def runTest(self, innerHTML, input, expected, errors, treeClass):
228-
p = html5parser.HTMLParser(tree = treeClass["builder"])
229228
try:
229+
p = html5parser.HTMLParser(tree = treeClass["builder"])
230230
if innerHTML:
231231
document = p.parseFragment(StringIO.StringIO(input), innerHTML)
232232
else:

0 commit comments

Comments
 (0)