Skip to content

Commit 4bf7e88

Browse files
committed
lxml fixes
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%401093
1 parent e51c9f6 commit 4bf7e88

File tree

2 files changed

+21
-8
lines changed

2 files changed

+21
-8
lines changed

src/html5lib/treebuilders/etree_lxml.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -170,13 +170,17 @@ def insertRoot(self, name):
170170
if self.doctype:
171171
docStr += "<!DOCTYPE %s"%self.doctype.name
172172
if self.doctype.publicId is not None:
173-
docStr += "PUBLIC %s"%self.doctype.publicId
173+
docStr += ' PUBLIC "%s"'%self.doctype.publicId
174174
if self.doctype.systemId:
175-
docStr += "SYSTEM %s"%self.doctype.systemId
175+
docStr += ' "%s"'%self.doctype.systemId
176176
docStr += ">"
177177
docStr += "<html></html>"
178178

179-
root = etree.fromstring(docStr)
179+
try:
180+
root = etree.fromstring(docStr)
181+
except etree.XMLSyntaxError:
182+
print docStr
183+
raise
180184

181185
#Create the root document and add the ElementTree to it
182186
self.document = self.documentClass()

tests/test_parser.py

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,13 @@
11
import os
22
import sys
33
import traceback
4-
54
import StringIO
65
import unittest
7-
from support import html5lib_test_files, TestData, convert, convertExpected
6+
import warnings
7+
8+
warnings.simplefilter("error")
89

10+
from support import html5lib_test_files, TestData, convert, convertExpected
911
from html5lib import html5parser, treebuilders, constants
1012

1113
treeTypes = {"simpletree":treebuilders.getTreeBuilder("simpletree"),
@@ -34,8 +36,11 @@
3436
pass
3537

3638
try:
37-
import lxml.etree as lxml
38-
treeTypes['lxml'] = treebuilders.getTreeBuilder("etree", lxml, fullTree=True)
39+
try:
40+
import lxml.html as lxml
41+
except ImportError:
42+
import lxml.etree as lxml
43+
treeTypes['lxml'] = treebuilders.getTreeBuilder("lxml", lxml, fullTree=True)
3944
except ImportError:
4045
pass
4146

@@ -70,7 +75,11 @@ def runParserTest(self, innerHTML, input, expected, errors, treeClass):
7075
if innerHTML:
7176
document = p.parseFragment(StringIO.StringIO(input), innerHTML)
7277
else:
73-
document = p.parse(StringIO.StringIO(input))
78+
try:
79+
document = p.parse(StringIO.StringIO(input))
80+
except constants.DataLossWarning:
81+
sys.stderr.write("Test input causes known dataloss, skipping")
82+
return
7483
except:
7584
errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected,
7685
"\nTraceback:", traceback.format_exc()])

0 commit comments

Comments
 (0)