Skip to content

Commit 1a8ee1a

Browse files
committed
Fix lxml to work in the fragment case
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%401128
1 parent abe8a00 commit 1a8ee1a

File tree

1 file changed

+43
-23
lines changed

1 file changed

+43
-23
lines changed

src/html5lib/treebuilders/etree_lxml.py

Lines changed: 43 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from html5lib.constants import DataLossWarning
55
import etree as etree_builders
66
try:
7-
import lxml.html as etree
7+
import lxml.etree as etree
88
except ImportError:
99
import lxml.etree as etree
1010

@@ -48,21 +48,33 @@ def testSerializer(element):
4848
finalText = None
4949
def serializeElement(element, indent=0):
5050
if not hasattr(element, "tag"):
51-
rv.append("#document")
52-
if element.docinfo.internalDTD:
53-
if not (element.docinfo.public_id or element.docinfo.system_url):
54-
dtd_str = "<!DOCTYPE %s>"%element.docinfo.root_name
55-
else:
56-
dtd_str = """<!DOCTYPE %s PUBLIC "%s" "%s">"""%(
57-
element.docinfo.root_name, element.docinfo.public_id,
58-
element.docinfo.system_url)
59-
rv.append("|%s%s"%(' '*(indent+2), dtd_str))
60-
next_element = element.getroot()
61-
while next_element.getprevious() is not None:
62-
next_element = next_element.getprevious()
63-
while next_element is not None:
64-
serializeElement(next_element, indent+2)
65-
next_element = next_element.getnext()
51+
if hasattr(element, "getroot"):
52+
#Full tree case
53+
rv.append("#document")
54+
if element.docinfo.internalDTD:
55+
if not (element.docinfo.public_id or
56+
element.docinfo.system_url):
57+
dtd_str = "<!DOCTYPE %s>"%element.docinfo.root_name
58+
else:
59+
dtd_str = """<!DOCTYPE %s PUBLIC "%s" "%s">"""%(
60+
element.docinfo.root_name,
61+
element.docinfo.public_id,
62+
element.docinfo.system_url)
63+
rv.append("|%s%s"%(' '*(indent+2), dtd_str))
64+
next_element = element.getroot()
65+
while next_element.getprevious() is not None:
66+
next_element = next_element.getprevious()
67+
while next_element is not None:
68+
serializeElement(next_element, indent+2)
69+
next_element = next_element.getnext()
70+
elif isinstance(element, basestring):
71+
#Text in a fragment
72+
rv.append("|%s\"%s\""%(' '*indent, element))
73+
else:
74+
#Fragment case
75+
rv.append("#document-fragment")
76+
for next_element in element:
77+
serializeElement(next_element, indent+2)
6678
elif type(element.tag) == type(etree.Comment):
6779
rv.append("|%s<!-- %s -->"%(' '*indent, element.text))
6880
else:
@@ -132,13 +144,13 @@ class TreeBuilder(_base.TreeBuilder):
132144
doctypeClass = DocumentType
133145
elementClass = None
134146
commentClass = None
135-
fragmentClass = None
147+
fragmentClass = Document
136148

137149
def __init__(self, fullTree = False):
138150
builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
139151
self.elementClass = builder.Element
140152
self.commentClass = builder.Comment
141-
self.fragmentClass = builder.DocumentFragment
153+
#self.fragmentClass = builder.DocumentFragment
142154
_base.TreeBuilder.__init__(self)
143155

144156
def reset(self):
@@ -157,7 +169,14 @@ def getDocument(self):
157169
return self.document._elementTree.getroot()
158170

159171
def getFragment(self):
160-
return _base.TreeBuilder.getFragment(self)._element
172+
fragment = []
173+
element = self.openElements[0]._element
174+
if element.text:
175+
fragment.append(element.text)
176+
fragment.extend(element.getchildren())
177+
if element.tail:
178+
fragment.append(element.tail)
179+
return fragment
161180

162181
def insertDoctype(self, name, publicId, systemId):
163182
if not name:
@@ -172,9 +191,10 @@ def insertCommentInitial(self, data, parent=None):
172191

173192
def insertRoot(self, name):
174193
"""Create the document root"""
175-
#Because of the way libxml2 works, it doesn't seem to be possible to alter information
176-
#like the doctype after the tree has been parsed. Therefore we need to use the built-in
177-
#parser to create our iniial tree, after which we can add elements like normal
194+
#Because of the way libxml2 works, it doesn't seem to be possible to
195+
#alter information like the doctype after the tree has been parsed.
196+
#Therefore we need to use the built-in parser to create our initial
197+
#tree, after which we can add elements like normal
178198
docStr = ""
179199
if self.doctype:
180200
docStr += "<!DOCTYPE %s"%self.doctype.name
@@ -205,4 +225,4 @@ def insertRoot(self, name):
205225
self.openElements.append(root_element)
206226

207227
#Reset to the default insert comment function
208-
self.insertComment = super(TreeBuilder, self).insertComment
228+
self.insertComment = super(TreeBuilder, self).insertComment

0 commit comments

Comments
 (0)