4
4
from html5lib .constants import DataLossWarning
5
5
import etree as etree_builders
6
6
try :
7
- import lxml .html as etree
7
+ import lxml .etree as etree
8
8
except ImportError :
9
9
import lxml .etree as etree
10
10
@@ -48,21 +48,33 @@ def testSerializer(element):
48
48
finalText = None
49
49
def serializeElement (element , indent = 0 ):
50
50
if not hasattr (element , "tag" ):
51
- rv .append ("#document" )
52
- if element .docinfo .internalDTD :
53
- if not (element .docinfo .public_id or element .docinfo .system_url ):
54
- dtd_str = "<!DOCTYPE %s>" % element .docinfo .root_name
55
- else :
56
- dtd_str = """<!DOCTYPE %s PUBLIC "%s" "%s">""" % (
57
- element .docinfo .root_name , element .docinfo .public_id ,
58
- element .docinfo .system_url )
59
- rv .append ("|%s%s" % (' ' * (indent + 2 ), dtd_str ))
60
- next_element = element .getroot ()
61
- while next_element .getprevious () is not None :
62
- next_element = next_element .getprevious ()
63
- while next_element is not None :
64
- serializeElement (next_element , indent + 2 )
65
- next_element = next_element .getnext ()
51
+ if hasattr (element , "getroot" ):
52
+ #Full tree case
53
+ rv .append ("#document" )
54
+ if element .docinfo .internalDTD :
55
+ if not (element .docinfo .public_id or
56
+ element .docinfo .system_url ):
57
+ dtd_str = "<!DOCTYPE %s>" % element .docinfo .root_name
58
+ else :
59
+ dtd_str = """<!DOCTYPE %s PUBLIC "%s" "%s">""" % (
60
+ element .docinfo .root_name ,
61
+ element .docinfo .public_id ,
62
+ element .docinfo .system_url )
63
+ rv .append ("|%s%s" % (' ' * (indent + 2 ), dtd_str ))
64
+ next_element = element .getroot ()
65
+ while next_element .getprevious () is not None :
66
+ next_element = next_element .getprevious ()
67
+ while next_element is not None :
68
+ serializeElement (next_element , indent + 2 )
69
+ next_element = next_element .getnext ()
70
+ elif isinstance (element , basestring ):
71
+ #Text in a fragment
72
+ rv .append ("|%s\" %s\" " % (' ' * indent , element ))
73
+ else :
74
+ #Fragment case
75
+ rv .append ("#document-fragment" )
76
+ for next_element in element :
77
+ serializeElement (next_element , indent + 2 )
66
78
elif type (element .tag ) == type (etree .Comment ):
67
79
rv .append ("|%s<!-- %s -->" % (' ' * indent , element .text ))
68
80
else :
@@ -132,13 +144,13 @@ class TreeBuilder(_base.TreeBuilder):
132
144
doctypeClass = DocumentType
133
145
elementClass = None
134
146
commentClass = None
135
- fragmentClass = None
147
+ fragmentClass = Document
136
148
137
149
def __init__ (self , fullTree = False ):
138
150
builder = etree_builders .getETreeModule (etree , fullTree = fullTree )
139
151
self .elementClass = builder .Element
140
152
self .commentClass = builder .Comment
141
- self .fragmentClass = builder .DocumentFragment
153
+ # self.fragmentClass = builder.DocumentFragment
142
154
_base .TreeBuilder .__init__ (self )
143
155
144
156
def reset (self ):
@@ -157,7 +169,14 @@ def getDocument(self):
157
169
return self .document ._elementTree .getroot ()
158
170
159
171
def getFragment (self ):
160
- return _base .TreeBuilder .getFragment (self )._element
172
+ fragment = []
173
+ element = self .openElements [0 ]._element
174
+ if element .text :
175
+ fragment .append (element .text )
176
+ fragment .extend (element .getchildren ())
177
+ if element .tail :
178
+ fragment .append (element .tail )
179
+ return fragment
161
180
162
181
def insertDoctype (self , name , publicId , systemId ):
163
182
if not name :
@@ -172,9 +191,10 @@ def insertCommentInitial(self, data, parent=None):
172
191
173
192
def insertRoot (self , name ):
174
193
"""Create the document root"""
175
- #Because of the way libxml2 works, it doesn't seem to be possible to alter information
176
- #like the doctype after the tree has been parsed. Therefore we need to use the built-in
177
- #parser to create our iniial tree, after which we can add elements like normal
194
+ #Because of the way libxml2 works, it doesn't seem to be possible to
195
+ #alter information like the doctype after the tree has been parsed.
196
+ #Therefore we need to use the built-in parser to create our initial
197
+ #tree, after which we can add elements like normal
178
198
docStr = ""
179
199
if self .doctype :
180
200
docStr += "<!DOCTYPE %s" % self .doctype .name
@@ -205,4 +225,4 @@ def insertRoot(self, name):
205
225
self .openElements .append (root_element )
206
226
207
227
#Reset to the default insert comment function
208
- self .insertComment = super (TreeBuilder , self ).insertComment
228
+ self .insertComment = super (TreeBuilder , self ).insertComment
0 commit comments