1
+ import warnings
2
+
1
3
from BeautifulSoup import BeautifulSoup , Tag , NavigableString , Comment , Declaration
2
4
3
5
import _base
6
+ from html5lib .constants import namespaces , DataLossWarning
4
7
5
8
class AttrList (object ):
6
9
def __init__ (self , element ):
@@ -22,10 +25,11 @@ def __contains__(self, name):
22
25
23
26
24
27
class Element (_base .Node ):
25
def __init__(self, element, soup, namespace):
    """Set up the node wrapper around ``element``.

    The base ``_base.Node`` initialiser is given ``element.name``; the
    wrapped element, the owning soup and the namespace are then stored on
    the instance as ``element``, ``soup`` and ``namespace``.
    """
    node_name = element.name
    _base.Node.__init__(self, node_name)
    self.element, self.soup = element, soup
    self.namespace = namespace
29
33
30
34
def _nodeIndex (self , node , refNode ):
31
35
# Finds a node by identity rather than equality
@@ -99,19 +103,27 @@ def reparentChildren(self, newParent):
99
103
child = self .element .contents [0 ]
100
104
child .extract ()
101
105
if isinstance (child , Tag ):
102
- newParent .appendChild (Element (child , self .soup ))
106
+ newParent .appendChild (Element (child , self .soup , namespaces [ "html" ] ))
103
107
else :
104
108
newParent .appendChild (TextNode (child , self .soup ))
105
109
106
110
def cloneNode(self):
    """Return a new Element with the same tag name, namespace and attributes.

    The copy wraps a freshly created ``Tag`` that shares this node's soup;
    only the attribute pairs yielded by ``self.attributes`` are copied onto
    it.
    """
    fresh_tag = Tag(self.soup, self.element.name)
    clone = Element(fresh_tag, self.soup, self.namespace)
    for attr_name, attr_value in self.attributes:
        clone.attributes[attr_name] = attr_value
    return clone
111
115
112
116
def hasContent(self):
    """Return the wrapped element's ``contents``.

    Callers can use the result in a boolean context: it is falsy when the
    contents are empty.
    """
    children = self.element.contents
    return children
114
118
119
def getNameTuple(self):
    """Return this node's ``(namespace, name)`` pair.

    A node whose ``namespace`` is ``None`` is reported under the HTML
    namespace, ``namespaces["html"]``; otherwise the stored namespace is
    returned unchanged.
    """
    # None is a singleton, so test identity rather than equality; this also
    # avoids being misled by a namespace object with a permissive __eq__.
    if self.namespace is None:
        return namespaces["html"], self.name
    return self.namespace, self.name

# Attribute-style, read-only access to getNameTuple().
nameTuple = property(getNameTuple)
126
+
115
127
class TextNode (Element ):
116
128
def __init__ (self , element , soup ):
117
129
_base .Node .__init__ (self , None )
@@ -124,27 +136,33 @@ def cloneNode(self):
124
136
class TreeBuilder (_base .TreeBuilder ):
125
137
def documentClass(self):
    """Create a new empty soup and return it wrapped as an Element.

    The ``BeautifulSoup("")`` object is stored on ``self.soup`` and is passed
    to ``Element`` as both the wrapped element and the soup, with ``None`` as
    the namespace.
    """
    document = BeautifulSoup("")
    self.soup = document
    return Element(document, document, None)
128
140
129
# insertDoctype(self, token): insert a doctype Declaration at index 0 of
# self.soup. The "name", "publicId" and "systemId" values are read from the
# token mapping. A truthy publicId selects the PUBLIC form, with a falsy
# systemId formatted as "" instead of e.g. None; otherwise a truthy systemId
# selects the SYSTEM form; otherwise only the name is used.
# NOTE(review): the spacing inside the escaped-quote format strings below may
# be an artefact of how this diff was captured -- confirm against the file.
- def insertDoctype (self , name , publicId , systemId ):
141
+ def insertDoctype (self , token ):
142
+ name = token ["name" ]
143
+ publicId = token ["publicId" ]
144
+ systemId = token ["systemId" ]
145
+
130
146
if publicId :
131
- self .soup .insert (0 , Declaration ("%s PUBLIC \" %s\" \" %s\" " % (name , publicId , systemId )))
147
+ self .soup .insert (0 , Declaration ("%s PUBLIC \" %s\" \" %s\" " % (name , publicId , systemId or "" )))
132
148
elif systemId :
133
149
self .soup .insert (0 , Declaration ("%s SYSTEM \" %s\" " %
134
150
(name , systemId )))
135
151
else :
136
152
self .soup .insert (0 , Declaration (name ))
137
153
138
def elementClass(self, name, namespace):
    """Create an Element wrapping a new ``Tag`` called ``name``.

    ``namespace`` is stored on the returned Element. When it is neither
    ``None`` nor the HTML namespace, a ``DataLossWarning`` is issued before
    the element is created; the element is still returned.
    """
    if namespace not in (None, namespaces["html"]):
        # Message text corrected (was "elemens in nn-html namespace").
        warnings.warn(
            "BeautifulSoup cannot represent elements in non-html namespace",
            DataLossWarning)
    return Element(Tag(self.soup, name), self.soup, namespace)
140
158
141
159
def commentClass(self, data):
    """Return a TextNode wrapping a ``Comment`` built from ``data``.

    The TextNode is given the builder's current ``self.soup``.
    """
    comment = Comment(data)
    return TextNode(comment, self.soup)
143
161
144
162
def fragmentClass(self):
    """Create a new empty soup named as a fragment and return it wrapped.

    The ``BeautifulSoup("")`` object is stored on ``self.soup``, its ``name``
    is set to ``"[document_fragment]"``, and it is passed to ``Element`` as
    both the wrapped element and the soup, with ``None`` as the namespace.
    """
    fragment = BeautifulSoup("")
    self.soup = fragment
    fragment.name = "[document_fragment]"
    return Element(fragment, fragment, None)
148
166
149
167
def appendChild (self , node ):
150
168
self .soup .insert (len (self .soup .contents ), node .element )
@@ -169,7 +187,7 @@ def serializeElement(element, indent=0):
169
187
name = m .group ('name' )
170
188
publicId = m .group ('publicId' )
171
189
if publicId is not None :
172
- systemId = m .group ('systemId1' )
190
+ systemId = m .group ('systemId1' ) or ""
173
191
else :
174
192
systemId = m .group ('systemId2' )
175
193
0 commit comments