7
7
class HTMLSanitizerMixin (object ):
8
8
""" sanitization of XHTML+MathML+SVG and of inline style attributes."""
9
9
10
- acceptable_elements = ['a' , 'abbr' , 'acronym' , 'address' , 'area' , 'b' ,
11
- 'big' , 'blockquote' , 'br' , 'button' , 'caption' , 'center' , 'cite' ,
12
- 'code' , 'col' , 'colgroup' , 'dd' , 'del' , 'dfn' , 'dir' , 'div' , 'dl' , 'dt' ,
13
- 'em' , 'fieldset' , 'font' , 'form' , 'h1' , 'h2' , 'h3' , 'h4' , 'h5' , 'h6' ,
14
- 'hr' , 'i' , 'img' , 'input' , 'ins' , 'kbd' , 'label' , 'legend' , 'li' , 'map' ,
15
- 'menu' , 'ol' , 'optgroup' , 'option' , 'p' , 'pre' , 'q' , 's' , 'samp' ,
16
- 'select' , 'small' , 'span' , 'strike' , 'strong' , 'sub' , 'sup' , 'table' ,
17
- 'tbody' , 'td' , 'textarea' , 'tfoot' , 'th' , 'thead' , 'tr' , 'tt' , 'u' ,
18
- 'ul' , 'var' ]
10
+ acceptable_elements = ['a' , 'abbr' , 'acronym' , 'address' , 'area' ,
11
+ 'article' , 'aside' , 'audio' , 'b' , 'big' , 'blockquote' , 'br' , 'button' ,
12
+ 'canvas' , 'caption' , 'center' , 'cite' , 'code' , 'col' , 'colgroup' ,
13
+ 'command' , 'datagrid' , 'datalist' , 'dd' , 'del' , 'details' , 'dfn' ,
14
+ 'dialog' , 'dir' , 'div' , 'dl' , 'dt' , 'em' , 'event-source' , 'fieldset' ,
15
+ 'figure' , 'footer' , 'font' , 'form' , 'header' , 'h1' , 'h2' , 'h3' , 'h4' ,
16
+ 'h5' , 'h6' , 'hr' , 'i' , 'img' , 'input' , 'ins' , 'keygen' , 'kbd' ,
17
+ 'label' , 'legend' , 'li' , 'm' , 'map' , 'menu' , 'meter' , 'multicol' ,
18
+ 'nav' , 'nextid' , 'ol' , 'output' , 'optgroup' , 'option' , 'p' , 'pre' ,
19
+ 'progress' , 'q' , 's' , 'samp' , 'section' , 'select' , 'small' , 'sound' ,
20
+ 'source' , 'spacer' , 'span' , 'strike' , 'strong' , 'sub' , 'sup' , 'table' ,
21
+ 'tbody' , 'td' , 'textarea' , 'time' , 'tfoot' , 'th' , 'thead' , 'tr' , 'tt' ,
22
+ 'u' , 'ul' , 'var' , 'video' ]
19
23
20
24
mathml_elements = ['maction' , 'math' , 'merror' , 'mfrac' , 'mi' ,
21
25
'mmultiscripts' , 'mn' , 'mo' , 'mover' , 'mpadded' , 'mphantom' ,
@@ -24,24 +28,35 @@ class HTMLSanitizerMixin(object):
24
28
'munderover' , 'none' ]
25
29
26
30
svg_elements = ['a' , 'animate' , 'animateColor' , 'animateMotion' ,
27
- 'animateTransform' , 'circle ' , 'defs ' , 'desc ' , 'ellipse ' , 'font-face ' ,
28
- 'font-face- name' , 'font-face-src' , 'g' , 'glyph' , 'hkern' ,
31
+ 'animateTransform' , 'clipPath ' , 'circle ' , 'defs ' , 'desc ' , 'ellipse ' ,
32
+ 'font-face' , 'font-face- name' , 'font-face-src' , 'g' , 'glyph' , 'hkern' ,
29
33
'linearGradient' , 'line' , 'marker' , 'metadata' , 'missing-glyph' ,
30
34
'mpath' , 'path' , 'polygon' , 'polyline' , 'radialGradient' , 'rect' ,
31
35
'set' , 'stop' , 'svg' , 'switch' , 'text' , 'title' , 'tspan' , 'use' ]
32
36
33
37
acceptable_attributes = ['abbr' , 'accept' , 'accept-charset' , 'accesskey' ,
34
- 'action' , 'align' , 'alt' , 'axis' , 'border' , 'cellpadding' ,
35
- 'cellspacing' , 'char' , 'charoff' , 'charset' , 'checked' , 'cite' , 'class' ,
36
- 'clear' , 'cols' , 'colspan' , 'color' , 'compact' , 'coords' , 'datetime' ,
37
- 'dir' , 'disabled' , 'enctype' , 'for' , 'frame' , 'headers' , 'height' ,
38
- 'href' , 'hreflang' , 'hspace' , 'id' , 'ismap' , 'label' , 'lang' ,
39
- 'longdesc' , 'maxlength' , 'media' , 'method' , 'multiple' , 'name' ,
40
- 'nohref' , 'noshade' , 'nowrap' , 'prompt' , 'readonly' , 'rel' , 'rev' ,
41
- 'rows' , 'rowspan' , 'rules' , 'scope' , 'selected' , 'shape' , 'size' ,
42
- 'span' , 'src' , 'start' , 'style' , 'summary' , 'tabindex' , 'target' ,
43
- 'title' , 'type' , 'usemap' , 'valign' , 'value' , 'vspace' , 'width' ,
44
- 'xml:lang' ]
38
+ 'action' , 'align' , 'alt' , 'autocomplete' , 'autofocus' , 'axis' ,
39
+ 'background' , 'balance' , 'bgcolor' , 'bgproperties' , 'border' ,
40
+ 'bordercolor' , 'bordercolordark' , 'bordercolorlight' , 'bottompadding' ,
41
+ 'cellpadding' , 'cellspacing' , 'ch' , 'challenge' , 'char' , 'charoff' ,
42
+ 'choff' , 'charset' , 'checked' , 'cite' , 'class' , 'clear' , 'color' ,
43
+ 'cols' , 'colspan' , 'compact' , 'contenteditable' , 'controls' , 'coords' ,
44
+ 'data' , 'datafld' , 'datapagesize' , 'datasrc' , 'datetime' , 'default' ,
45
+ 'delay' , 'dir' , 'disabled' , 'draggable' , 'dynsrc' , 'enctype' , 'end' ,
46
+ 'face' , 'for' , 'form' , 'frame' , 'galleryimg' , 'gutter' , 'headers' ,
47
+ 'height' , 'hidefocus' , 'hidden' , 'high' , 'href' , 'hreflang' , 'hspace' ,
48
+ 'icon' , 'id' , 'inputmode' , 'ismap' , 'keytype' , 'label' , 'leftspacing' ,
49
+ 'lang' , 'list' , 'longdesc' , 'loop' , 'loopcount' , 'loopend' ,
50
+ 'loopstart' , 'low' , 'lowsrc' , 'max' , 'maxlength' , 'media' , 'method' ,
51
+ 'min' , 'multiple' , 'name' , 'nohref' , 'noshade' , 'nowrap' , 'open' ,
52
+ 'optimum' , 'pattern' , 'ping' , 'point-size' , 'prompt' , 'pqg' ,
53
+ 'radiogroup' , 'readonly' , 'rel' , 'repeat-max' , 'repeat-min' ,
54
+ 'replace' , 'required' , 'rev' , 'rightspacing' , 'rows' , 'rowspan' ,
55
+ 'rules' , 'scope' , 'selected' , 'shape' , 'size' , 'span' , 'src' , 'start' ,
56
+ 'step' , 'style' , 'summary' , 'suppress' , 'tabindex' , 'target' ,
57
+ 'template' , 'title' , 'toppadding' , 'type' , 'unselectable' , 'usemap' ,
58
+ 'urn' , 'valign' , 'value' , 'variable' , 'volume' , 'vspace' , 'vrml' ,
59
+ 'width' , 'wrap' , 'xml:lang' ]
45
60
46
61
mathml_attributes = ['actiontype' , 'align' , 'columnalign' , 'columnalign' ,
47
62
'columnalign' , 'columnlines' , 'columnspacing' , 'columnspan' , 'depth' ,
@@ -54,43 +69,45 @@ class HTMLSanitizerMixin(object):
54
69
'xlink:type' , 'xmlns' , 'xmlns:xlink' ]
55
70
56
71
svg_attributes = ['accent-height' , 'accumulate' , 'additive' , 'alphabetic' ,
57
- 'arabic-form' , 'ascent' , 'attributeName' , 'attributeType' ,
58
- 'baseProfile' , 'bbox' , 'begin' , 'by' , 'calcMode' , 'cap-height' ,
59
- 'class' , 'color' , 'color-rendering' , 'content' , 'cx' , 'cy' , 'd' , 'dx ' ,
60
- ' dy' , 'descent' , 'display' , 'dur' , 'end' , 'fill' , 'fill-opacity ' ,
61
- 'fill-rule ' , 'font-family ' , 'font-size ' , 'font-stretch' , 'font-style ' ,
62
- 'font-variant ' , 'font-weight ' , 'from ' , 'fx ' , 'fy' , 'g1' , 'g2 ' ,
63
- 'glyph-name ' , 'gradientUnits ' , 'hanging ' , 'height ' , 'horiz-adv-x ' ,
64
- 'horiz-origin -x' , 'id ' , 'ideographic ' , 'k ' , 'keyPoints ' ,
65
- 'keySplines' , 'keyTimes' , 'lang' , 'marker-end' , 'marker-mid ' ,
66
- 'marker-start ' , 'markerHeight ' , 'markerUnits ' , 'markerWidth ' ,
67
- 'mathematical' , 'max' , 'min' , 'name' , 'offset' , 'opacity' , 'orient ' ,
68
- 'origin ' , 'overline-position ' , 'overline-thickness ' , 'panose-1 ' ,
69
- 'path ' , 'pathLength ' , 'points ' , 'preserveAspectRatio ' , 'r' , 'refX ' ,
70
- 'refY ' , 'repeatCount ' , 'repeatDur ' , 'requiredExtensions ' ,
71
- 'requiredFeatures ' , 'restart ' , 'rotate ' , 'rx' , 'ry' , 'slope ' ,
72
- 'stemh ' , 'stemv ' , 'stop-color ' , 'stop-opacity ' ,
73
- 'strikethrough-position ' , 'strikethrough-thickness ' , 'stroke ' ,
74
- 'stroke-dasharray' , 'stroke-dashoffset' , 'stroke-linecap' ,
75
- 'stroke-linejoin' , 'stroke-miterlimit' , 'stroke-opacity' ,
76
- 'stroke-width' , 'systemLanguage' , 'target' , 'text-anchor' , 'to' ,
77
- 'transform' , 'type' , 'u1' , 'u2' , 'underline-position' ,
78
- 'underline-thickness' , 'unicode' , 'unicode-range' , 'units-per-em' ,
79
- 'values' , 'version' , 'viewBox' , 'visibility' , 'width' , 'widths' , 'x' ,
80
- 'x-height' , 'x1' , 'x2' , 'xlink:actuate' , 'xlink:arcrole' ,
81
- 'xlink:href' , 'xlink:role' , 'xlink:show' , 'xlink:title' ,
82
- 'xlink:type' , ' xml:base' , 'xml:lang' , 'xml:space' , 'xmlns' ,
83
- 'xmlns:xlink' , 'y' , 'y1' , 'y2' , 'zoomAndPan' ]
72
+ 'arabic-form' , 'ascent' , 'attributeName' , 'attributeType' ,
73
+ 'baseProfile' , 'bbox' , 'begin' , 'by' , 'calcMode' , 'cap-height' ,
74
+ 'class' , 'clip-path' , ' color' , 'color-rendering' , 'content' , 'cx' ,
75
+ 'cy' , 'd' , 'dx' , ' dy' , 'descent' , 'display' , 'dur' , 'end' , 'fill' ,
76
+ 'fill-opacity ' , 'fill-rule ' , 'font-family ' , 'font-size ' ,
77
+ 'font-stretch ' , 'font-style ' , 'font-variant ' , 'font-weight ' , 'from ' ,
78
+ 'fx ' , 'fy ' , 'g1 ' , 'g2 ' , 'glyph-name' , 'gradientUnits' , 'hanging ' ,
79
+ 'height' , 'horiz-adv -x' , 'horiz-origin-x ' , 'id ' , 'ideographic ' , 'k ' ,
80
+ 'keyPoints' , 'keySplines' , 'keyTimes' , 'lang' , 'marker-end' ,
81
+ 'marker-mid ' , 'marker-start ' , 'markerHeight ' , 'markerUnits ' ,
82
+ 'markerWidth' , 'mathematical' , 'max' , 'min' , 'name' , 'offset' ,
83
+ 'opacity ' , 'orient ' , 'origin ' , 'overline-position ' ,
84
+ 'overline-thickness ' , 'panose-1 ' , 'path ' , 'pathLength ' , 'points ' ,
85
+ 'preserveAspectRatio' , 'r ' , 'refX ' , 'refY ' , 'repeatCount ' ,
86
+ 'repeatDur ' , 'requiredExtensions ' , 'requiredFeatures ' , 'restart ' ,
87
+ 'rotate' , 'rx ' , 'ry ' , 'slope ' , 'stemh' , 'stemv' , ' stop-color ' ,
88
+ 'stop-opacity ' , 'strikethrough-position ' , 'strikethrough-thickness ' ,
89
+ 'stroke' , 'stroke-dasharray' , 'stroke-dashoffset' , 'stroke-linecap' ,
90
+ 'stroke-linejoin' , 'stroke-miterlimit' , 'stroke-opacity' ,
91
+ 'stroke-width' , 'systemLanguage' , 'target' , 'text-anchor' , 'to' ,
92
+ 'transform' , 'type' , 'u1' , 'u2' , 'underline-position' ,
93
+ 'underline-thickness' , 'unicode' , 'unicode-range' , 'units-per-em' ,
94
+ 'values' , 'version' , 'viewBox' , 'visibility' , 'width' , 'widths' , 'x' ,
95
+ 'x-height' , 'x1' , 'x2' , 'xlink:actuate' , 'xlink:arcrole' ,
96
+ 'xlink:href' , 'xlink:role' , 'xlink:show' , 'xlink:title' , 'xlink:type ' ,
97
+ ' xml:base' , 'xml:lang' , 'xml:space' , 'xmlns' , 'xmlns:xlink' , 'y ' ,
98
+ 'y1' , 'y2' , 'zoomAndPan' ]
84
99
85
100
attr_val_is_uri = ['href' , 'src' , 'cite' , 'action' , 'longdesc' ,
86
- 'xlink:href' , 'xml:base' ]
101
+ 'xlink:href' , 'xml:base' ]
87
102
88
103
svg_attr_val_allows_ref = ['clip-path' , 'color-profile' , 'cursor' , 'fill' ,
89
- 'filter' , 'marker' , 'marker-start' , 'marker-mid' , 'marker-end' , 'mask' , 'stroke' ]
104
+ 'filter' , 'marker' , 'marker-start' , 'marker-mid' , 'marker-end' ,
105
+ 'mask' , 'stroke' ]
90
106
91
- svg_allow_local_href = ['altGlyph' , 'animate' , 'animateColor' , 'animateMotion' ,
92
- 'animateTransform' , 'cursor' , 'feImage' , 'filter' , 'linearGradient' , 'pattern' ,
93
- 'radialGradient' , 'textpath' , 'tref' , 'set' , 'use' ]
107
+ svg_allow_local_href = ['altGlyph' , 'animate' , 'animateColor' ,
108
+ 'animateMotion' , 'animateTransform' , 'cursor' , 'feImage' , 'filter' ,
109
+ 'linearGradient' , 'pattern' , 'radialGradient' , 'textpath' , 'tref' ,
110
+ 'set' , 'use' ]
94
111
95
112
acceptable_css_properties = ['azimuth' , 'background-color' ,
96
113
'border-bottom-color' , 'border-collapse' , 'border-color' ,
@@ -140,7 +157,13 @@ class HTMLSanitizerMixin(object):
140
157
# sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
141
158
# => <a>Click here for $100</a>
142
159
def sanitize_token (self , token ):
143
- if token ["type" ] in (tokenTypes ["StartTag" ], tokenTypes ["EndTag" ],
160
+
161
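+ # Accommodate filters that supply the token type as a name (a key of tokenTypes) rather than as the corresponding tokenTypes value: normalize it to the value before comparing.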
162
+ token_type = token ["type" ]
163
+ if token_type in tokenTypes .keys ():
164
+ token_type = tokenTypes [token_type ]
165
+
166
+ if token_type in (tokenTypes ["StartTag" ], tokenTypes ["EndTag" ],
144
167
tokenTypes ["EmptyTag" ]):
145
168
if token ["name" ] in self .allowed_elements :
146
169
if token .has_key ("data" ):
@@ -172,19 +195,24 @@ def sanitize_token(self, token):
172
195
token ["data" ] = [[name ,val ] for name ,val in attrs .items ()]
173
196
return token
174
197
else :
175
- if token [ "type" ] == tokenTypes ["EndTag" ]:
198
+ if token_type == tokenTypes ["EndTag" ]:
176
199
token ["data" ] = "</%s>" % token ["name" ]
177
200
elif token ["data" ]:
178
201
attrs = '' .join ([' %s="%s"' % (k ,escape (v )) for k ,v in token ["data" ]])
179
202
token ["data" ] = "<%s%s>" % (token ["name" ],attrs )
180
203
else :
181
204
token ["data" ] = "<%s>" % token ["name" ]
182
- if token [ "selfClosing" ] :
205
+ if token . get ( "selfClosing" ) :
183
206
token ["data" ]= token ["data" ][:- 1 ] + "/>"
184
- token ["type" ] = tokenTypes ["Characters" ]
207
+
208
+ if token ["type" ] in tokenTypes .keys ():
209
+ token ["type" ] = "Characters"
210
+ else :
211
+ token ["type" ] = tokenTypes ["Characters" ]
212
+
185
213
del token ["name" ]
186
214
return token
187
- elif token [ "type" ] == tokenTypes ["Comment" ]:
215
+ elif token_type == tokenTypes ["Comment" ]:
188
216
pass
189
217
else :
190
218
return token
0 commit comments