|
8 | 8 | from .constants import tokenTypes
|
9 | 9 |
|
10 | 10 |
|
# Validates the path component of a ``data:`` URI, i.e. everything after the
# scheme: ``<type>/<subtype>[;charset=...][;base64],<data>``.
# Compiled once at module level so it is not rebuilt on every use.
content_type_rgx = re.compile(
    r'''
    ^
    # Match a content type <application>/<type>
    (?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
    # Match any character set and encoding
    # Note that this does not prevent the
    # same one being set twice
    # The charset group is currently unused
    (?:;charset=(?P<charset>[-a-zA-Z0-9]+)|;(?P<encoding>base64)){0,2}
    # Match the base64-encoded or urlencoded
    # data
    # The data group is currently unused
    (?P<data>,(?:
        # Base64 alphabet followed by optional padding
        (?P<base64_encoded_data>[a-zA-Z0-9+/]+=*)
        |
        # Any mix of alphanumerics and %XX escapes; the two branches
        # start with different characters, so matching stays linear
        (?P<url_encoded_data>(?:[a-zA-Z0-9]|%[a-fA-F0-9]{2})+)
    ))
    $
    ''',
    re.VERBOSE,
)
| 27 | + |
| 28 | + |
11 | 29 | class HTMLSanitizerMixin(object):
|
12 | 30 | """ sanitization of XHTML+MathML+SVG and of inline style attributes."""
|
13 | 31 |
|
@@ -197,24 +215,8 @@ def allowed_token(self, token, token_type):
|
197 | 215 | if uri:
|
198 | 216 | if uri.scheme not in self.allowed_protocols:
|
199 | 217 | del attrs[attr]
|
200 |
| - rgx = re.compile(r''' |
201 |
| - ^ |
202 |
| - # Match a content type <application>/<type> |
203 |
| - (?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+) |
204 |
| - # Match any character set and encoding |
205 |
| - # Note that this does not prevent the |
206 |
| - # same one being set twice |
207 |
| - # The charset group is currently unused |
208 |
| - (?:;charset=(?P<charset>[-a-zA-Z0-9]+)|;(?P<encoding>base64)){0,2} |
209 |
| - # Match the base64-encoded or urlencoded |
210 |
| - # data |
211 |
| - # The data group is currently unused |
212 |
| - (?P<data>,(?P<base64_encoded_data>[a-zA-Z0-9+/]+=*|(?P<url_encoded_data>[a-zA-Z0-9]+|%[a-fA-F0-9]{2}))) |
213 |
| - $ |
214 |
| - ''', |
215 |
| - re.VERBOSE) |
216 | 218 | if uri.scheme == 'data':
|
217 |
| - m = rgx.match(uri.path) |
| 219 | + m = content_type_rgx.match(uri.path) |
218 | 220 | if not m:
|
219 | 221 | del attrs[attr]
|
220 | 222 | if m.group('content_type') not in self.allowed_content_types:
|
|
0 commit comments