Skip to content

Commit dc7a9d3

Browse files
committed
Remove replacement characters before looking for forbidden URI schemes in attributes
1 parent b242d88 commit dc7a9d3

File tree

1 file changed

+2
-0
lines changed

1 file changed

+2
-0
lines changed

src/html5lib/sanitizer.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -152,6 +152,8 @@ def sanitize_token(self, token):
152152
continue
153153
val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
154154
unescape(attrs[attr])).lower()
155+
#remove replacement characters from unescaped characters
156+
val_unescaped = val_unescaped.replace(u"\ufffd", "")
155157
if (re.match("^[a-z0-9][-+.a-z0-9]*:",val_unescaped) and
156158
(val_unescaped.split(':')[0] not in
157159
self.allowed_protocols)):

0 commit comments

Comments
 (0)