Skip to content

Commit be6230b

Browse files
committed
fixup! Fix #11, #12: quote attributes that need escaping in legacy browsers
1 parent fe1baa8 commit be6230b

File tree

1 file changed

+15
-8
lines changed

1 file changed

+15
-8
lines changed

html5lib/serializer/htmlserializer.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,10 @@
1010

1111
spaceCharacters = "".join(spaceCharacters)
1212

13-
quoteAttributeSpec = re.compile("[" + spaceCharacters + "\"'=<>`]")
14-
quoteAttributeLegacy = re.compile("[\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n"
13+
quoteAttributeSpecChars = spaceCharacters + "\"'=<>`"
14+
quoteAttributeSpec = re.compile("[" + quoteAttributeSpecChars + "]")
15+
quoteAttributeLegacy = re.compile("[" + quoteAttributeSpecChars +
16+
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n"
1517
"\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15"
1618
"\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
1719
"\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000"
@@ -79,7 +81,7 @@ def htmlentityreplace_errors(exc):
7981
class HTMLSerializer(object):
8082

8183
# attribute quoting options
82-
quote_attr_values = "legacy"
84+
quote_attr_values = "legacy" # be secure by default
8385
quote_char = '"'
8486
use_best_quote_char = True
8587

@@ -115,9 +117,9 @@ def __init__(self, **kwargs):
115117
inject_meta_charset=True|False
116118
Whether to insert a meta element to define the character set of the
117119
document.
118-
quote_attr_values="legacy"|"spec"|True
120+
quote_attr_values="legacy"|"spec"|"always"
119121
Whether to quote attribute values that don't require quoting
120-
per legacy browser behaviour, HTML authoring rules, or always.
122+
per legacy browser behaviour, when required by the standard, or always.
121123
quote_char=u'"'|u"'"
122124
Use given quote character for attribute quoting. Default is to
123125
use double quote unless attribute value contains a double quote,
@@ -246,10 +248,15 @@ def serialize(self, treewalker, encoding=None):
246248
(k not in booleanAttributes.get(name, tuple()) and
247249
k not in booleanAttributes.get("", tuple())):
248250
yield self.encodeStrict("=")
249-
if self.quote_attr_values or len(v) == 0:
251+
if self.quote_attr_values == "always" or len(v) == 0:
250252
quote_attr = True
251-
elif :
252-
quoteAttributeSpec.search(v)
253+
elif self.quote_attr_values == "spec":
254+
quote_attr = quoteAttributeSpec.search(v) is not None
255+
elif self.quote_attr_values == "legacy":
256+
quote_attr = quoteAttributeLegacy.search(v) is not None
257+
else:
258+
raise ValueError("quote_attr_values must be one of: "
259+
"'always', 'spec', or 'legacy'")
253260
v = v.replace("&", "&amp;")
254261
if self.escape_lt_in_attrs:
255262
v = v.replace("<", "&lt;")

0 commit comments

Comments
 (0)