-
-
Notifications
You must be signed in to change notification settings - Fork 31.8k
bpo-17088: Fix handling of XML attributes when serializing with default namespace #11050
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
a6c78ce
e0d6f6a
5e200cf
34e6b95
71119f5
11c924a
4a07bca
4cf93e1
6bf5eab
d80197e
31c32a3
52c4b8e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -836,63 +836,81 @@ def _get_writer(file_or_filename, encoding): | |
def _namespaces(elem, default_namespace=None): | ||
# identify namespaces used in this tree | ||
|
||
# maps qnames to *encoded* prefix:local names | ||
qnames = {None: None} | ||
# maps qnames to *encoded* prefix:local names, | ||
# value is a pair of prefix:local strings, | ||
# second value is for attribute names | ||
qnames = {None: (None, None)} | ||
|
||
# maps uri:s to prefixes | ||
namespaces = {} | ||
if default_namespace: | ||
namespaces[default_namespace] = "" | ||
|
||
def add_qname(qname): | ||
def serialize_qname(qname, is_attr): | ||
# calculate serialized qname representation | ||
try: | ||
if qname[:1] == "{": | ||
uri, tag = qname[1:].rsplit("}", 1) | ||
if qname[:1] == "{": | ||
uri, local = qname[1:].rsplit("}", 1) | ||
if not is_attr and uri == default_namespace: | ||
prefix = "" | ||
else: | ||
prefix = namespaces.get(uri) | ||
if prefix is None: | ||
prefix = _namespace_map.get(uri) | ||
if prefix is None: | ||
prefix = "ns%d" % len(namespaces) | ||
if prefix != "xml": | ||
namespaces[uri] = prefix | ||
if prefix: | ||
qnames[qname] = "%s:%s" % (prefix, tag) | ||
else: | ||
qnames[qname] = tag # default element | ||
if prefix: | ||
return "%s:%s" % (prefix, local) | ||
else: | ||
if default_namespace: | ||
# FIXME: can this be handled in XML 1.0? | ||
raise ValueError( | ||
"cannot use non-qualified names with " | ||
"default_namespace option" | ||
) | ||
qnames[qname] = qname | ||
except TypeError: | ||
_raise_serialization_error(qname) | ||
return local # default element | ||
else: | ||
if not is_attr and default_namespace: | ||
# FIXME: can this be handled in XML 1.0? | ||
raise ValueError( | ||
"cannot use non-qualified name (<%s>) with " | ||
"default_namespace option" % qname | ||
) | ||
return qname | ||
|
||
def add_qname(qname, is_attr=False): | ||
ser_tag, ser_attr = qnames.get(qname, (None, None)) | ||
if is_attr: | ||
if ser_attr is None: | ||
ser_attr = serialize_qname(qname, True) | ||
if not default_namespace: | ||
ser_tag = ser_attr | ||
Comment on lines
+879
to
+880
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This seems worth a comment in the code (likewise below). |
||
qnames[qname] = (ser_tag, ser_attr) | ||
else: | ||
if ser_tag is None: | ||
ser_tag = serialize_qname(qname, False) | ||
if not default_namespace: | ||
ser_attr = ser_tag | ||
qnames[qname] = (ser_tag, ser_attr) | ||
|
||
# populate qname and namespaces table | ||
for elem in elem.iter(): | ||
tag = elem.tag | ||
if isinstance(tag, QName): | ||
if tag.text not in qnames: | ||
add_qname(tag.text) | ||
add_qname(tag.text) | ||
elif isinstance(tag, str): | ||
if tag not in qnames: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These conditions might have been there for performance reasons. Not sure if it matters, but that's up to some benchmarking I think. |
||
add_qname(tag) | ||
add_qname(tag) | ||
elif tag is not None and tag is not Comment and tag is not PI: | ||
_raise_serialization_error(tag) | ||
for key, value in elem.items(): | ||
if isinstance(key, QName): | ||
key = key.text | ||
if key not in qnames: | ||
add_qname(key) | ||
if isinstance(value, QName) and value.text not in qnames: | ||
elif not isinstance(key, str): | ||
_raise_serialization_error(key) | ||
add_qname(key, is_attr=True) | ||
if isinstance(value, QName): | ||
add_qname(value.text) | ||
text = elem.text | ||
if isinstance(text, QName) and text.text not in qnames: | ||
if isinstance(text, QName): | ||
add_qname(text.text) | ||
return qnames, namespaces | ||
|
||
prefix_map = {prefix: ns for ns, prefix in namespaces.items()} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Prefixes don't have to be globally unique, so this might introduce bugs. (Suggests to me that there might be missing tests.) |
||
if default_namespace: | ||
prefix_map[""] = default_namespace | ||
return qnames, prefix_map | ||
|
||
def _serialize_xml(write, elem, qnames, namespaces, | ||
short_empty_elements, **kwargs): | ||
|
@@ -903,7 +921,7 @@ def _serialize_xml(write, elem, qnames, namespaces, | |
elif tag is ProcessingInstruction: | ||
write("<?%s?>" % text) | ||
else: | ||
tag = qnames[tag] | ||
tag = qnames[tag][0] | ||
if tag is None: | ||
if text: | ||
write(_escape_cdata(text)) | ||
|
@@ -915,8 +933,7 @@ def _serialize_xml(write, elem, qnames, namespaces, | |
items = list(elem.items()) | ||
if items or namespaces: | ||
if namespaces: | ||
for v, k in sorted(namespaces.items(), | ||
key=lambda x: x[1]): # sort on prefix | ||
for k, v in sorted(namespaces.items()): | ||
if k: | ||
k = ":" + k | ||
write(" xmlns%s=\"%s\"" % ( | ||
|
@@ -927,10 +944,10 @@ def _serialize_xml(write, elem, qnames, namespaces, | |
if isinstance(k, QName): | ||
k = k.text | ||
if isinstance(v, QName): | ||
v = qnames[v.text] | ||
v = qnames[v.text][0] | ||
else: | ||
v = _escape_attrib(v) | ||
write(" %s=\"%s\"" % (qnames[k], v)) | ||
write(" %s=\"%s\"" % (qnames[k][1], v)) | ||
if text or len(elem) or not short_empty_elements: | ||
write(">") | ||
if text: | ||
|
@@ -960,7 +977,7 @@ def _serialize_html(write, elem, qnames, namespaces, **kwargs): | |
elif tag is ProcessingInstruction: | ||
write("<?%s?>" % _escape_cdata(text)) | ||
else: | ||
tag = qnames[tag] | ||
tag = qnames[tag][0] | ||
if tag is None: | ||
if text: | ||
write(_escape_cdata(text)) | ||
|
@@ -971,8 +988,7 @@ def _serialize_html(write, elem, qnames, namespaces, **kwargs): | |
items = list(elem.items()) | ||
if items or namespaces: | ||
if namespaces: | ||
for v, k in sorted(namespaces.items(), | ||
key=lambda x: x[1]): # sort on prefix | ||
for k, v in sorted(namespaces.items()): | ||
if k: | ||
k = ":" + k | ||
write(" xmlns%s=\"%s\"" % ( | ||
|
@@ -983,11 +999,11 @@ def _serialize_html(write, elem, qnames, namespaces, **kwargs): | |
if isinstance(k, QName): | ||
k = k.text | ||
if isinstance(v, QName): | ||
v = qnames[v.text] | ||
v = qnames[v.text][0] | ||
else: | ||
v = _escape_attrib_html(v) | ||
# FIXME: handle boolean attributes | ||
write(" %s=\"%s\"" % (qnames[k], v)) | ||
write(" %s=\"%s\"" % (qnames[k][1], v)) | ||
write(">") | ||
ltag = tag.lower() | ||
if text: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
ElementTree's serialization of attribute names when a default namespace is | ||
passed was corrected. Previously, unqualified attribute names would be | ||
rejected unnecessarily, while attributes in the default namespace would have | ||
their prefix stripped, which goes against the rules for namespace defaulting | ||
and uniqueness of attributes as specified in the XML namespaces spec. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Where's this part gone? Wouldn't this leak a
TypeError
to the user side?