Skip to content

Commit 58850d6

Browse files
committed
Merge pull request kyrus#29 from desolat/master
Handle non-ASCII characters correctly
2 parents 3bd9c1e + 3c6489b commit 58850d6

File tree

2 files changed

+203
-45
lines changed

2 files changed

+203
-45
lines changed

junit_xml/__init__.py

+79-28
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
#!/usr/bin/env python
2+
# -*- coding: UTF-8 -*-
23
from collections import defaultdict
34
import sys
45
import re
56
import xml.etree.ElementTree as ET
67
import xml.dom.minidom
78

8-
from six import u, iteritems
9+
from six import u, iteritems, PY2
910

1011
try:
1112
# Python 2
@@ -53,8 +54,29 @@
5354
"""
5455

5556

57+
def decode(var, encoding):
    '''
    Return var as a unicode (text) string, decoding it if necessary.

    Works the same way on Python 2 and Python 3: byte strings are decoded
    instead of being turned into their repr (str(b'abc') == "b'abc'" on
    Python 3, which silently corrupted names and messages).

    @param var: Value to convert; text, a byte string or any other object.
    @param encoding: Codec used to decode byte strings. If empty/None the
        interpreter's default codec is used (ASCII on Python 2, UTF-8 on
        Python 3).
    @return: unicode string
    @raise UnicodeDecodeError: if a byte string is not valid in the codec.
    '''
    # type(u"") is `unicode` on Python 2 and `str` on Python 3, so no
    # version switch is needed.
    text_type = type(u"")

    # Already text: hand it back untouched.
    if isinstance(var, text_type):
        return var

    # `bytes` is an alias of `str` on Python 2, so this covers the binary
    # string type of both major versions.
    if isinstance(var, bytes):
        if encoding:
            return var.decode(encoding)
        return var.decode()

    # Anything else (numbers, datetimes, ...) goes through its text form.
    return text_type(var)
74+
75+
5676
class TestSuite(object):
57-
"""Suite of test cases"""
77+
'''Suite of test cases.
78+
Can handle unicode strings or binary strings if their encoding is provided.
79+
'''
5880

5981
def __init__(self, name, test_cases=None, hostname=None, id=None,
6082
package=None, timestamp=None, properties=None):
@@ -72,11 +94,18 @@ def __init__(self, name, test_cases=None, hostname=None, id=None,
7294
self.timestamp = timestamp
7395
self.properties = properties
7496

75-
def build_xml_doc(self):
76-
"""Builds the XML document for the JUnit test suite"""
97+
98+
def build_xml_doc(self, encoding=None):
99+
'''
100+
Builds the XML document for the JUnit test suite.
101+
Produces clean unicode strings and decodes non-unicode with the help of encoding.
102+
@param encoding: Used to decode encoded strings.
103+
@return: XML document with unicode string elements
104+
'''
105+
77106
# build the test suite element
78107
test_suite_attributes = dict()
79-
test_suite_attributes['name'] = str(self.name)
108+
test_suite_attributes['name'] = decode(self.name, encoding)
80109
test_suite_attributes['failures'] = \
81110
str(len([c for c in self.test_cases if c.is_failure()]))
82111
test_suite_attributes['errors'] = \
@@ -88,31 +117,31 @@ def build_xml_doc(self):
88117
test_suite_attributes['tests'] = str(len(self.test_cases))
89118

90119
if self.hostname:
91-
test_suite_attributes['hostname'] = str(self.hostname)
120+
test_suite_attributes['hostname'] = decode(self.hostname, encoding)
92121
if self.id:
93-
test_suite_attributes['id'] = str(self.id)
122+
test_suite_attributes['id'] = decode(self.id, encoding)
94123
if self.package:
95-
test_suite_attributes['package'] = str(self.package)
124+
test_suite_attributes['package'] = decode(self.package, encoding)
96125
if self.timestamp:
97-
test_suite_attributes['timestamp'] = str(self.timestamp)
126+
test_suite_attributes['timestamp'] = decode(self.timestamp, encoding)
98127

99128
xml_element = ET.Element("testsuite", test_suite_attributes)
100129

101130
# add any properties
102131
if self.properties:
103132
props_element = ET.SubElement(xml_element, "properties")
104133
for k, v in self.properties.items():
105-
attrs = {'name': str(k), 'value': str(v)}
134+
attrs = {'name': decode(k, encoding), 'value': decode(v, encoding)}
106135
ET.SubElement(props_element, "property", attrs)
107136

108137
# test cases
109138
for case in self.test_cases:
110139
test_case_attributes = dict()
111-
test_case_attributes['name'] = str(case.name)
140+
test_case_attributes['name'] = decode(case.name, encoding)
112141
if case.elapsed_sec:
113142
test_case_attributes['time'] = "%f" % case.elapsed_sec
114143
if case.classname:
115-
test_case_attributes['classname'] = str(case.classname)
144+
test_case_attributes['classname'] = decode(case.classname, encoding)
116145

117146
test_case_element = ET.SubElement(
118147
xml_element, "testcase", test_case_attributes)
@@ -121,49 +150,53 @@ def build_xml_doc(self):
121150
if case.is_failure():
122151
attrs = {'type': 'failure'}
123152
if case.failure_message:
124-
attrs['message'] = case.failure_message
153+
attrs['message'] = decode(case.failure_message, encoding)
125154
failure_element = ET.Element("failure", attrs)
126155
if case.failure_output:
127-
failure_element.text = case.failure_output
156+
failure_element.text = decode(case.failure_output, encoding)
128157
test_case_element.append(failure_element)
129158

130159
# errors
131160
if case.is_error():
132161
attrs = {'type': 'error'}
133162
if case.error_message:
134-
attrs['message'] = case.error_message
163+
attrs['message'] = decode(case.error_message, encoding)
135164
error_element = ET.Element("error", attrs)
136165
if case.error_output:
137-
error_element.text = case.error_output
166+
error_element.text = decode(case.error_output, encoding)
138167
test_case_element.append(error_element)
139168

140169
# skippeds
141170
if case.is_skipped():
142171
attrs = {'type': 'skipped'}
143172
if case.skipped_message:
144-
attrs['message'] = case.skipped_message
173+
attrs['message'] = decode(case.skipped_message, encoding)
145174
skipped_element = ET.Element("skipped", attrs)
146175
if case.skipped_output:
147-
skipped_element.text = case.skipped_output
176+
skipped_element.text = decode(case.skipped_output, encoding)
148177
test_case_element.append(skipped_element)
149178

150179
# test stdout
151180
if case.stdout:
152181
stdout_element = ET.Element("system-out")
153-
stdout_element.text = case.stdout
182+
stdout_element.text = decode(case.stdout, encoding)
154183
test_case_element.append(stdout_element)
155184

156185
# test stderr
157186
if case.stderr:
158187
stderr_element = ET.Element("system-err")
159-
stderr_element.text = case.stderr
188+
stderr_element.text = decode(case.stderr, encoding)
160189
test_case_element.append(stderr_element)
161190

162191
return xml_element
163192

164193
@staticmethod
165194
def to_xml_string(test_suites, prettyprint=True, encoding=None):
166-
"""Returns the string representation of the JUnit XML document"""
195+
'''Returns the string representation of the JUnit XML document.
196+
@param encoding: The encoding of the input.
197+
@return: unicode string
198+
'''
199+
167200
try:
168201
iter(test_suites)
169202
except TypeError:
@@ -172,7 +205,7 @@ def to_xml_string(test_suites, prettyprint=True, encoding=None):
172205
xml_element = ET.Element("testsuites")
173206
attributes = defaultdict(int)
174207
for ts in test_suites:
175-
ts_xml = ts.build_xml_doc()
208+
ts_xml = ts.build_xml_doc(encoding=encoding)
176209
for key in ['failures', 'errors', 'skipped', 'tests']:
177210
attributes[key] += int(ts_xml.get(key, 0))
178211
for key in ['time']:
@@ -182,23 +215,41 @@ def to_xml_string(test_suites, prettyprint=True, encoding=None):
182215
xml_element.set(key, str(value))
183216

184217
xml_string = ET.tostring(xml_element, encoding=encoding)
218+
# is encoded now
185219
xml_string = TestSuite._clean_illegal_xml_chars(
186220
xml_string.decode(encoding or 'utf-8'))
221+
# is unicode now
187222

188223
if prettyprint:
189-
xml_string = xml.dom.minidom.parseString(xml_string).toprettyxml()
224+
# minidom.parseString() works just on correctly encoded binary strings
225+
xml_string = xml_string.encode(encoding or 'utf-8')
226+
xml_string = xml.dom.minidom.parseString(xml_string)
227+
# toprettyxml() produces unicode if no encoding is being passed or binary string with an encoding
228+
xml_string = xml_string.toprettyxml(encoding=encoding)
229+
if encoding:
230+
xml_string = xml_string.decode(encoding)
231+
# is unicode now
190232
return xml_string
191233

192234
@staticmethod
193235
def to_file(file_descriptor, test_suites, prettyprint=True, encoding=None):
    '''
    Renders the test suites as a JUnit XML document and writes it out.

    @param file_descriptor: Object with a write() method receiving the XML.
    @param test_suites: Test suite(s) passed on to to_xml_string().
    @param prettyprint: Passed on to to_xml_string().
    @param encoding: Passed on to to_xml_string().
    '''
    # NOTE: known limitation carried over from the original comment - writing
    # has problems with encoded str containing non-ASCII
    # (non-default-encoding) characters.
    file_descriptor.write(
        TestSuite.to_xml_string(
            test_suites, prettyprint=prettyprint, encoding=encoding))
243+
197244

198245
@staticmethod
199246
def _clean_illegal_xml_chars(string_to_clean):
200-
"""Removes any illegal unicode characters from the given XML string"""
201-
# see https://stackoverflow.com/q/1707890
247+
'''
248+
Removes any illegal unicode characters from the given XML string.
249+
250+
@see: http://stackoverflow.com/questions/1707890/fast-way-to-filter-illegal-xml-unicode-chars-in-python
251+
'''
252+
202253
illegal_unichrs = [
203254
(0x00, 0x08), (0x0B, 0x1F), (0x7F, 0x84), (0x86, 0x9F),
204255
(0xD800, 0xDFFF), (0xFDD0, 0xFDDF), (0xFFFE, 0xFFFF),

0 commit comments

Comments
 (0)