Skip to content

Commit dcf4073

Browse files
committed
Encode non-ASCII document IDs in multipart headers with RFC 2047 (issue 179).
1 parent 02af203 commit dcf4073

File tree

2 files changed: 48 additions and 3 deletions.

couchdb/multipart.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
from base64 import b64encode
1212
from cgi import parse_header
13+
from email import header
1314
try:
1415
from hashlib import md5
1516
except ImportError:
@@ -67,8 +68,13 @@ def _current_part():
6768
if in_headers:
6869
line = line.replace(CRLF, '\n')
6970
if line != '\n':
70-
name, value = line.split(':', 1)
71-
headers[name.lower().strip()] = value.strip()
71+
name, value = [item.strip() for item in line.split(':', 1)]
72+
name = name.lower()
73+
value, charset = header.decode_header(value)[0]
74+
if charset is None:
75+
headers[name] = value
76+
else:
77+
headers[name] = value.decode(charset)
7278
else:
7379
in_headers = False
7480
mimetype, params = parse_header(headers.get('content-type'))
@@ -170,9 +176,12 @@ def _make_boundary(self):
170176
def _write_headers(self, headers):
171177
if headers:
172178
for name in sorted(headers.keys()):
179+
value = headers[name]
180+
if isinstance(value, unicode):
181+
value = str(header.make_header([(value, 'utf-8')]))
173182
self.fileobj.write(name)
174183
self.fileobj.write(': ')
175-
self.fileobj.write(headers[name])
184+
self.fileobj.write(value)
176185
self.fileobj.write(CRLF)
177186
self.fileobj.write(CRLF)
178187

couchdb/tests/multipart.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,23 @@ def test_nested(self):
148148
num += 1
149149
self.assertEqual(num, 3)
150150

151+
def test_unicode_headers(self):
152+
# http://code.google.com/p/couchdb-python/issues/detail?id=179
153+
dump = '''Content-Type: multipart/mixed; boundary="==123456789=="
154+
155+
--==123456789==
156+
Content-ID: =?utf-8?b?5paH5qGj?=
157+
Content-Length: 63
158+
Content-MD5: Cpw3iC3xPua8YzKeWLzwvw==
159+
Content-Type: application/json
160+
161+
{"_rev": "3-bc27b6930ca514527d8954c7c43e6a09", "_id": "文档"}
162+
'''
163+
parts = multipart.read_multipart(StringIO(dump))
164+
for headers, is_multipart, payload in parts:
165+
self.assertEqual(headers['content-id'], u'文档')
166+
break
167+
151168

152169
class WriteMultipartTestCase(unittest.TestCase):
153170

@@ -173,6 +190,25 @@ def test_unicode_content_ascii(self):
173190
self.assertRaises(UnicodeEncodeError, envelope.add,
174191
'text/plain;charset=ascii', u'Iñtërnâtiônàlizætiøn')
175192

193+
def test_unicode_headers(self):
194+
# http://code.google.com/p/couchdb-python/issues/detail?id=179
195+
buf = StringIO()
196+
envelope = multipart.write_multipart(buf, boundary='==123456789==')
197+
envelope.add('application/json',
198+
'{"_rev": "3-bc27b6930ca514527d8954c7c43e6a09",'
199+
' "_id": "文档"}',
200+
headers={'Content-ID': u"文档"})
201+
self.assertEqual('''Content-Type: multipart/mixed; boundary="==123456789=="
202+
203+
--==123456789==
204+
Content-ID: =?utf-8?b?5paH5qGj?=
205+
Content-Length: 63
206+
Content-MD5: Cpw3iC3xPua8YzKeWLzwvw==
207+
Content-Type: application/json
208+
209+
{"_rev": "3-bc27b6930ca514527d8954c7c43e6a09", "_id": "文档"}
210+
''', buf.getvalue().replace('\r\n', '\n'))
211+
176212

177213
def suite():
178214
suite = unittest.TestSuite()

0 commit comments

Comments
 (0)