Skip to content

Commit aa9f393

Browse files
committed
Python 3: deal with multipart streams as bytes.
1 parent: cd3fccb · commit: aa9f393

File tree

2 files changed: 44 additions and 37 deletions (+44 / -37 lines changed)

couchdb/multipart.py

Lines changed: 43 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@
2323
__docformat__ = 'restructuredtext en'
2424

2525

26-
CRLF = '\r\n'
26+
CRLF = b'\r\n'
2727

2828

2929
def read_multipart(fileobj, boundary=None):
@@ -50,16 +50,16 @@ def read_multipart(fileobj, boundary=None):
5050
buf = []
5151
outer = in_headers = boundary is None
5252

53-
next_boundary = boundary and '--' + boundary + '\n' or None
54-
last_boundary = boundary and '--' + boundary + '--\n' or None
53+
next_boundary = boundary and ('--' + boundary + '\n').encode('ascii') or None
54+
last_boundary = boundary and ('--' + boundary + '--\n').encode('ascii') or None
5555

5656
def _current_part():
57-
payload = ''.join(buf)
58-
if payload.endswith('\r\n'):
57+
payload = b''.join(buf)
58+
if payload.endswith(b'\r\n'):
5959
payload = payload[:-2]
60-
elif payload.endswith('\n'):
60+
elif payload.endswith(b'\n'):
6161
payload = payload[:-1]
62-
content_md5 = headers.get('content-md5')
62+
content_md5 = headers.get(b'content-md5')
6363
if content_md5:
6464
h = b64encode(md5(payload).digest())
6565
if content_md5 != h:
@@ -68,11 +68,11 @@ def _current_part():
6868

6969
for line in fileobj:
7070
if in_headers:
71-
line = line.replace(CRLF, '\n')
72-
if line != '\n':
73-
name, value = [item.strip() for item in line.split(':', 1)]
74-
name = name.lower()
75-
value, charset = header.decode_header(value)[0]
71+
line = line.replace(CRLF, b'\n')
72+
if line != b'\n':
73+
name, value = [item.strip() for item in line.split(b':', 1)]
74+
name = name.lower().decode('ascii')
75+
value, charset = header.decode_header(value.decode('utf-8'))[0]
7676
if charset is None:
7777
headers[name] = value
7878
else:
@@ -92,7 +92,7 @@ def _current_part():
9292
yield part
9393
return
9494

95-
elif line.replace(CRLF, '\n') == next_boundary:
95+
elif line.replace(CRLF, b'\n') == next_boundary:
9696
# We've reached the start of a new part, as indicated by the
9797
# boundary
9898
if headers:
@@ -104,7 +104,7 @@ def _current_part():
104104
del buf[:]
105105
in_headers = True
106106

107-
elif line.replace(CRLF, '\n') == last_boundary:
107+
elif line.replace(CRLF, b'\n') == last_boundary:
108108
# We're done with this multipart envelope
109109
break
110110

@@ -130,39 +130,47 @@ def __init__(self, fileobj, headers=None, subtype='mixed', boundary=None):
130130
self._write_headers(headers)
131131

132132
def open(self, headers=None, subtype='mixed', boundary=None):
133-
self.fileobj.write('--')
134-
self.fileobj.write(self.boundary)
133+
self.fileobj.write(b'--')
134+
self.fileobj.write(self.boundary.encode('utf-8'))
135135
self.fileobj.write(CRLF)
136136
return MultipartWriter(self.fileobj, headers=headers, subtype=subtype,
137137
boundary=boundary)
138138

139139
def add(self, mimetype, content, headers=None):
140-
self.fileobj.write('--')
141-
self.fileobj.write(self.boundary)
140+
self.fileobj.write(b'--')
141+
self.fileobj.write(self.boundary.encode('utf-8'))
142142
self.fileobj.write(CRLF)
143143
if headers is None:
144144
headers = {}
145+
146+
ctype, params = parse_header(mimetype)
145147
if isinstance(content, util.utype):
146-
ctype, params = parse_header(mimetype)
147148
if 'charset' in params:
148149
content = content.encode(params['charset'])
149150
else:
150151
content = content.encode('utf-8')
151152
mimetype = mimetype + ';charset=utf-8'
153+
elif 'charset' not in params:
154+
try:
155+
content.decode('utf-8')
156+
finally:
157+
mimetype = mimetype + ';charset=utf-8'
158+
152159
headers['Content-Type'] = mimetype
153160
if content:
154161
headers['Content-Length'] = str(len(content))
155-
headers['Content-MD5'] = b64encode(md5(content).digest())
162+
hash = b64encode(md5(content).digest()).decode('ascii')
163+
headers['Content-MD5'] = hash
156164
self._write_headers(headers)
157165
if content:
158166
# XXX: throw an exception if a boundary appears in the content??
159167
self.fileobj.write(content)
160168
self.fileobj.write(CRLF)
161169

162170
def close(self):
163-
self.fileobj.write('--')
164-
self.fileobj.write(self.boundary)
165-
self.fileobj.write('--')
171+
self.fileobj.write(b'--')
172+
self.fileobj.write(self.boundary.encode('ascii'))
173+
self.fileobj.write(b'--')
166174
self.fileobj.write(CRLF)
167175

168176
def _make_boundary(self):
@@ -171,19 +179,18 @@ def _make_boundary(self):
171179
return '==' + uuid4().hex + '=='
172180
except ImportError:
173181
from random import randrange
174-
token = randrange(sys.maxsize)
175-
format = '%%0%dd' % len(repr(sys.maxsize - 1))
176-
return '===============' + (format % token) + '=='
182+
nonce = ('%%0%dd' % len(repr(sys.maxsize - 1))) % token
183+
return '===============' + nonce + '=='
177184

178185
def _write_headers(self, headers):
179186
if headers:
180187
for name in sorted(headers.keys()):
181188
value = headers[name]
182-
if isinstance(value, util.utype):
183-
value = str(header.make_header([(value, 'utf-8')]))
184-
self.fileobj.write(name)
185-
self.fileobj.write(': ')
186-
self.fileobj.write(value)
189+
if value.encode('ascii', 'ignore') != value.encode('utf-8'):
190+
value = header.make_header([(value, 'utf-8')]).encode()
191+
self.fileobj.write(name.encode('utf-8'))
192+
self.fileobj.write(b': ')
193+
self.fileobj.write(value.encode('utf-8'))
187194
self.fileobj.write(CRLF)
188195
self.fileobj.write(CRLF)
189196

@@ -202,19 +209,19 @@ def write_multipart(fileobj, subtype='mixed', boundary=None):
202209
envelope you call the ``add(mimetype, content, [headers])`` method for
203210
every part, and finally call the ``close()`` method.
204211
205-
>>> from StringIO import StringIO
212+
>>> from couchdb.util import StringIO
206213
207214
>>> buf = StringIO()
208215
>>> envelope = write_multipart(buf, boundary='==123456789==')
209216
>>> envelope.add('text/plain', 'Just testing')
210217
>>> envelope.close()
211-
>>> print(buf.getvalue().replace('\r\n', '\n'))
218+
>>> print(buf.getvalue().replace(b'\r\n', b'\n'))
212219
Content-Type: multipart/mixed; boundary="==123456789=="
213220
<BLANKLINE>
214221
--==123456789==
215222
Content-Length: 12
216223
Content-MD5: nHmX4a6el41B06x2uCpglQ==
217-
Content-Type: text/plain
224+
Content-Type: text/plain;charset=utf-8
218225
<BLANKLINE>
219226
Just testing
220227
--==123456789==--
@@ -233,7 +240,7 @@ def write_multipart(fileobj, subtype='mixed', boundary=None):
233240
>>> part.add('text/plain', 'Just testing')
234241
>>> part.close()
235242
>>> envelope.close()
236-
>>> print(buf.getvalue().replace('\r\n', '\n')) #:doctest +ELLIPSIS
243+
>>> print(buf.getvalue().replace(b'\r\n', b'\n')) #:doctest +ELLIPSIS
237244
Content-Type: multipart/mixed; boundary="==123456789=="
238245
<BLANKLINE>
239246
--==123456789==
@@ -242,7 +249,7 @@ def write_multipart(fileobj, subtype='mixed', boundary=None):
242249
--==abcdefghi==
243250
Content-Length: 12
244251
Content-MD5: nHmX4a6el41B06x2uCpglQ==
245-
Content-Type: text/plain
252+
Content-Type: text/plain;charset=utf-8
246253
<BLANKLINE>
247254
Just testing
248255
--==abcdefghi==--

couchdb/tests/multipart.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -203,7 +203,7 @@ def test_unicode_headers(self):
203203
Content-ID: =?utf-8?b?5paH5qGj?=
204204
Content-Length: 63
205205
Content-MD5: Cpw3iC3xPua8YzKeWLzwvw==
206-
Content-Type: application/json
206+
Content-Type: application/json;charset=utf-8
207207
208208
{"_rev": "3-bc27b6930ca514527d8954c7c43e6a09", "_id": "文档"}
209209
''', buf.getvalue().replace('\r\n', '\n'))

0 commit comments

Comments
 (0)