Skip to content

Commit 3c41a21

Browse files
committed
Add a streaming MIME multipart writer, and switch couchdb-dump over to use it. Needs more real-world testing. Should close issue 58.
--HG-- extra : convert_revision : svn%3A7a298fb0-333a-0410-83e7-658617cd9cf3/trunk%40162
1 parent ff63ffb commit 3c41a21

File tree

3 files changed

+151
-22
lines changed

3 files changed

+151
-22
lines changed

ChangeLog.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ http://couchdb-python.googlecode.com/svn/tags/0.6.0
2424
a `ResourceNotFound` exception as advertised (issue 41).
2525
* Added a `delete()` method to the `client.Server` class for consistency
2626
(issue 64).
27+
* The `couchdb-dump` tool now operates in a streaming fashion, writing one
28+
document at a time to the resulting MIME multipart file (issue 58).
2729

2830

2931
Version 0.5

couchdb/multipart.py

Lines changed: 124 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -*- coding: utf-8 -*-
22
#
3-
# Copyright (C) 2008 Christopher Lenz
3+
# Copyright (C) 2008-2009 Christopher Lenz
44
# All rights reserved.
55
#
66
# This software is licensed as described in the file COPYING, which
@@ -9,8 +9,9 @@
99
"""Support for streamed reading and writing of multipart MIME content."""
1010

1111
from cgi import parse_header
12+
import sys
1213

13-
__all__ = ['read_multipart']
14+
__all__ = ['read_multipart', 'write_multipart']
1415
__docformat__ = 'restructuredtext en'
1516

1617

@@ -32,6 +33,7 @@ def read_multipart(fileobj, boundary=None):
3233
automatically from the headers of the outermost multipart
3334
envelope
3435
:return: an iterator over the parts
36+
:since: 0.5
3537
"""
3638
headers = {}
3739
buf = []
@@ -85,5 +87,124 @@ def _current_part():
8587
else:
8688
buf.append(line)
8789

88-
if not outer:
90+
if not outer and headers:
8991
yield _current_part()
92+
93+
94+
class MultipartWriter(object):
    """Streaming writer for a single MIME multipart envelope.

    Creating an instance immediately writes the envelope headers to
    `fileobj`, including a ``Content-Type: multipart/<subtype>`` header that
    carries the boundary. Parts are appended with `add()`, nested envelopes
    are started with `open()`, and `close()` writes the terminating boundary
    line. Instances can also be used as context managers, in which case
    `close()` is called when the ``with`` block is left.
    """

    def __init__(self, fileobj, headers=None, subtype='mixed', boundary=None):
        """Start a new envelope and write its headers.

        :param fileobj: a writable file-like object receiving the output
        :param headers: optional dictionary of additional envelope headers;
                        any ``Content-Type`` entry is replaced
        :param subtype: the subtype of the multipart MIME type
        :param boundary: the boundary string; generated randomly if omitted
        """
        self.fileobj = fileobj
        if boundary is None:
            boundary = self._make_boundary()
        self.boundary = boundary
        # Work on a copy so the caller's dictionary is never modified.
        headers = dict(headers or {})
        headers['Content-Type'] = 'multipart/%s; boundary="%s"' % (
            subtype, self.boundary
        )
        self._write_headers(headers)

    def open(self, headers=None, subtype='mixed', boundary=None):
        """Start a nested multipart envelope as the next part.

        Writes this envelope's boundary line and returns a new
        `MultipartWriter` that writes to the same file object. The nested
        writer must be closed before anything else is written to this one.
        """
        self.fileobj.write('--%s\r\n' % self.boundary)
        return MultipartWriter(self.fileobj, headers=headers, subtype=subtype,
                               boundary=boundary)

    def add(self, mimetype, content, headers=None):
        """Append a single part to the envelope.

        :param mimetype: value written as the part's ``Content-Type`` header
        :param content: the body of the part; may be empty
        :param headers: optional dictionary of additional part headers; any
                        ``Content-Type`` or ``Content-Length`` entry is
                        replaced
        """
        self.fileobj.write('--%s\r\n' % self.boundary)
        # Copy for the same reason as in `__init__`: a dictionary shared by
        # the caller across several calls must not accumulate our entries.
        headers = dict(headers or {})
        headers['Content-Type'] = mimetype
        headers['Content-Length'] = len(content)
        self._write_headers(headers)
        if content:
            # XXX: throw an exception if a boundary appears in the content??
            self.fileobj.write(content)
        self.fileobj.write('\r\n')

    def close(self):
        """Write the closing boundary line of this envelope."""
        self.fileobj.write('--%s--\r\n' % self.boundary)

    @staticmethod
    def _make_boundary():
        """Return a random string suitable for use as a boundary."""
        try:
            from uuid import uuid4
        except ImportError:
            # No `uuid` module available: fall back to a zero-padded random
            # number wrapped in "=" characters.
            from random import randrange
            token = randrange(sys.maxint)
            fmt = '%%0%dd' % len(repr(sys.maxint - 1))
            return ('=' * 15) + (fmt % token) + '=='
        return uuid4().hex

    def _write_headers(self, headers):
        """Write `headers` sorted by name, followed by an empty line.

        Nothing at all is written when `headers` is empty.
        """
        if headers:
            for name in sorted(headers.keys()):
                self.fileobj.write('%s: %s\r\n' % (name, headers[name]))
            self.fileobj.write('\r\n')

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # The closing boundary is written even when the block raised; the
        # exception itself is not suppressed.
        self.close()
149+
150+
151+
def write_multipart(fileobj, subtype='mixed', boundary=None):
    r"""Create a streaming MIME multipart writer on top of `fileobj`.

    The returned `MultipartWriter` writes the envelope headers right away.
    A flat envelope is produced by calling ``add(mimetype, content,
    [headers])`` once per part, followed by a final ``close()``.

    >>> from StringIO import StringIO

    >>> buf = StringIO()
    >>> envelope = write_multipart(buf, boundary='==123456789==')
    >>> envelope.add('text/plain', 'Just testing')
    >>> envelope.close()
    >>> print buf.getvalue().replace('\r\n', '\n')
    Content-Type: multipart/mixed; boundary="==123456789=="
    <BLANKLINE>
    --==123456789==
    Content-Length: 12
    Content-Type: text/plain
    <BLANKLINE>
    Just testing
    --==123456789==--
    <BLANKLINE>

    The explicit boundary above is only there to make the example output
    predictable. When `boundary` is left out, the writer generates a random
    string for it.

    Nested structures are written by calling ``open([headers])`` on the
    enclosing envelope; every envelope, nested or not, is finished with its
    own ``close()`` call:

    >>> buf = StringIO()
    >>> envelope = write_multipart(buf, boundary='==123456789==')
    >>> part = envelope.open(boundary='==abcdefghi==')
    >>> part.add('text/plain', 'Just testing')
    >>> part.close()
    >>> envelope.close()
    >>> print buf.getvalue().replace('\r\n', '\n')
    Content-Type: multipart/mixed; boundary="==123456789=="
    <BLANKLINE>
    --==123456789==
    Content-Type: multipart/mixed; boundary="==abcdefghi=="
    <BLANKLINE>
    --==abcdefghi==
    Content-Length: 12
    Content-Type: text/plain
    <BLANKLINE>
    Just testing
    --==abcdefghi==--
    --==123456789==--
    <BLANKLINE>

    :param fileobj: a writable file-like object that receives the output
    :param subtype: the subtype of the multipart MIME type (e.g. "mixed")
    :param boundary: the string used to separate the individual parts
    :return: the `MultipartWriter` for the outermost envelope
    :since: 0.6
    """
    writer = MultipartWriter(fileobj, subtype=subtype, boundary=boundary)
    return writer

couchdb/tools/dump.py

Lines changed: 25 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -19,40 +19,47 @@
1919

2020
from couchdb import __version__ as VERSION
2121
from couchdb.client import Database
22+
from couchdb.multipart import write_multipart
2223

2324

24-
def dump_db(dburl, username=None, password=None, boundary=None):
25-
envelope = MIMEMultipart('mixed', boundary)
25+
def dump_db(dburl, username=None, password=None, boundary=None,
            output=sys.stdout):
    """Write every document of a database to `output` as MIME multipart.

    Documents are fetched and written one at a time. A document without
    attachments becomes a single ``application/json`` part; a document with
    attachments becomes a nested multipart envelope holding the JSON part
    followed by one part per attachment.

    :param dburl: the URL of the database to dump
    :param username: user name for authentication; only used when
                     `password` is given as well
    :param password: password for authentication
    :param boundary: boundary string for the outermost envelope; generated
                     by the multipart writer if omitted
    :param output: writable file-like object receiving the dump
    """
    db = Database(dburl)
    if username is not None and password is not None:
        db.resource.http.add_credentials(username, password)

    envelope = write_multipart(output, boundary=boundary)

    for docid in db:
        doc = db.get(docid, attachments=True)
        sys.stderr.write('Dumping document %r\n' % doc.id)
        attachments = doc.pop('_attachments', {})
        jsondoc = json.dumps(doc, sort_keys=True, indent=2)
        doc_headers = {
            'Content-ID': doc.id,
            'ETag': '"%s"' % doc.rev
        }

        if attachments:
            # The writer exposes open()/close() for nested envelopes.
            inner = envelope.open(doc_headers)
            inner.add('application/json', jsondoc)

            for name, info in attachments.items():
                content_type = info.get('content_type')
                if content_type is None: # CouchDB < 0.8
                    content_type = info.get('content-type')
                inner.add(content_type, b64decode(info['data']), {
                    'Content-ID': name
                })
            inner.close()

        else:
            envelope.add('application/json', jsondoc, doc_headers)

    envelope.close()
5663

5764

5865
def main():
@@ -67,8 +74,7 @@ def main():
6774
if len(args) != 1:
6875
return parser.error('incorrect number of arguments')
6976

70-
print dump_db(args[0], username=options.username,
71-
password=options.password)
77+
dump_db(args[0], username=options.username, password=options.password)
7278

7379

7480
if __name__ == '__main__':

0 commit comments

Comments
 (0)