Skip to content

Commit 94aea52

Browse files
author
Gauvain Pocentek
committed
Allow to stream the downloads when appropriate
Some API calls may download large amounts of data, resulting in high memory usage and out-of-memory errors. For these API calls, use the streaming capabilities of requests and download the data in chunks. The caller is responsible for providing a callable that actually stores the data. The default callable just prints the data to stdout.
1 parent 8e6a944 commit 94aea52

File tree

5 files changed

+75
-23
lines changed

5 files changed

+75
-23
lines changed

docs/gl_objects/builds.py

+13
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,19 @@
7777
build.artifacts()
7878
# end artifacts
7979

80+
# stream artifacts
81+
class Foo(object):
    """Callable sink that writes every streamed chunk to ``artifacts.zip``."""

    def __init__(self):
        # Binary mode: the archive is streamed as raw bytes, and writing
        # bytes to a text-mode file raises TypeError on Python 3.
        self._fd = open('artifacts.zip', 'wb')

    def __call__(self, chunk):
        # Invoked once per downloaded chunk.
        self._fd.write(chunk)

target = Foo()
# `streamed` must be passed only once; repeating a keyword argument is a
# SyntaxError.
build.artifacts(streamed=True, action=target)
del(target)  # flushes data on disk
91+
# end stream artifacts
92+
8093
# keep artifacts
8194
build.keep_artifacts()
8295
# end keep artifacts

docs/gl_objects/builds.rst

+12-2
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,16 @@ Get a build artifacts:
116116

117117
.. warning::
118118

119-
Artifacts are entirely stored in memory.
119+
Artifacts are entirely stored in memory in this example.
120+
121+
.. _streaming_example:
122+
123+
You can download artifacts as a stream. Provide a callable to handle the
124+
stream:
125+
126+
.. literalinclude:: builds.py
127+
:start-after: # stream artifacts
128+
:end-before: # end stream artifacts
120129

121130
Mark a build artifact as kept when expiration is set:
122131

@@ -132,7 +141,8 @@ Get a build trace:
132141

133142
.. warning::
134143

135-
Traces are entirely stored in memory.
144+
Traces are entirely stored in memory unless you use the streaming feature.
145+
See :ref:`the artifacts example <streaming_example>`.
136146

137147
Cancel/retry a build:
138148

gitlab/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ def set_credentials(self, email, password):
286286
self.email = email
287287
self.password = password
288288

289-
def _raw_get(self, path, content_type=None, **kwargs):
289+
def _raw_get(self, path, content_type=None, streamed=False, **kwargs):
290290
url = '%s%s' % (self._url, path)
291291
headers = self._create_headers(content_type)
292292
try:
@@ -295,6 +295,7 @@ def _raw_get(self, path, content_type=None, **kwargs):
295295
headers=headers,
296296
verify=self.ssl_verify,
297297
timeout=self.timeout,
298+
stream=streamed,
298299
auth=requests.auth.HTTPBasicAuth(
299300
self.http_username,
300301
self.http_password))

gitlab/objects.py

+33-20
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929

3030
import gitlab
3131
from gitlab.exceptions import * # noqa
32+
from gitlab import utils
3233

3334

3435
class jsonEncoder(json.JSONEncoder):
@@ -889,22 +890,31 @@ def keep_artifacts(self, **kwargs):
889890
r = self.gitlab._raw_post(url)
890891
raise_error_from_response(r, GitlabGetError, 200)
891892

892-
def artifacts(self, **kwargs):
893+
def artifacts(self, streamed=False, action=None, chunk_size=1024,
894+
**kwargs):
893895
"""Get the build artifacts.
894896
897+
Args:
898+
streamed (bool): If True the data will be processed by chunks of
899+
`chunk_size` and each chunk is passed to `action` for
900+
treatment.
901+
action (callable): Callable responsible for dealing with each chunk of
902+
data.
903+
chunk_size (int): Size of each chunk.
904+
895905
Returns:
896-
str: The artifacts.
906+
str: The artifacts if `streamed` is False, None otherwise.
897907
898908
Raises:
899909
GitlabConnectionError: If the server cannot be reached.
900910
GitlabGetError: If the artifacts are not available.
901911
"""
902912
url = '/projects/%s/builds/%s/artifacts' % (self.project_id, self.id)
903-
r = self.gitlab._raw_get(url)
913+
r = self.gitlab._raw_get(url, streamed=streamed, **kwargs)
904914
raise_error_from_response(r, GitlabGetError, 200)
905-
return r.content
915+
return utils.response_content(r, streamed, action, chunk_size)
906916

907-
def trace(self, **kwargs):
917+
def trace(self, streamed=False, action=None, chunk_size=1024, **kwargs):
908918
"""Get the build trace.
909919
910920
Returns:
@@ -915,9 +925,9 @@ def trace(self, **kwargs):
915925
GitlabGetError: If the trace is not available.
916926
"""
917927
url = '/projects/%s/builds/%s/trace' % (self.project_id, self.id)
918-
r = self.gitlab._raw_get(url)
928+
r = self.gitlab._raw_get(url, streamed=streamed, **kwargs)
919929
raise_error_from_response(r, GitlabGetError, 200)
920-
return r.content
930+
return utils.response_content(r, streamed, action, chunk_size)
921931

922932

923933
class ProjectBuildManager(BaseManager):
@@ -972,7 +982,8 @@ def diff(self, **kwargs):
972982

973983
return r.json()
974984

975-
def blob(self, filepath, **kwargs):
985+
def blob(self, filepath, streamed=False, action=None, chunk_size=1024,
986+
**kwargs):
976987
"""Generate the content of a file for this commit.
977988
978989
Args:
@@ -988,10 +999,9 @@ def blob(self, filepath, **kwargs):
988999
url = ('/projects/%(project_id)s/repository/blobs/%(commit_id)s' %
9891000
{'project_id': self.project_id, 'commit_id': self.id})
9901001
url += '?filepath=%s' % filepath
991-
r = self.gitlab._raw_get(url, **kwargs)
1002+
r = self.gitlab._raw_get(url, streamed=streamed, **kwargs)
9921003
raise_error_from_response(r, GitlabGetError)
993-
994-
return r.content
1004+
return utils.response_content(r, streamed, action, chunk_size)
9951005

9961006
def builds(self, **kwargs):
9971007
"""List the build for this commit.
@@ -1734,7 +1744,8 @@ def blob(self, sha, filepath, **kwargs):
17341744
DeprecationWarning)
17351745
return self.repository_blob(sha, filepath, **kwargs)
17361746

1737-
def repository_blob(self, sha, filepath, **kwargs):
1747+
def repository_blob(self, sha, filepath, streamed=False, action=None,
1748+
chunk_size=1024, **kwargs):
17381749
"""Return the content of a file for a commit.
17391750
17401751
Args:
@@ -1750,11 +1761,12 @@ def repository_blob(self, sha, filepath, **kwargs):
17501761
"""
17511762
url = "/projects/%s/repository/blobs/%s" % (self.id, sha)
17521763
url += '?filepath=%s' % (filepath)
1753-
r = self.gitlab._raw_get(url, **kwargs)
1764+
r = self.gitlab._raw_get(url, streamed=streamed, **kwargs)
17541765
raise_error_from_response(r, GitlabGetError)
1755-
return r.content
1766+
return utils.response_content(r, streamed, action, chunk_size)
17561767

1757-
def repository_raw_blob(self, sha, **kwargs):
1768+
def repository_raw_blob(self, sha, streamed=False, action=None,
1769+
chunk_size=1024, **kwargs):
17581770
"""Returns the raw file contents for a blob by blob SHA.
17591771
17601772
Args:
@@ -1768,9 +1780,9 @@ def repository_raw_blob(self, sha, **kwargs):
17681780
GitlabGetError: If the server fails to perform the request.
17691781
"""
17701782
url = "/projects/%s/repository/raw_blobs/%s" % (self.id, sha)
1771-
r = self.gitlab._raw_get(url, **kwargs)
1783+
r = self.gitlab._raw_get(url, streamed=streamed, **kwargs)
17721784
raise_error_from_response(r, GitlabGetError)
1773-
return r.content
1785+
return utils.response_content(r, streamed, action, chunk_size)
17741786

17751787
def repository_compare(self, from_, to, **kwargs):
17761788
"""Returns a diff between two branches/commits.
@@ -1813,7 +1825,8 @@ def archive(self, sha=None, **kwargs):
18131825
DeprecationWarning)
18141826
return self.repository_archive(sha, **kwargs)
18151827

1816-
def repository_archive(self, sha=None, **kwargs):
1828+
def repository_archive(self, sha=None, streamed=False, action=None,
1829+
chunk_size=1024, **kwargs):
18171830
"""Return a tarball of the repository.
18181831
18191832
Args:
@@ -1829,9 +1842,9 @@ def repository_archive(self, sha=None, **kwargs):
18291842
url = '/projects/%s/repository/archive' % self.id
18301843
if sha:
18311844
url += '?sha=%s' % sha
1832-
r = self.gitlab._raw_get(url, **kwargs)
1845+
r = self.gitlab._raw_get(url, streamed=streamed, **kwargs)
18331846
raise_error_from_response(r, GitlabGetError)
1834-
return r.content
1847+
return utils.response_content(r, streamed, action, chunk_size)
18351848

18361849
def create_file(self, path, branch, content, message, **kwargs):
18371850
"""Creates file in project repository

gitlab/utils.py

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
class _StdoutStream(object):
2+
def __call__(self, chunk):
3+
print(chunk)
4+
5+
6+
def response_content(response, streamed, action, chunk_size):
    """Return the body of `response`, or hand it to `action` chunk by chunk.

    Args:
        response: Object exposing `content` and
            `iter_content(chunk_size=...)` (presumably a
            `requests.Response` -- confirm against callers).
        streamed (bool): If falsy, the whole body is returned at once.
            Otherwise the body is iterated in chunks and nothing is
            returned.
        action (callable): Called with each non-empty chunk. If None, each
            chunk is printed.
        chunk_size (int): Chunk size forwarded to `iter_content`.

    Returns:
        `response.content` if `streamed` is falsy, None otherwise.
    """
    # Truthiness test instead of `is False`: falsy non-bool values such as
    # None or 0 must not silently select the streaming path.
    if not streamed:
        return response.content

    if action is None:
        action = _StdoutStream()

    for chunk in response.iter_content(chunk_size=chunk_size):
        # Empty chunks are skipped; only real data reaches `action`.
        if chunk:
            action(chunk)

0 commit comments

Comments
 (0)