Skip to content

Commit effb919

Browse files
committed
Upgrade joblib to 0.9.4 to fix important bugs
1 parent f2ac3db commit effb919

File tree

11 files changed

+295
-162
lines changed

11 files changed

+295
-162
lines changed

sklearn/externals/joblib/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@
7474
>>> c = square(a)
7575
>>> # The above call did not trigger an evaluation
7676
77-
2) **Embarrassingly parallel helper:** to make is easy to write readable
77+
2) **Embarrassingly parallel helper:** to make it easy to write readable
7878
parallel code and debug it quickly::
7979
8080
>>> from sklearn.externals.joblib import Parallel, delayed
@@ -86,7 +86,7 @@
8686
3) **Logging/tracing:** The different functionalities will
8787
progressively acquire better logging mechanism to help track what
8888
has been run, and capture I/O easily. In addition, Joblib will
89-
provide a few I/O primitives, to easily define define logging and
89+
provide a few I/O primitives, to easily define logging and
9090
display streams, and provide a way of compiling a report.
9191
We want to be able to quickly inspect what has been run.
9292
@@ -115,7 +115,7 @@
115115
# Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
116116
# 'X.Y.dev0' is the canonical version of 'X.Y.dev'
117117
#
118-
__version__ = '0.9.3'
118+
__version__ = '0.9.4'
119119

120120

121121
from .memory import Memory, MemorizedResult

sklearn/externals/joblib/_compat.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
"""
22
Compatibility layer for Python 3/Python 2 single codebase
33
"""
4+
import sys
5+
6+
PY3_OR_LATER = sys.version_info[0] >= 3
47

58
try:
69
_basestring = basestring
710
_bytes_or_unicode = (str, unicode)
811
except NameError:
912
_basestring = str
10-
_bytes_or_unicode = (bytes, str)
13+
_bytes_or_unicode = (bytes, str)

sklearn/externals/joblib/format_stack.py

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -28,16 +28,14 @@
2828
import time
2929
import tokenize
3030
import traceback
31-
import types
31+
3232
try: # Python 2
3333
generate_tokens = tokenize.generate_tokens
3434
except AttributeError: # Python 3
3535
generate_tokens = tokenize.tokenize
3636

37-
PY3 = (sys.version[0] == '3')
3837
INDENT = ' ' * 8
3938

40-
from ._compat import _basestring
4139

4240
###############################################################################
4341
# some internal-use functions
@@ -195,14 +193,13 @@ def format_records(records): # , print_globals=False):
195193
# the abspath call will throw an OSError. Just ignore it and
196194
# keep the original file string.
197195
pass
196+
197+
if file.endswith('.pyc'):
198+
file = file[:-4] + '.py'
199+
198200
link = file
199-
try:
200-
args, varargs, varkw, locals = inspect.getargvalues(frame)
201-
except:
202-
# This can happen due to a bug in python2.3. We should be
203-
# able to remove this try/except when 2.4 becomes a
204-
# requirement. Bug details at http://python.org/sf/1005466
205-
print("\nJoblib's exception reporting continues...\n")
201+
202+
args, varargs, varkw, locals = inspect.getargvalues(frame)
206203

207204
if func == '?':
208205
call = ''
@@ -350,13 +347,11 @@ def format_exc(etype, evalue, etb, context=5, tb_offset=0):
350347
date = time.ctime(time.time())
351348
pid = 'PID: %i' % os.getpid()
352349

353-
head = '%s%s%s\n%s%s%s' % (etype, ' ' * (75 - len(str(etype)) - len(date)),
354-
date, pid, ' ' * (75 - len(str(pid)) - len(pyver)),
355-
pyver)
350+
head = '%s%s%s\n%s%s%s' % (
351+
etype, ' ' * (75 - len(str(etype)) - len(date)),
352+
date, pid, ' ' * (75 - len(str(pid)) - len(pyver)),
353+
pyver)
356354

357-
# Flush cache before calling inspect. This helps alleviate some of the
358-
# problems with python 2.3's inspect.py.
359-
linecache.checkcache()
360355
# Drop topmost frames if requested
361356
try:
362357
records = _fixed_getframes(etb, context, tb_offset)

sklearn/externals/joblib/func_inspect.py

Lines changed: 84 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
from ._compat import _basestring
1616
from .logger import pformat
1717
from ._memory_helpers import open_py_source
18+
from ._compat import PY3_OR_LATER
19+
1820

1921
def get_func_code(func):
2022
""" Attempts to retrieve a reliable function code hash.
@@ -156,6 +158,53 @@ def get_func_name(func, resolv_alias=True, win_characters=True):
156158
return module, name
157159

158160

161+
def getfullargspec(func):
    """Compatibility function to provide inspect.getfullargspec in Python 2

    This should be rewritten using a backport of Python 3 signature
    once we drop support for Python 2.6. We went for a simpler
    approach at the time of writing because signature uses OrderedDict
    which is not available in Python 2.6.

    Parameters
    ----------
    func: callable
        The function to inspect.

    Returns
    -------
    The result of ``inspect.getfullargspec(func)`` when that function
    exists, otherwise a ``FullArgSpec`` named tuple with the fields
    ``args``, ``varargs``, ``varkw``, ``defaults``, ``kwonlyargs``,
    ``kwonlydefaults`` and ``annotations``.
    """
    # Detect the feature up front instead of catching AttributeError around
    # the call, so that an AttributeError raised while inspecting ``func``
    # is not mistaken for a missing ``inspect.getfullargspec``.
    native_getfullargspec = getattr(inspect, 'getfullargspec', None)
    if native_getfullargspec is not None:
        return native_getfullargspec(func)

    import collections

    arg_spec = inspect.getargspec(func)
    tuple_fields = ('args varargs varkw defaults kwonlyargs '
                    'kwonlydefaults annotations')
    tuple_type = collections.namedtuple('FullArgSpec', tuple_fields)

    # Only the first four fields come from ``inspect.getargspec``; the
    # remaining ones are filled with empty placeholders.
    return tuple_type(args=arg_spec.args,
                      varargs=arg_spec.varargs,
                      varkw=arg_spec.keywords,
                      defaults=arg_spec.defaults,
                      kwonlyargs=[],
                      kwonlydefaults=None,
                      annotations={})
185+
186+
187+
def _signature_str(function_name, arg_spec):
    """Return ``function_name`` followed by its formatted argument list"""
    # inspect.formatargspec is not given the same number of fields on
    # Python 2 and Python 3: all seven on Python 3, only the first four
    # otherwise.
    if PY3_OR_LATER:
        n_fields = 7
    else:
        n_fields = 4
    formatted_args = inspect.formatargspec(*arg_spec[:n_fields])
    return '{0}{1}'.format(function_name, formatted_args)
195+
196+
197+
def _function_called_str(function_name, args, kwargs):
198+
"""Helper function to output a function call"""
199+
template_str = '{0}({1}, {2})'
200+
201+
args_str = repr(args)[1:-1]
202+
kwargs_str = ', '.join('%s=%s' % (k, v)
203+
for k, v in kwargs.items())
204+
return template_str.format(function_name, args_str,
205+
kwargs_str)
206+
207+
159208
def filter_args(func, ignore_lst, args=(), kwargs=dict()):
160209
""" Filters the given args and kwargs using a list of arguments to
161210
ignore, and a function specification.
@@ -180,24 +229,23 @@ def filter_args(func, ignore_lst, args=(), kwargs=dict()):
180229
args = list(args)
181230
if isinstance(ignore_lst, _basestring):
182231
# Catch a common mistake
183-
raise ValueError('ignore_lst must be a list of parameters to ignore '
232+
raise ValueError(
233+
'ignore_lst must be a list of parameters to ignore '
184234
'%s (type %s) was given' % (ignore_lst, type(ignore_lst)))
185235
# Special case for functools.partial objects
186236
if (not inspect.ismethod(func) and not inspect.isfunction(func)):
187237
if ignore_lst:
188238
warnings.warn('Cannot inspect object %s, ignore list will '
189-
'not work.' % func, stacklevel=2)
239+
'not work.' % func, stacklevel=2)
190240
return {'*': args, '**': kwargs}
191-
arg_spec = inspect.getargspec(func)
192-
# We need to if/them to account for different versions of Python
193-
if hasattr(arg_spec, 'args'):
194-
arg_names = arg_spec.args
195-
arg_defaults = arg_spec.defaults
196-
arg_keywords = arg_spec.keywords
197-
arg_varargs = arg_spec.varargs
198-
else:
199-
arg_names, arg_varargs, arg_keywords, arg_defaults = arg_spec
200-
arg_defaults = arg_defaults or {}
241+
arg_spec = getfullargspec(func)
242+
arg_names = arg_spec.args + arg_spec.kwonlyargs
243+
arg_defaults = arg_spec.defaults or ()
244+
arg_defaults = arg_defaults + tuple(arg_spec.kwonlydefaults[k]
245+
for k in arg_spec.kwonlyargs)
246+
arg_varargs = arg_spec.varargs
247+
arg_varkw = arg_spec.varkw
248+
201249
if inspect.ismethod(func):
202250
# First argument is 'self', it has been removed by Python
203251
# we need to add it back:
@@ -211,7 +259,18 @@ def filter_args(func, ignore_lst, args=(), kwargs=dict()):
211259
for arg_position, arg_name in enumerate(arg_names):
212260
if arg_position < len(args):
213261
# Positional argument or keyword argument given as positional
214-
arg_dict[arg_name] = args[arg_position]
262+
if arg_name not in arg_spec.kwonlyargs:
263+
arg_dict[arg_name] = args[arg_position]
264+
else:
265+
raise ValueError(
266+
"Keyword-only parameter '%s' was passed as "
267+
'positional parameter for %s:\n'
268+
' %s was called.'
269+
% (arg_name,
270+
_signature_str(name, arg_spec),
271+
_function_called_str(name, args, kwargs))
272+
)
273+
215274
else:
216275
position = arg_position - len(arg_names)
217276
if arg_name in kwargs:
@@ -221,28 +280,24 @@ def filter_args(func, ignore_lst, args=(), kwargs=dict()):
221280
arg_dict[arg_name] = arg_defaults[position]
222281
except (IndexError, KeyError):
223282
# Missing argument
224-
raise ValueError('Wrong number of arguments for %s%s:\n'
225-
' %s(%s, %s) was called.'
226-
% (name,
227-
inspect.formatargspec(*inspect.getargspec(func)),
228-
name,
229-
repr(args)[1:-1],
230-
', '.join('%s=%s' % (k, v)
231-
for k, v in kwargs.items())
232-
)
233-
)
283+
raise ValueError(
284+
'Wrong number of arguments for %s:\n'
285+
' %s was called.'
286+
% (_signature_str(name, arg_spec),
287+
_function_called_str(name, args, kwargs))
288+
)
234289

235290
varkwargs = dict()
236291
for arg_name, arg_value in sorted(kwargs.items()):
237292
if arg_name in arg_dict:
238293
arg_dict[arg_name] = arg_value
239-
elif arg_keywords is not None:
294+
elif arg_varkw is not None:
240295
varkwargs[arg_name] = arg_value
241296
else:
242297
raise TypeError("Ignore list for %s() contains an unexpected "
243298
"keyword argument '%s'" % (name, arg_name))
244299

245-
if arg_keywords is not None:
300+
if arg_varkw is not None:
246301
arg_dict['**'] = varkwargs
247302
if arg_varargs is not None:
248303
varargs = args[arg_position + 1:]
@@ -254,13 +309,10 @@ def filter_args(func, ignore_lst, args=(), kwargs=dict()):
254309
arg_dict.pop(item)
255310
else:
256311
raise ValueError("Ignore list: argument '%s' is not defined for "
257-
"function %s%s" %
258-
(item, name,
259-
inspect.formatargspec(arg_names,
260-
arg_varargs,
261-
arg_keywords,
262-
arg_defaults,
263-
)))
312+
"function %s"
313+
% (item,
314+
_signature_str(name, arg_spec))
315+
)
264316
# XXX: Return a sorted list of pairs?
265317
return arg_dict
266318

sklearn/externals/joblib/hashing.py

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,17 @@
77
# Copyright (c) 2009 Gael Varoquaux
88
# License: BSD Style, 3 clauses.
99

10-
import warnings
1110
import pickle
1211
import hashlib
1312
import sys
1413
import types
1514
import struct
16-
from ._compat import _bytes_or_unicode
17-
1815
import io
1916

20-
PY3 = sys.version[0] == '3'
17+
from ._compat import _bytes_or_unicode, PY3_OR_LATER
18+
2119

22-
if PY3:
20+
if PY3_OR_LATER:
2321
Pickler = pickle._Pickler
2422
else:
2523
Pickler = pickle.Pickler
@@ -30,7 +28,17 @@ class _ConsistentSet(object):
3028
whatever the order of its items.
3129
"""
3230
def __init__(self, set_sequence):
33-
self._sequence = sorted(set_sequence)
31+
# Forces order of elements in set to ensure consistent hash.
32+
try:
33+
# Trying first to order the set assuming the type of elements is
34+
# consistent and orderable.
35+
# This fails on python 3 when elements are unorderable
36+
# but we keep it in a try as it's faster.
37+
self._sequence = sorted(set_sequence)
38+
except TypeError:
39+
# If elements are unorderable, sorting them using their hash.
40+
# This is slower but works in any case.
41+
self._sequence = sorted((hash(e) for e in set_sequence))
3442

3543

3644
class _MyHash(object):
@@ -49,7 +57,7 @@ def __init__(self, hash_name='md5'):
4957
self.stream = io.BytesIO()
5058
# By default we want a pickle protocol that only changes with
5159
# the major python version and not the minor one
52-
protocol = (pickle.DEFAULT_PROTOCOL if PY3
60+
protocol = (pickle.DEFAULT_PROTOCOL if PY3_OR_LATER
5361
else pickle.HIGHEST_PROTOCOL)
5462
Pickler.__init__(self, self.stream, protocol=protocol)
5563
# Initialise the hash obj
@@ -59,7 +67,8 @@ def hash(self, obj, return_digest=True):
5967
try:
6068
self.dump(obj)
6169
except pickle.PicklingError as e:
62-
warnings.warn('PicklingError while hashing %r: %r' % (obj, e))
70+
e.args += ('PicklingError while hashing %r: %r' % (obj, e),)
71+
raise
6372
dumps = self.stream.getvalue()
6473
self._hash.update(dumps)
6574
if return_digest:
@@ -128,8 +137,18 @@ def save_global(self, obj, name=None, pack=struct.pack):
128137
dispatch[type(pickle.dump)] = save_global
129138

130139
def _batch_setitems(self, items):
131-
# forces order of keys in dict to ensure consistent hash
132-
Pickler._batch_setitems(self, iter(sorted(items)))
140+
# forces order of keys in dict to ensure consistent hash.
141+
try:
142+
# Trying first to compare dict assuming the type of keys is
143+
# consistent and orderable.
144+
# This fails on python 3 when keys are unorderable
145+
# but we keep it in a try as it's faster.
146+
Pickler._batch_setitems(self, iter(sorted(items)))
147+
except TypeError:
148+
# If keys are unorderable, sorting them using their hash. This is
149+
# slower but works in any case.
150+
Pickler._batch_setitems(self, iter(sorted((hash(k), v)
151+
for k, v in items)))
133152

134153
def save_set(self, set_items):
135154
# forces order of items in Set to ensure consistent hash

sklearn/externals/joblib/memory.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
from .logger import Logger, format_time, pformat
3737
from . import numpy_pickle
3838
from .disk import mkdirp, rm_subdirs
39-
from ._compat import _basestring
39+
from ._compat import _basestring, PY3_OR_LATER
4040

4141
FIRST_LINE_TEXT = "# first line:"
4242

@@ -547,7 +547,7 @@ def _write_func_code(self, filename, func_code, first_line):
547547
out.write(func_code)
548548
# Also store in the in-memory store of function hashes
549549
is_named_callable = False
550-
if sys.version_info[0] > 2:
550+
if PY3_OR_LATER:
551551
is_named_callable = (hasattr(self.func, '__name__')
552552
and self.func.__name__ != '<lambda>')
553553
else:

0 commit comments

Comments
 (0)