8 changes: 5 additions & 3 deletions .travis.yml
@@ -38,13 +38,15 @@ matrix:
NUMPY_VERSION="1.10.4" SCIPY_VERSION="0.16.1" CYTHON_VERSION="0.25.2"
PILLOW_VERSION="4.0.0" COVERAGE=true
if: type != cron
-# This environment tests the newest supported Anaconda release (5.0.0)
-# It also runs tests requiring Pandas and PyAMG
+# This environment tests the newest supported Anaconda release.
+# It runs tests requiring pandas and PyAMG.
+# It also runs with the site joblib instead of the vendored copy of joblib.
- env: DISTRIB="conda" PYTHON_VERSION="3.6.2" INSTALL_MKL="true"
NUMPY_VERSION="1.14.2" SCIPY_VERSION="1.0.0" PANDAS_VERSION="0.20.3"
CYTHON_VERSION="0.26.1" PYAMG_VERSION="3.3.2" PILLOW_VERSION="4.3.0"
-COVERAGE=true
+JOBLIB_VERSION="0.12.0" COVERAGE=true
CHECK_PYTEST_SOFT_DEPENDENCY="true" TEST_DOCSTRINGS="true"
+SKLEARN_SITE_JOBLIB=1
if: type != cron
# flake8 linting on diff wrt common ancestor with upstream/master
- env: RUN_FLAKE8="true" SKIP_TESTS="true"
2 changes: 1 addition & 1 deletion benchmarks/bench_covertype.py
@@ -59,7 +59,7 @@
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import zero_one_loss
-from sklearn.externals.joblib import Memory
+from sklearn.utils import Memory
from sklearn.utils import check_array

# Memoize the data extraction and memory map the resulting
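The benchmarks above lean on the memoization pattern hinted at in that comment. A minimal sketch, assuming the re-exported ``Memory`` from this change (the loader and cache path are illustrative, not from the diff)::

    from sklearn.utils import Memory  # re-export of joblib.Memory on this branch

    # Cache results on disk; memory-map cached arrays on later loads
    memory = Memory('./mycache', mmap_mode='r')

    @memory.cache
    def load_data():
        import numpy as np
        return np.vander(np.arange(100))

    X = load_data()   # first call computes and writes the cache
    X = load_data()   # later calls read the memory-mapped result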
2 changes: 1 addition & 1 deletion benchmarks/bench_mnist.py
@@ -41,7 +41,7 @@
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.dummy import DummyClassifier
-from sklearn.externals.joblib import Memory
+from sklearn.utils import Memory
from sklearn.kernel_approximation import Nystroem
from sklearn.kernel_approximation import RBFSampler
from sklearn.metrics import zero_one_loss
2 changes: 1 addition & 1 deletion benchmarks/bench_plot_nmf.py
@@ -22,7 +22,7 @@
from sklearn.decomposition.nmf import _initialize_nmf
from sklearn.decomposition.nmf import _beta_divergence
from sklearn.decomposition.nmf import INTEGER_TYPES, _check_init
-from sklearn.externals.joblib import Memory
+from sklearn.utils import Memory
from sklearn.exceptions import ConvergenceWarning
from sklearn.utils.extmath import safe_sparse_dot, squared_norm
from sklearn.utils import check_array
2 changes: 1 addition & 1 deletion benchmarks/bench_rcv1_logreg_convergence.py
@@ -8,7 +8,7 @@
import gc
import time

-from sklearn.externals.joblib import Memory
+from sklearn.utils import Memory
from sklearn.linear_model import (LogisticRegression, SGDClassifier)
from sklearn.datasets import fetch_rcv1
from sklearn.linear_model.sag import get_auto_step_size
2 changes: 1 addition & 1 deletion benchmarks/bench_saga.py
@@ -12,7 +12,7 @@

from sklearn.datasets import fetch_rcv1, load_iris, load_digits, \
fetch_20newsgroups_vectorized
-from sklearn.externals.joblib import delayed, Parallel, Memory
+from sklearn.utils import delayed, Parallel, Memory
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split
2 changes: 1 addition & 1 deletion benchmarks/bench_tsne_mnist.py
@@ -15,7 +15,7 @@
import json
import argparse

-from sklearn.externals.joblib import Memory
+from sklearn.utils import Memory
from sklearn.datasets import fetch_mldata
from sklearn.manifold import TSNE
from sklearn.neighbors import NearestNeighbors
4 changes: 4 additions & 0 deletions build_tools/travis/install.sh
@@ -59,6 +59,10 @@ if [[ "$DISTRIB" == "conda" ]]; then
TO_INSTALL="$TO_INSTALL pillow=$PILLOW_VERSION"
fi

+if [[ -n "$JOBLIB_VERSION" ]]; then
+    TO_INSTALL="$TO_INSTALL joblib=$JOBLIB_VERSION"
+fi
+
conda create -n testenv --yes $TO_INSTALL
source activate testenv

2 changes: 1 addition & 1 deletion doc/developers/utilities.rst
@@ -45,7 +45,7 @@ should be used when applicable.

- :func:`validation.check_memory` checks that input is ``joblib.Memory``-like,
which means that it can be converted into a
-``sklearn.externals.joblib.Memory`` instance (typically a str denoting
+``sklearn.utils.Memory`` instance (typically a str denoting
the ``cachedir``) or has the same interface.

If your code relies on a random number generator, it should never use
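A minimal sketch of that contract, assuming the ``sklearn.utils.Memory`` re-export introduced in this change::

    from sklearn.utils import Memory
    from sklearn.utils.validation import check_memory

    mem = check_memory('/tmp/sklearn_cache')   # a str cachedir is wrapped in a Memory
    assert isinstance(mem, Memory)
    assert check_memory(mem) is mem            # a Memory-like object passes through unchanged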
2 changes: 1 addition & 1 deletion doc/glossary.rst
@@ -1485,7 +1485,7 @@ functions or non-estimator constructors.
sometimes parallelism happens in prediction (e.g. in random forests).
* Some parallelism uses a multi-threading backend by default, some
a multi-processing backend. It is possible to override the default
-backend by using :func:`sklearn.externals.joblib.parallel.parallel_backend`.
+backend by using :func:`sklearn.utils.parallel_backend`.
* Whether parallel processing is helpful at improving runtime depends
on many factors, and it's usually a good idea to experiment rather
than assuming that increasing the number of jobs is always a good
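A minimal sketch of overriding the backend with the re-exported helper (the estimator and data are illustrative; whether a given estimator honors the context depends on how it invokes ``Parallel``)::

    from sklearn.utils import parallel_backend
    from sklearn.datasets import make_classification
    from sklearn.ensemble import RandomForestClassifier

    X, y = make_classification(n_samples=200, random_state=0)
    clf = RandomForestClassifier(n_estimators=20, n_jobs=2)
    with parallel_backend('threading'):   # request threads instead of processes
        clf.fit(X, y)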
16 changes: 16 additions & 0 deletions doc/modules/classes.rst
@@ -1472,6 +1472,22 @@ Low-level methods
utils.testing.assert_raise_message
utils.testing.all_estimators

+Utilities from joblib:
+
+.. autosummary::
+   :toctree: generated/
+   :template: class.rst
+
+   utils.Memory
+   utils.Parallel
+
+.. autosummary::
+   :toctree: generated/
+   :template: function.rst
+
+   utils.cpu_count
+   utils.delayed
+   utils.parallel_backend

Recently deprecated
===================
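A quick smoke test of the re-exported names documented above (a sketch, assuming this branch is installed)::

    from sklearn.utils import Memory, Parallel, cpu_count, delayed, parallel_backend

    print(cpu_count())                                             # e.g. 4
    print(Parallel(n_jobs=1)(delayed(abs)(-i) for i in range(3)))  # [0, 1, 2]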
11 changes: 3 additions & 8 deletions doc/whats_new/v0.20.rst
@@ -80,12 +80,6 @@ Details are listed in the changelog below.
(While we are trying to better inform users by providing this information, we
cannot assure that this list is complete.)

-**Other backward incompatible change** The vendored version of the joblib
-module is now found at `sklearn.externals._joblib` (:issue:`11166`). The
-main API of joblib is still exposed in `sklearn.externals.joblib`, but
-code doing imports of subpackages of `sklearn.externals.joblib` will
-break.

Changelog
---------

@@ -207,8 +201,9 @@ Misc
:issue:`10280` by `Joel Nothman`_ and :user:`Aman Dalmia <dalmia>`.

- An environment variable to use the site joblib instead of the vendored
-one was added (:ref:`environment_variable`).
-:issue:`11166` by `Gael Varoquaux`_
+one was added (:ref:`environment_variable`). The main API of joblib is now
+exposed in :mod:`sklearn.utils`.
+:issue:`11166` by `Gael Varoquaux`_

Enhancements
............
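A minimal usage sketch of the new switch, assuming (as the CI entry ``SKLEARN_SITE_JOBLIB=1`` above suggests) that the variable is read when ``sklearn`` is first imported and that a recent site joblib is installed::

    import os
    os.environ['SKLEARN_SITE_JOBLIB'] = '1'   # must be set before importing sklearn

    import joblib
    from sklearn.utils import Parallel
    print(Parallel is joblib.Parallel)        # expected True with the site joblib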
2 changes: 1 addition & 1 deletion examples/applications/wikipedia_principal_eigenvector.py
@@ -45,7 +45,7 @@
from scipy import sparse

from sklearn.decomposition import randomized_svd
-from sklearn.externals.joblib import Memory
+from sklearn.utils import Memory
from sklearn.externals.six.moves.urllib.request import urlopen
from sklearn.externals.six import iteritems

@@ -30,7 +30,7 @@
from sklearn.cluster import FeatureAgglomeration
from sklearn.linear_model import BayesianRidge
from sklearn.pipeline import Pipeline
-from sklearn.externals.joblib import Memory
+from sklearn.utils import Memory
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold

2 changes: 1 addition & 1 deletion examples/compose/plot_compare_reduction.py
@@ -104,7 +104,7 @@

from tempfile import mkdtemp
from shutil import rmtree
-from sklearn.externals.joblib import Memory
+from sklearn.utils import Memory

# Create a temporary folder to store the transformers of the pipeline
cachedir = mkdtemp()
4 changes: 2 additions & 2 deletions sklearn/cluster/k_means_.py
@@ -29,8 +29,8 @@
from ..utils import gen_batches
from ..utils.validation import check_is_fitted
from ..utils.validation import FLOAT_DTYPES
-from ..externals.joblib import Parallel
-from ..externals.joblib import delayed
+from ..utils import Parallel
+from ..utils import delayed
from ..externals.six import string_types
from ..exceptions import ConvergenceWarning
from . import _k_means
4 changes: 2 additions & 2 deletions sklearn/cluster/mean_shift_.py
@@ -24,8 +24,8 @@
from ..base import BaseEstimator, ClusterMixin
from ..neighbors import NearestNeighbors
from ..metrics.pairwise import pairwise_distances_argmin
-from ..externals.joblib import Parallel
-from ..externals.joblib import delayed
+from ..utils import Parallel
+from ..utils import delayed


def estimate_bandwidth(X, quantile=0.3, n_samples=None, random_state=0,
2 changes: 1 addition & 1 deletion sklearn/compose/_column_transformer.py
@@ -12,7 +12,7 @@
from scipy import sparse

from ..base import clone, TransformerMixin
-from ..externals.joblib import Parallel, delayed
+from ..utils import Parallel, delayed
from ..externals import six
from ..pipeline import (
_fit_one_transformer, _fit_transform_one, _transform_one, _name_estimators)
2 changes: 1 addition & 1 deletion sklearn/covariance/graph_lasso_.py
@@ -23,7 +23,7 @@
from ..linear_model import lars_path
from ..linear_model import cd_fast
from ..model_selection import check_cv, cross_val_score
-from ..externals.joblib import Parallel, delayed
+from ..utils import Parallel, delayed


# Helper functions to compute the objective and dual objective functions
2 changes: 1 addition & 1 deletion sklearn/datasets/lfw.py
@@ -31,7 +31,7 @@

from .base import get_data_home, _fetch_remote, RemoteFileMetadata
from ..utils import Bunch
-from ..externals.joblib import Memory
+from ..utils import Memory
from ..externals.six import b

logger = logging.getLogger(__name__)
2 changes: 1 addition & 1 deletion sklearn/datasets/svmlight_format.py
@@ -132,7 +132,7 @@ def load_svmlight_file(f, n_features=None, dtype=np.float64,
--------
To use joblib.Memory to cache the svmlight file::

-from sklearn.externals.joblib import Memory
+from sklearn.utils import Memory
from sklearn.datasets import load_svmlight_file
mem = Memory("./mycache")

2 changes: 1 addition & 1 deletion sklearn/decomposition/dict_learning.py
@@ -15,7 +15,7 @@
from numpy.lib.stride_tricks import as_strided

from ..base import BaseEstimator, TransformerMixin
-from ..externals.joblib import Parallel, delayed, cpu_count
+from ..utils import Parallel, delayed, cpu_count
from ..externals.six.moves import zip
from ..utils import (check_array, check_random_state, gen_even_slices,
gen_batches, _get_n_jobs)
2 changes: 1 addition & 1 deletion sklearn/decomposition/online_lda.py
@@ -21,7 +21,7 @@
gen_batches, gen_even_slices, _get_n_jobs)
from ..utils.fixes import logsumexp
from ..utils.validation import check_non_negative
-from ..externals.joblib import Parallel, delayed
+from ..utils import Parallel, delayed
from ..externals.six.moves import xrange
from ..exceptions import NotFittedError

2 changes: 1 addition & 1 deletion sklearn/decomposition/tests/test_sparse_pca.py
@@ -151,7 +151,7 @@ def test_mini_batch_fit_transform():
U1 = spca_lars.transform(Y)
# Test multiple CPUs
if sys.platform == 'win32': # fake parallelism for win32
-import sklearn.externals.joblib.parallel as joblib_par
+import sklearn.utils._joblib.parallel as joblib_par
_mp = joblib_par.multiprocessing
joblib_par.multiprocessing = None
try:
2 changes: 1 addition & 1 deletion sklearn/ensemble/bagging.py
@@ -13,7 +13,7 @@

from .base import BaseEnsemble, _partition_estimators
from ..base import ClassifierMixin, RegressorMixin
-from ..externals.joblib import Parallel, delayed
+from ..utils import Parallel, delayed
from ..externals.six import with_metaclass
from ..externals.six.moves import zip
from ..metrics import r2_score, accuracy_score
2 changes: 1 addition & 1 deletion sklearn/ensemble/forest.py
@@ -52,7 +52,7 @@ class calls the ``fit`` method of each sub-estimator on random samples


from ..base import ClassifierMixin, RegressorMixin
-from ..externals.joblib import Parallel, delayed
+from ..utils import Parallel, delayed
from ..externals import six
from ..metrics import r2_score
from ..preprocessing import OneHotEncoder
2 changes: 1 addition & 1 deletion sklearn/ensemble/partial_dependence.py
@@ -10,7 +10,7 @@
from scipy.stats.mstats import mquantiles

from ..utils.extmath import cartesian
-from ..externals.joblib import Parallel, delayed
+from ..utils import Parallel, delayed
from ..externals import six
from ..externals.six.moves import map, range, zip
from ..utils import check_array
2 changes: 1 addition & 1 deletion sklearn/ensemble/voting_classifier.py
@@ -18,7 +18,7 @@
from ..base import TransformerMixin
from ..base import clone
from ..preprocessing import LabelEncoder
-from ..externals.joblib import Parallel, delayed
+from ..utils import Parallel, delayed
from ..utils.validation import has_fit_parameter, check_is_fitted
from ..utils.metaestimators import _BaseComposition
from ..utils import Bunch
6 changes: 3 additions & 3 deletions sklearn/externals/copy_joblib.sh
@@ -12,14 +12,14 @@ else
fi

pip install $JOBLIB --target $INSTALL_FOLDER
-cp -r $INSTALL_FOLDER/joblib _joblib
+cp -r $INSTALL_FOLDER/joblib joblib
rm -rf $INSTALL_FOLDER

# Needed to rewrite the doctests
# Note: BSD sed -i needs an argument under OSX
# so first renaming to .bak and then deleting backup files
-find _joblib -name "*.py" | xargs sed -i.bak "s/from joblib/from sklearn.externals.joblib/"
-find _joblib -name "*.bak" | xargs rm
+find joblib -name "*.py" | xargs sed -i.bak "s/from joblib/from sklearn.externals.joblib/"
+find joblib -name "*.bak" | xargs rm

# Remove the tests folders to speed-up test time for scikit-learn.
# joblib is already tested on its own CI infrastructure upstream.
@@ -58,7 +58,7 @@
inputs and outputs: Python functions. Joblib can save their
computation to disk and rerun it only if necessary::

->>> from sklearn.externals.joblib import Memory
+>>> from sklearn.utils import Memory
>>> mem = Memory(cachedir='/tmp/joblib')
>>> import numpy as np
>>> a = np.vander(np.arange(3)).astype(np.float)
@@ -77,7 +77,7 @@
2) **Embarrassingly parallel helper:** to make it easy to write readable
parallel code and debug it quickly::

->>> from sklearn.externals.joblib import Parallel, delayed
+>>> from sklearn.utils import Parallel, delayed
>>> from math import sqrt
>>> Parallel(n_jobs=1)(delayed(sqrt)(i**2) for i in range(10))
[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]
File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -384,15 +384,15 @@ class Parallel(Logger):
A simple example:

>>> from math import sqrt
->>> from sklearn.externals.joblib import Parallel, delayed
+>>> from sklearn.utils import Parallel, delayed
>>> Parallel(n_jobs=1)(delayed(sqrt)(i**2) for i in range(10))
[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]

Reshaping the output when the function has several return
values:

>>> from math import modf
->>> from sklearn.externals.joblib import Parallel, delayed
+>>> from sklearn.utils import Parallel, delayed
>>> r = Parallel(n_jobs=1)(delayed(modf)(i/2.) for i in range(10))
>>> res, i = zip(*r)
>>> res
@@ -404,7 +404,7 @@ class Parallel(Logger):
messages:

>>> from time import sleep
->>> from sklearn.externals.joblib import Parallel, delayed
+>>> from sklearn.utils import Parallel, delayed
>>> r = Parallel(n_jobs=2, verbose=5)(delayed(sleep)(.1) for _ in range(10)) #doctest: +SKIP
[Parallel(n_jobs=2)]: Done 1 out of 10 | elapsed: 0.1s remaining: 0.9s
[Parallel(n_jobs=2)]: Done 3 out of 10 | elapsed: 0.2s remaining: 0.5s
@@ -418,7 +418,7 @@ class Parallel(Logger):
child process:

>>> from heapq import nlargest
->>> from sklearn.externals.joblib import Parallel, delayed
+>>> from sklearn.utils import Parallel, delayed
>>> Parallel(n_jobs=2)(delayed(nlargest)(2, n) for n in (range(4), 'abcde', 3)) #doctest: +SKIP
#...
---------------------------------------------------------------------------
@@ -449,7 +449,7 @@ class Parallel(Logger):
number of iterations cannot be reported in the progress messages:

>>> from math import sqrt
->>> from sklearn.externals.joblib import Parallel, delayed
+>>> from sklearn.utils import Parallel, delayed
>>> def producer():
... for i in range(6):
... print('Produced %s' % i)
File renamed without changes.
2 changes: 1 addition & 1 deletion sklearn/externals/setup.py
@@ -4,6 +4,6 @@
def configuration(parent_package='', top_path=None):
from numpy.distutils.misc_util import Configuration
config = Configuration('externals', parent_package, top_path)
-config.add_subpackage('_joblib')
+config.add_subpackage('joblib')

return config
2 changes: 1 addition & 1 deletion sklearn/feature_selection/rfe.py
@@ -15,7 +15,7 @@
from ..base import MetaEstimatorMixin
from ..base import clone
from ..base import is_classifier
-from ..externals.joblib import Parallel, delayed
+from ..utils import Parallel, delayed
from ..model_selection import check_cv
from ..model_selection._validation import _score
from ..metrics.scorer import check_scoring