8 changes: 5 additions & 3 deletions .travis.yml
@@ -38,13 +38,15 @@ matrix:
NUMPY_VERSION="1.10.4" SCIPY_VERSION="0.16.1" CYTHON_VERSION="0.25.2"
PILLOW_VERSION="4.0.0" COVERAGE=true
if: type != cron
-# This environment tests the newest supported Anaconda release (5.0.0)
-# It also runs tests requiring Pandas and PyAMG
+# This environment tests the newest supported Anaconda release.
+# It runs tests requiring pandas and PyAMG.
+# It also runs with the site joblib instead of the vendored copy of joblib.
- env: DISTRIB="conda" PYTHON_VERSION="3.6.2" INSTALL_MKL="true"
NUMPY_VERSION="1.14.2" SCIPY_VERSION="1.0.0" PANDAS_VERSION="0.20.3"
CYTHON_VERSION="0.26.1" PYAMG_VERSION="3.3.2" PILLOW_VERSION="4.3.0"
-COVERAGE=true
+JOBLIB_VERSION="0.12.0" COVERAGE=true
CHECK_PYTEST_SOFT_DEPENDENCY="true" TEST_DOCSTRINGS="true"
+SKLEARN_SITE_JOBLIB=1
if: type != cron
# flake8 linting on diff wrt common ancestor with upstream/master
- env: RUN_FLAKE8="true" SKIP_TESTS="true"
2 changes: 1 addition & 1 deletion benchmarks/bench_covertype.py
@@ -59,7 +59,7 @@
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import zero_one_loss
-from sklearn.externals.joblib import Memory
+from sklearn.utils import Memory
from sklearn.utils import check_array

# Memoize the data extraction and memory map the resulting
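The benchmarks above lean on the memoization pattern hinted at in that comment. A minimal sketch, assuming the re-exported ``Memory`` from this change (the loader and cache path are illustrative, not from the diff)::

    from sklearn.utils import Memory  # re-export of joblib.Memory on this branch

    # Cache results on disk; memory-map cached arrays on later loads
    memory = Memory('./mycache', mmap_mode='r')

    @memory.cache
    def load_data():
        import numpy as np
        return np.vander(np.arange(100))

    X = load_data()   # first call computes and writes the cache
    X = load_data()   # later calls read the memory-mapped result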
2 changes: 1 addition & 1 deletion benchmarks/bench_mnist.py
@@ -41,7 +41,7 @@
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.dummy import DummyClassifier
-from sklearn.externals.joblib import Memory
+from sklearn.utils import Memory
from sklearn.kernel_approximation import Nystroem
from sklearn.kernel_approximation import RBFSampler
from sklearn.metrics import zero_one_loss
2 changes: 1 addition & 1 deletion benchmarks/bench_plot_nmf.py
@@ -22,7 +22,7 @@
from sklearn.decomposition.nmf import _initialize_nmf
from sklearn.decomposition.nmf import _beta_divergence
from sklearn.decomposition.nmf import INTEGER_TYPES, _check_init
-from sklearn.externals.joblib import Memory
+from sklearn.utils import Memory
from sklearn.exceptions import ConvergenceWarning
from sklearn.utils.extmath import safe_sparse_dot, squared_norm
from sklearn.utils import check_array
2 changes: 1 addition & 1 deletion benchmarks/bench_rcv1_logreg_convergence.py
@@ -8,7 +8,7 @@
import gc
import time

-from sklearn.externals.joblib import Memory
+from sklearn.utils import Memory
from sklearn.linear_model import (LogisticRegression, SGDClassifier)
from sklearn.datasets import fetch_rcv1
from sklearn.linear_model.sag import get_auto_step_size
2 changes: 1 addition & 1 deletion benchmarks/bench_saga.py
@@ -12,7 +12,7 @@

from sklearn.datasets import fetch_rcv1, load_iris, load_digits, \
fetch_20newsgroups_vectorized
-from sklearn.externals.joblib import delayed, Parallel, Memory
+from sklearn.utils import delayed, Parallel, Memory
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split
2 changes: 1 addition & 1 deletion benchmarks/bench_tsne_mnist.py
@@ -15,7 +15,7 @@
import json
import argparse

-from sklearn.externals.joblib import Memory
+from sklearn.utils import Memory
from sklearn.datasets import fetch_mldata
from sklearn.manifold import TSNE
from sklearn.neighbors import NearestNeighbors
4 changes: 4 additions & 0 deletions build_tools/travis/install.sh
@@ -59,6 +59,10 @@ if [[ "$DISTRIB" == "conda" ]]; then
TO_INSTALL="$TO_INSTALL pillow=$PILLOW_VERSION"
fi

+if [[ -n "$JOBLIB_VERSION" ]]; then
+    TO_INSTALL="$TO_INSTALL joblib=$JOBLIB_VERSION"
+fi
+
conda create -n testenv --yes $TO_INSTALL
source activate testenv

2 changes: 1 addition & 1 deletion doc/developers/utilities.rst
@@ -45,7 +45,7 @@ should be used when applicable.

- :func:`validation.check_memory` checks that input is ``joblib.Memory``-like,
which means that it can be converted into a
-``sklearn.externals.joblib.Memory`` instance (typically a str denoting
+``sklearn.utils.Memory`` instance (typically a str denoting
the ``cachedir``) or has the same interface.

If your code relies on a random number generator, it should never use
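A minimal sketch of that contract, assuming the ``sklearn.utils.Memory`` re-export introduced in this change::

    from sklearn.utils import Memory
    from sklearn.utils.validation import check_memory

    mem = check_memory('/tmp/sklearn_cache')   # a str cachedir is wrapped in a Memory
    assert isinstance(mem, Memory)
    assert check_memory(mem) is mem            # a Memory-like object passes through unchanged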
2 changes: 1 addition & 1 deletion doc/glossary.rst
@@ -1485,7 +1485,7 @@ functions or non-estimator constructors.
sometimes parallelism happens in prediction (e.g. in random forests).
* Some parallelism uses a multi-threading backend by default, some
a multi-processing backend. It is possible to override the default
-backend by using :func:`sklearn.externals.joblib.parallel.parallel_backend`.
+backend by using :func:`sklearn.utils.parallel_backend`.
* Whether parallel processing is helpful at improving runtime depends
on many factors, and it's usually a good idea to experiment rather
than assuming that increasing the number of jobs is always a good
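A minimal sketch of overriding the backend with the re-exported helper (the estimator and data are illustrative; whether a given estimator honors the context depends on how it invokes ``Parallel``)::

    from sklearn.utils import parallel_backend
    from sklearn.datasets import make_classification
    from sklearn.ensemble import RandomForestClassifier

    X, y = make_classification(n_samples=200, random_state=0)
    clf = RandomForestClassifier(n_estimators=20, n_jobs=2)
    with parallel_backend('threading'):   # request threads instead of processes
        clf.fit(X, y)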
16 changes: 16 additions & 0 deletions doc/modules/classes.rst
@@ -1472,6 +1472,22 @@ Low-level methods
utils.testing.assert_raise_message
utils.testing.all_estimators

+Utilities from joblib:
+
+.. autosummary::
+   :toctree: generated/
+   :template: class.rst
+
+   utils.Memory
+   utils.Parallel
+
+.. autosummary::
+   :toctree: generated/
+   :template: function.rst
+
+   utils.cpu_count
+   utils.delayed
+   utils.parallel_backend

Recently deprecated
===================
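A quick smoke test of the re-exported names documented above (a sketch, assuming this branch is installed)::

    from sklearn.utils import Memory, Parallel, cpu_count, delayed, parallel_backend

    print(cpu_count())                                             # e.g. 4
    print(Parallel(n_jobs=1)(delayed(abs)(-i) for i in range(3)))  # [0, 1, 2]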
11 changes: 3 additions & 8 deletions doc/whats_new/v0.20.rst
@@ -80,12 +80,6 @@ Details are listed in the changelog below.
(While we are trying to better inform users by providing this information, we
cannot assure that this list is complete.)

-**Other backward incompatible change** The vendored version of the joblib
-module is now found at `sklearn.externals._joblib` (:issue:`11166`). The
-main API of joblib is still exposed in `sklearn.externals.joblib`, but
-code doing imports of subpackages of `sklearn.externals.joblib` will
-break.

Changelog
---------

@@ -207,8 +201,9 @@ Misc
:issue:`10280` by `Joel Nothman`_ and :user:`Aman Dalmia <dalmia>`.

- An environment variable to use the site joblib instead of the vendored
-one was added (:ref:`environment_variable`).
-:issue:`11166` by `Gael Varoquaux`_
+one was added (:ref:`environment_variable`). The main API of joblib is now
+exposed in :mod:`sklearn.utils`.
+:issue:`11166` by `Gael Varoquaux`_

Enhancements
............
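A minimal usage sketch of the new switch, assuming (as the CI entry ``SKLEARN_SITE_JOBLIB=1`` above suggests) that the variable is read when ``sklearn`` is first imported and that a recent site joblib is installed::

    import os
    os.environ['SKLEARN_SITE_JOBLIB'] = '1'   # must be set before importing sklearn

    import joblib
    from sklearn.utils import Parallel
    print(Parallel is joblib.Parallel)        # expected True with the site joblib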
2 changes: 1 addition & 1 deletion examples/applications/wikipedia_principal_eigenvector.py
@@ -45,7 +45,7 @@
from scipy import sparse

from sklearn.decomposition import randomized_svd
-from sklearn.externals.joblib import Memory
+from sklearn.utils import Memory
from sklearn.externals.six.moves.urllib.request import urlopen
from sklearn.externals.six import iteritems

@@ -30,7 +30,7 @@
from sklearn.cluster import FeatureAgglomeration
from sklearn.linear_model import BayesianRidge
from sklearn.pipeline import Pipeline
-from sklearn.externals.joblib import Memory
+from sklearn.utils import Memory
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold

2 changes: 1 addition & 1 deletion examples/compose/plot_compare_reduction.py
@@ -104,7 +104,7 @@

from tempfile import mkdtemp
from shutil import rmtree
-from sklearn.externals.joblib import Memory
+from sklearn.utils import Memory

# Create a temporary folder to store the transformers of the pipeline
cachedir = mkdtemp()
4 changes: 2 additions & 2 deletions sklearn/cluster/k_means_.py
@@ -29,8 +29,8 @@
from ..utils import gen_batches
from ..utils.validation import check_is_fitted
from ..utils.validation import FLOAT_DTYPES
-from ..externals.joblib import Parallel
-from ..externals.joblib import delayed
+from ..utils import Parallel
+from ..utils import delayed
from ..externals.six import string_types
from ..exceptions import ConvergenceWarning
from . import _k_means
4 changes: 2 additions & 2 deletions sklearn/cluster/mean_shift_.py
@@ -24,8 +24,8 @@
from ..base import BaseEstimator, ClusterMixin
from ..neighbors import NearestNeighbors
from ..metrics.pairwise import pairwise_distances_argmin
-from ..externals.joblib import Parallel
-from ..externals.joblib import delayed
+from ..utils import Parallel
+from ..utils import delayed


def estimate_bandwidth(X, quantile=0.3, n_samples=None, random_state=0,
2 changes: 1 addition & 1 deletion sklearn/compose/_column_transformer.py
@@ -12,7 +12,7 @@
from scipy import sparse

from ..base import clone, TransformerMixin
-from ..externals.joblib import Parallel, delayed
+from ..utils import Parallel, delayed
from ..externals import six
from ..pipeline import (
_fit_one_transformer, _fit_transform_one, _transform_one, _name_estimators)
2 changes: 1 addition & 1 deletion sklearn/covariance/graph_lasso_.py
@@ -23,7 +23,7 @@
from ..linear_model import lars_path
from ..linear_model import cd_fast
from ..model_selection import check_cv, cross_val_score
-from ..externals.joblib import Parallel, delayed
+from ..utils import Parallel, delayed


# Helper functions to compute the objective and dual objective functions
2 changes: 1 addition & 1 deletion sklearn/datasets/lfw.py
@@ -31,7 +31,7 @@

from .base import get_data_home, _fetch_remote, RemoteFileMetadata
from ..utils import Bunch
-from ..externals.joblib import Memory
+from ..utils import Memory
from ..externals.six import b

logger = logging.getLogger(__name__)
2 changes: 1 addition & 1 deletion sklearn/datasets/svmlight_format.py
@@ -132,7 +132,7 @@ def load_svmlight_file(f, n_features=None, dtype=np.float64,
--------
To use joblib.Memory to cache the svmlight file::

-from sklearn.externals.joblib import Memory
+from sklearn.utils import Memory
from sklearn.datasets import load_svmlight_file
mem = Memory("./mycache")

2 changes: 1 addition & 1 deletion sklearn/decomposition/dict_learning.py
@@ -15,7 +15,7 @@
from numpy.lib.stride_tricks import as_strided

from ..base import BaseEstimator, TransformerMixin
-from ..externals.joblib import Parallel, delayed, cpu_count
+from ..utils import Parallel, delayed, cpu_count
from ..externals.six.moves import zip
from ..utils import (check_array, check_random_state, gen_even_slices,
gen_batches, _get_n_jobs)
2 changes: 1 addition & 1 deletion sklearn/decomposition/online_lda.py
@@ -21,7 +21,7 @@
gen_batches, gen_even_slices, _get_n_jobs)
from ..utils.fixes import logsumexp
from ..utils.validation import check_non_negative
-from ..externals.joblib import Parallel, delayed
+from ..utils import Parallel, delayed
from ..externals.six.moves import xrange
from ..exceptions import NotFittedError

2 changes: 1 addition & 1 deletion sklearn/decomposition/tests/test_sparse_pca.py
@@ -151,7 +151,7 @@ def test_mini_batch_fit_transform():
U1 = spca_lars.transform(Y)
# Test multiple CPUs
if sys.platform == 'win32': # fake parallelism for win32
-import sklearn.externals.joblib.parallel as joblib_par
+import sklearn.utils._joblib.parallel as joblib_par
_mp = joblib_par.multiprocessing
joblib_par.multiprocessing = None
try:
2 changes: 1 addition & 1 deletion sklearn/ensemble/bagging.py
@@ -13,7 +13,7 @@

from .base import BaseEnsemble, _partition_estimators
from ..base import ClassifierMixin, RegressorMixin
-from ..externals.joblib import Parallel, delayed
+from ..utils import Parallel, delayed
from ..externals.six import with_metaclass
from ..externals.six.moves import zip
from ..metrics import r2_score, accuracy_score
2 changes: 1 addition & 1 deletion sklearn/ensemble/forest.py
@@ -52,7 +52,7 @@ class calls the ``fit`` method of each sub-estimator on random samples


from ..base import ClassifierMixin, RegressorMixin
-from ..externals.joblib import Parallel, delayed
+from ..utils import Parallel, delayed
from ..externals import six
from ..metrics import r2_score
from ..preprocessing import OneHotEncoder
2 changes: 1 addition & 1 deletion sklearn/ensemble/partial_dependence.py
@@ -10,7 +10,7 @@
from scipy.stats.mstats import mquantiles

from ..utils.extmath import cartesian
-from ..externals.joblib import Parallel, delayed
+from ..utils import Parallel, delayed
from ..externals import six
from ..externals.six.moves import map, range, zip
from ..utils import check_array
2 changes: 1 addition & 1 deletion sklearn/ensemble/voting_classifier.py
@@ -18,7 +18,7 @@
from ..base import TransformerMixin
from ..base import clone
from ..preprocessing import LabelEncoder
-from ..externals.joblib import Parallel, delayed
+from ..utils import Parallel, delayed
from ..utils.validation import has_fit_parameter, check_is_fitted
from ..utils.metaestimators import _BaseComposition
from ..utils import Bunch
6 changes: 3 additions & 3 deletions sklearn/externals/copy_joblib.sh
@@ -12,14 +12,14 @@ else
fi

pip install $JOBLIB --target $INSTALL_FOLDER
-cp -r $INSTALL_FOLDER/joblib _joblib
+cp -r $INSTALL_FOLDER/joblib joblib
rm -rf $INSTALL_FOLDER

# Needed to rewrite the doctests
# Note: BSD sed -i needs an argument under OSX
# so first renaming to .bak and then deleting backup files
-find _joblib -name "*.py" | xargs sed -i.bak "s/from joblib/from sklearn.externals.joblib/"
-find _joblib -name "*.bak" | xargs rm
+find joblib -name "*.py" | xargs sed -i.bak "s/from joblib/from sklearn.externals.joblib/"
+find joblib -name "*.bak" | xargs rm

# Remove the tests folders to speed-up test time for scikit-learn.
# joblib is already tested on its own CI infrastructure upstream.
@@ -58,7 +58,7 @@
inputs and outputs: Python functions. Joblib can save their
computation to disk and rerun it only if necessary::

->>> from sklearn.externals.joblib import Memory
+>>> from sklearn.utils import Memory
>>> mem = Memory(cachedir='/tmp/joblib')
>>> import numpy as np
>>> a = np.vander(np.arange(3)).astype(np.float)
@@ -77,7 +77,7 @@
2) **Embarrassingly parallel helper:** to make it easy to write readable
parallel code and debug it quickly::

->>> from sklearn.externals.joblib import Parallel, delayed
+>>> from sklearn.utils import Parallel, delayed
>>> from math import sqrt
>>> Parallel(n_jobs=1)(delayed(sqrt)(i**2) for i in range(10))
[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]
File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -384,15 +384,15 @@ class Parallel(Logger):
A simple example:

>>> from math import sqrt
->>> from sklearn.externals.joblib import Parallel, delayed
+>>> from sklearn.utils import Parallel, delayed
>>> Parallel(n_jobs=1)(delayed(sqrt)(i**2) for i in range(10))
[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]

Reshaping the output when the function has several return
values:

>>> from math import modf
->>> from sklearn.externals.joblib import Parallel, delayed
+>>> from sklearn.utils import Parallel, delayed
>>> r = Parallel(n_jobs=1)(delayed(modf)(i/2.) for i in range(10))
>>> res, i = zip(*r)
>>> res
@@ -404,7 +404,7 @@ class Parallel(Logger):
messages:

>>> from time import sleep
->>> from sklearn.externals.joblib import Parallel, delayed
+>>> from sklearn.utils import Parallel, delayed
>>> r = Parallel(n_jobs=2, verbose=5)(delayed(sleep)(.1) for _ in range(10)) #doctest: +SKIP
[Parallel(n_jobs=2)]: Done 1 out of 10 | elapsed: 0.1s remaining: 0.9s
[Parallel(n_jobs=2)]: Done 3 out of 10 | elapsed: 0.2s remaining: 0.5s
@@ -418,7 +418,7 @@ class Parallel(Logger):
child process:

>>> from heapq import nlargest
->>> from sklearn.externals.joblib import Parallel, delayed
+>>> from sklearn.utils import Parallel, delayed
>>> Parallel(n_jobs=2)(delayed(nlargest)(2, n) for n in (range(4), 'abcde', 3)) #doctest: +SKIP
#...
---------------------------------------------------------------------------
@@ -449,7 +449,7 @@ class Parallel(Logger):
number of iterations cannot be reported in the progress messages:

>>> from math import sqrt
->>> from sklearn.externals.joblib import Parallel, delayed
+>>> from sklearn.utils import Parallel, delayed
>>> def producer():
... for i in range(6):
... print('Produced %s' % i)
File renamed without changes.
2 changes: 1 addition & 1 deletion sklearn/externals/setup.py
@@ -4,6 +4,6 @@
def configuration(parent_package='', top_path=None):
from numpy.distutils.misc_util import Configuration
config = Configuration('externals', parent_package, top_path)
-config.add_subpackage('_joblib')
+config.add_subpackage('joblib')

return config
2 changes: 1 addition & 1 deletion sklearn/feature_selection/rfe.py
@@ -15,7 +15,7 @@
from ..base import MetaEstimatorMixin
from ..base import clone
from ..base import is_classifier
-from ..externals.joblib import Parallel, delayed
+from ..utils import Parallel, delayed
from ..model_selection import check_cv
from ..model_selection._validation import _score
from ..metrics.scorer import check_scoring