Merged
Changes from all commits
4 changes: 1 addition & 3 deletions sklearn/cluster/k_means_.py
@@ -15,6 +15,7 @@

 import numpy as np
 import scipy.sparse as sp
+from joblib import Parallel, delayed, effective_n_jobs

 from ..base import BaseEstimator, ClusterMixin, TransformerMixin
 from ..metrics.pairwise import euclidean_distances
@@ -28,9 +29,6 @@
 from ..utils import check_random_state
 from ..utils.validation import check_is_fitted
 from ..utils.validation import FLOAT_DTYPES
-from ..utils._joblib import Parallel
-from ..utils._joblib import delayed
-from ..utils._joblib import effective_n_jobs
 from ..exceptions import ConvergenceWarning
 from . import _k_means
 from ._k_means_elkan import k_means_elkan
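Throughout this PR the vendored sklearn.utils._joblib shims are replaced by direct imports from the standalone joblib package; the public API is unchanged. A minimal, self-contained sketch of the Parallel/delayed/effective_n_jobs usage these imports provide (the squaring function is purely illustrative):

    from joblib import Parallel, delayed, effective_n_jobs

    def square(x):
        return x * x

    # effective_n_jobs(-1) resolves -1 to the number of available CPU cores.
    n_jobs = effective_n_jobs(-1)
    results = Parallel(n_jobs=n_jobs)(delayed(square)(i) for i in range(8))
    # results == [0, 1, 4, 9, 16, 25, 36, 49]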
3 changes: 1 addition & 2 deletions sklearn/cluster/mean_shift_.py
@@ -16,15 +16,14 @@

 import numpy as np
 import warnings
+from joblib import Parallel, delayed

 from collections import defaultdict
 from ..utils.validation import check_is_fitted
 from ..utils import check_random_state, gen_batches, check_array
 from ..base import BaseEstimator, ClusterMixin
 from ..neighbors import NearestNeighbors
 from ..metrics.pairwise import pairwise_distances_argmin
-from ..utils._joblib import Parallel
-from ..utils._joblib import delayed


 def estimate_bandwidth(X, quantile=0.3, n_samples=None, random_state=0,
2 changes: 1 addition & 1 deletion sklearn/compose/_column_transformer.py
@@ -11,9 +11,9 @@

 import numpy as np
 from scipy import sparse
+from joblib import Parallel, delayed

 from ..base import clone, TransformerMixin
-from ..utils._joblib import Parallel, delayed
 from ..pipeline import _fit_transform_one, _transform_one, _name_estimators
 from ..preprocessing import FunctionTransformer
 from ..utils import Bunch
2 changes: 1 addition & 1 deletion sklearn/covariance/graph_lasso_.py
@@ -13,6 +13,7 @@

 import numpy as np
 from scipy import linalg
+from joblib import Parallel, delayed

 from .empirical_covariance_ import (empirical_covariance, EmpiricalCovariance,
                                     log_likelihood)
@@ -22,7 +23,6 @@
 from ..linear_model import cd_fast
 from ..linear_model import lars_path_gram
 from ..model_selection import check_cv, cross_val_score
-from ..utils._joblib import Parallel, delayed


 # Helper functions to compute the objective and dual objective functions
7 changes: 4 additions & 3 deletions sklearn/datasets/california_housing.py
@@ -28,12 +28,13 @@
 import numpy as np
 import logging

+import joblib
+
 from .base import get_data_home
 from .base import _fetch_remote
 from .base import _pkl_filepath
 from .base import RemoteFileMetadata
 from ..utils import Bunch
-from ..utils import _joblib

 # The original data can be found at:
 # https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.tgz
@@ -124,11 +125,11 @@ def fetch_california_housing(data_home=None, download_if_missing=True,
         columns_index = [8, 7, 2, 3, 4, 5, 6, 1, 0]
         cal_housing = cal_housing[:, columns_index]

-        _joblib.dump(cal_housing, filepath, compress=6)
+        joblib.dump(cal_housing, filepath, compress=6)
         remove(archive_path)

     else:
-        cal_housing = _joblib.load(filepath)
+        cal_housing = joblib.load(filepath)

     feature_names = ["MedInc", "HouseAge", "AveRooms", "AveBedrms",
                      "Population", "AveOccup", "Latitude", "Longitude"]
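The dataset fetchers all follow one caching pattern: dump the parsed arrays with joblib on first download, reload them on later calls. A hedged sketch of that pattern with a hypothetical cache path (the fetchers use compress levels 0-9; higher is smaller but slower):

    import os
    import numpy as np
    import joblib

    filepath = '/tmp/example_cache.pkl'  # hypothetical; the fetchers build this via _pkl_filepath

    if not os.path.exists(filepath):
        data = np.arange(12).reshape(3, 4)  # stand-in for the downloaded data
        joblib.dump(data, filepath, compress=6)
    else:
        data = joblib.load(filepath)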
10 changes: 5 additions & 5 deletions sklearn/datasets/covtype.py
@@ -20,13 +20,13 @@
 from os import remove, makedirs

 import numpy as np
+import joblib

 from .base import get_data_home
 from .base import _fetch_remote
 from .base import RemoteFileMetadata
 from ..utils import Bunch
 from .base import _pkl_filepath
-from ..utils import _joblib
 from ..utils import check_random_state

 # The original data can be found in:
@@ -117,16 +117,16 @@ def fetch_covtype(data_home=None, download_if_missing=True,
         X = Xy[:, :-1]
         y = Xy[:, -1].astype(np.int32, copy=False)

-        _joblib.dump(X, samples_path, compress=9)
-        _joblib.dump(y, targets_path, compress=9)
+        joblib.dump(X, samples_path, compress=9)
+        joblib.dump(y, targets_path, compress=9)

     elif not available and not download_if_missing:
         raise IOError("Data not found and `download_if_missing` is False")
     try:
         X, y
     except NameError:
-        X = _joblib.load(samples_path)
-        y = _joblib.load(targets_path)
+        X = joblib.load(samples_path)
+        y = joblib.load(targets_path)

     if shuffle:
         ind = np.arange(X.shape[0])
11 changes: 5 additions & 6 deletions sklearn/datasets/kddcup99.py
@@ -15,13 +15,12 @@
 from os.path import dirname, exists, join

 import numpy as np
-
+import joblib

 from .base import _fetch_remote
 from .base import get_data_home
 from .base import RemoteFileMetadata
 from ..utils import Bunch
-from ..utils import _joblib
 from ..utils import check_random_state
 from ..utils import shuffle as shuffle_method

@@ -284,17 +283,17 @@ def _fetch_brute_kddcup99(data_home=None,
        # (error: 'Incorrect data length while decompressing[...] the file
        # could be corrupted.')

-        _joblib.dump(X, samples_path, compress=0)
-        _joblib.dump(y, targets_path, compress=0)
+        joblib.dump(X, samples_path, compress=0)
+        joblib.dump(y, targets_path, compress=0)
     elif not available:
         if not download_if_missing:
             raise IOError("Data not found and `download_if_missing` is False")

     try:
         X, y
     except NameError:
-        X = _joblib.load(samples_path)
-        y = _joblib.load(targets_path)
+        X = joblib.load(samples_path)
+        y = joblib.load(targets_path)

     return Bunch(data=X, target=y)
8 changes: 4 additions & 4 deletions sklearn/datasets/lfw.py
@@ -15,11 +15,11 @@
 from distutils.version import LooseVersion

 import numpy as np
+import joblib
+from joblib import Memory

 from .base import get_data_home, _fetch_remote, RemoteFileMetadata
 from ..utils import Bunch
-from ..utils._joblib import Memory
-from ..utils import _joblib

 logger = logging.getLogger(__name__)

@@ -303,7 +303,7 @@ def fetch_lfw_people(data_home=None, funneled=True, resize=0.5,

     # wrap the loader in a memoizing function that will return memmaped data
     # arrays for optimal memory usage
-    if LooseVersion(_joblib.__version__) < LooseVersion('0.12'):
+    if LooseVersion(joblib.__version__) < LooseVersion('0.12'):
         # Deal with change of API in joblib
         m = Memory(cachedir=lfw_home, compress=6, verbose=0)
     else:
@@ -474,7 +474,7 @@ def fetch_lfw_pairs(subset='train', data_home=None, funneled=True, resize=0.5,

     # wrap the loader in a memoizing function that will return memmaped data
     # arrays for optimal memory usage
-    if LooseVersion(_joblib.__version__) < LooseVersion('0.12'):
+    if LooseVersion(joblib.__version__) < LooseVersion('0.12'):
         # Deal with change of API in joblib
         m = Memory(cachedir=lfw_home, compress=6, verbose=0)
     else:
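lfw.py keeps the LooseVersion check because joblib 0.12 renamed Memory's cachedir argument. A sketch of both branches, assuming the post-0.12 spelling is location (the else branch is not visible in this diff, so that line is an assumption):

    from distutils.version import LooseVersion

    import joblib
    from joblib import Memory

    lfw_home = '/tmp/lfw_home'  # hypothetical cache directory

    if LooseVersion(joblib.__version__) < LooseVersion('0.12'):
        m = Memory(cachedir=lfw_home, compress=6, verbose=0)   # pre-0.12 keyword
    else:
        m = Memory(location=lfw_home, compress=6, verbose=0)   # assumed: keyword renamed in 0.12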
6 changes: 3 additions & 3 deletions sklearn/datasets/olivetti_faces.py
@@ -18,12 +18,12 @@

 import numpy as np
 from scipy.io.matlab import loadmat
+import joblib

 from .base import get_data_home
 from .base import _fetch_remote
 from .base import RemoteFileMetadata
 from .base import _pkl_filepath
-from ..utils import _joblib
 from ..utils import check_random_state, Bunch

 # The original data can be found at:
@@ -104,10 +104,10 @@ def fetch_olivetti_faces(data_home=None, shuffle=False, random_state=0,
         remove(mat_path)

         faces = mfile['faces'].T.copy()
-        _joblib.dump(faces, filepath, compress=6)
+        joblib.dump(faces, filepath, compress=6)
         del mfile
     else:
-        faces = _joblib.load(filepath)
+        faces = joblib.load(filepath)

     # We want floating point data, but float32 is enough (there is only
     # one byte of precision in the original uint8s anyway)
18 changes: 9 additions & 9 deletions sklearn/datasets/rcv1.py
@@ -16,12 +16,12 @@

 import numpy as np
 import scipy.sparse as sp
+import joblib

 from .base import get_data_home
 from .base import _pkl_filepath
 from .base import _fetch_remote
 from .base import RemoteFileMetadata
-from ..utils import _joblib
 from .svmlight_format import load_svmlight_files
 from ..utils import shuffle as shuffle_
 from ..utils import Bunch
@@ -181,16 +181,16 @@ def fetch_rcv1(data_home=None, subset='all', download_if_missing=True,
         sample_id = np.hstack((Xy[9], Xy[1], Xy[3], Xy[5], Xy[7]))
         sample_id = sample_id.astype(np.uint32, copy=False)

-        _joblib.dump(X, samples_path, compress=9)
-        _joblib.dump(sample_id, sample_id_path, compress=9)
+        joblib.dump(X, samples_path, compress=9)
+        joblib.dump(sample_id, sample_id_path, compress=9)

         # delete archives
         for f in files:
             f.close()
             remove(f.name)
     else:
-        X = _joblib.load(samples_path)
-        sample_id = _joblib.load(sample_id_path)
+        X = joblib.load(samples_path)
+        sample_id = joblib.load(sample_id_path)

     # load target (y), categories, and sample_id_bis
     if download_if_missing and (not exists(sample_topics_path) or
@@ -240,11 +240,11 @@
         categories = categories[order]
         y = sp.csr_matrix(y[:, order])

-        _joblib.dump(y, sample_topics_path, compress=9)
-        _joblib.dump(categories, topics_path, compress=9)
+        joblib.dump(y, sample_topics_path, compress=9)
+        joblib.dump(categories, topics_path, compress=9)
     else:
-        y = _joblib.load(sample_topics_path)
-        categories = _joblib.load(topics_path)
+        y = joblib.load(sample_topics_path)
+        categories = joblib.load(topics_path)

     if subset == 'all':
         pass
8 changes: 4 additions & 4 deletions sklearn/datasets/species_distributions.py
@@ -41,16 +41,16 @@
 from os import makedirs, remove
 from os.path import exists

-
 import logging
 import numpy as np

+import joblib
+
 from .base import get_data_home
 from .base import _fetch_remote
 from .base import RemoteFileMetadata
 from ..utils import Bunch
 from .base import _pkl_filepath
-from ..utils import _joblib

 # The original data can be found at:
 # https://biodiversityinformatics.amnh.org/open_source/maxent/samples.zip
@@ -257,8 +257,8 @@ def fetch_species_distributions(data_home=None,
                       test=test,
                       train=train,
                       **extra_params)
-        _joblib.dump(bunch, archive_path, compress=9)
+        joblib.dump(bunch, archive_path, compress=9)
     else:
-        bunch = _joblib.load(archive_path)
+        bunch = joblib.load(archive_path)

     return bunch
6 changes: 3 additions & 3 deletions sklearn/datasets/twenty_newsgroups.py
@@ -35,6 +35,7 @@

 import numpy as np
 import scipy.sparse as sp
+import joblib

 from .base import get_data_home
 from .base import load_files
@@ -43,7 +44,6 @@
 from .base import RemoteFileMetadata
 from ..feature_extraction.text import CountVectorizer
 from ..preprocessing import normalize
-from ..utils import _joblib
 from ..utils import check_random_state, Bunch

 logger = logging.getLogger(__name__)
@@ -398,12 +398,12 @@ def fetch_20newsgroups_vectorized(subset="train", remove=(), data_home=None,
                                    download_if_missing=download_if_missing)

     if os.path.exists(target_file):
-        X_train, X_test = _joblib.load(target_file)
+        X_train, X_test = joblib.load(target_file)
     else:
         vectorizer = CountVectorizer(dtype=np.int16)
         X_train = vectorizer.fit_transform(data_train.data).tocsr()
         X_test = vectorizer.transform(data_test.data).tocsr()
-        _joblib.dump((X_train, X_test), target_file, compress=9)
+        joblib.dump((X_train, X_test), target_file, compress=9)

     # the data is stored as int16 for compactness
     # but normalize needs floats
2 changes: 1 addition & 1 deletion sklearn/decomposition/dict_learning.py
@@ -11,9 +11,9 @@

 import numpy as np
 from scipy import linalg
+from joblib import Parallel, delayed, effective_n_jobs

 from ..base import BaseEstimator, TransformerMixin
-from ..utils._joblib import Parallel, delayed, effective_n_jobs
 from ..utils import (check_array, check_random_state, gen_even_slices,
                      gen_batches)
 from ..utils.extmath import randomized_svd, row_norms
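dict_learning.py pairs effective_n_jobs with gen_even_slices: the n_jobs parameter (None, -1, or a positive int) is normalized to a concrete worker count before the samples are sliced up. A hedged sketch of that slice-and-parallelize idiom using sklearn's public gen_even_slices helper; the column-mean computation is illustrative, not the actual sparse coding step:

    import numpy as np
    from joblib import Parallel, delayed, effective_n_jobs
    from sklearn.utils import gen_even_slices

    X = np.random.rand(1000, 20)
    n_jobs = effective_n_jobs(-1)  # e.g. -1 resolves to the CPU count

    # One even slice of samples per worker, processed independently.
    results = Parallel(n_jobs=n_jobs)(
        delayed(np.mean)(X[batch], axis=0)
        for batch in gen_even_slices(X.shape[0], n_jobs))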
2 changes: 1 addition & 1 deletion sklearn/decomposition/online_lda.py
@@ -14,14 +14,14 @@
 import numpy as np
 import scipy.sparse as sp
 from scipy.special import gammaln
+from joblib import Parallel, delayed, effective_n_jobs

 from ..base import BaseEstimator, TransformerMixin
 from ..utils import (check_random_state, check_array,
                      gen_batches, gen_even_slices)
 from ..utils.fixes import logsumexp
 from ..utils.validation import check_non_negative
 from ..utils.validation import check_is_fitted
-from ..utils._joblib import Parallel, delayed, effective_n_jobs

 from ._online_lda import (mean_change, _dirichlet_expectation_1d,
                           _dirichlet_expectation_2d)
8 changes: 4 additions & 4 deletions sklearn/decomposition/tests/test_sparse_pca.py
@@ -142,15 +142,15 @@ def test_mini_batch_fit_transform():
     U1 = spca_lars.transform(Y)
     # Test multiple CPUs
     if sys.platform == 'win32':  # fake parallelism for win32
-        import sklearn.utils._joblib.parallel as joblib_par
-        _mp = joblib_par.multiprocessing
-        joblib_par.multiprocessing = None
+        import joblib
+        _mp = joblib.parallel.multiprocessing
+        joblib.parallel.multiprocessing = None
         try:
             spca = MiniBatchSparsePCA(n_components=3, n_jobs=2, alpha=alpha,
                                       random_state=0)
             U2 = spca.fit(Y).transform(Y)
         finally:
-            joblib_par.multiprocessing = _mp
+            joblib.parallel.multiprocessing = _mp
     else:  # we can efficiently use parallelism
         spca = MiniBatchSparsePCA(n_components=3, n_jobs=2, alpha=alpha,
                                   random_state=0)
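The win32 branch monkeypatches joblib.parallel.multiprocessing to None, relying on joblib falling back to sequential execution when multiprocessing is unavailable; the finally block restores the module attribute so other tests are unaffected. A minimal sketch of the same trick outside the test (attribute name and fallback behavior mirror the joblib version this PR targets; treat both as assumptions for newer releases):

    import joblib
    from joblib import Parallel, delayed

    _mp = joblib.parallel.multiprocessing
    joblib.parallel.multiprocessing = None  # pretend multiprocessing is missing
    try:
        # Runs sequentially in-process instead of spawning workers.
        out = Parallel(n_jobs=2)(delayed(abs)(-i) for i in range(5))
    finally:
        joblib.parallel.multiprocessing = _mp  # always restore the module state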
3 changes: 2 additions & 1 deletion sklearn/ensemble/bagging.py
@@ -10,9 +10,10 @@
 from abc import ABCMeta, abstractmethod
 from warnings import warn

+from joblib import Parallel, delayed
+
 from .base import BaseEnsemble, _partition_estimators
 from ..base import ClassifierMixin, RegressorMixin
-from ..utils._joblib import Parallel, delayed
 from ..metrics import r2_score, accuracy_score
 from ..tree import DecisionTreeClassifier, DecisionTreeRegressor
 from ..utils import check_random_state, check_X_y, check_array, column_or_1d
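bagging.py uses the same direct import for its fan-out of independent estimator fits. A hedged, self-contained sketch of that embarrassingly-parallel pattern (fit_one is an invented helper for illustration, not sklearn's private _parallel_build_estimators):

    from joblib import Parallel, delayed
    from sklearn.datasets import make_classification
    from sklearn.tree import DecisionTreeClassifier

    X, y = make_classification(n_samples=200, random_state=0)

    def fit_one(seed):
        # Each call fits an independent tree; no shared state is mutated.
        return DecisionTreeClassifier(random_state=seed).fit(X, y)

    # Fan the independent fits out across workers, as a bagging ensemble does.
    estimators = Parallel(n_jobs=2)(delayed(fit_one)(s) for s in range(10))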