From af354b47e9b8937becbe37b04b643d1abd4a7a8b Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 20 Nov 2018 17:44:06 -0500 Subject: [PATCH 01/29] Start removing python 2.7 --- .travis.yml | 18 -------------- README.rst | 2 +- appveyor.yml | 5 ---- conftest.py | 4 --- doc/developers/contributing.rst | 14 +---------- doc/install.rst | 2 +- setup.py | 3 --- sklearn/datasets/species_distributions.py | 11 ++------- sklearn/feature_extraction/_hashing.pyx | 10 +++----- sklearn/model_selection/_split.py | 5 ---- sklearn/model_selection/tests/test_search.py | 7 ++---- sklearn/utils/bench.py | 26 -------------------- sklearn/utils/fixes.py | 2 -- 13 files changed, 10 insertions(+), 99 deletions(-) delete mode 100644 sklearn/utils/bench.py diff --git a/.travis.yml b/.travis.yml index e976cacd06c96..f1f161ca9d4e8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,24 +20,6 @@ env: matrix: include: - # This environment tests that scikit-learn can be built against - # versions of numpy, scipy with ATLAS that comes with Ubuntu Trusty 14.04 - # i.e. numpy 1.8.2 and scipy 0.13.3 - - env: DISTRIB="ubuntu" PYTHON_VERSION="2.7" CYTHON_VERSION="0.23.5" - COVERAGE=true - if: type != cron - addons: - apt: - packages: - # these only required by the DISTRIB="ubuntu" builds: - - python-scipy - - libatlas3-base - - libatlas-dev - # Python 3.4 build - - env: DISTRIB="conda" PYTHON_VERSION="3.4" INSTALL_MKL="false" - NUMPY_VERSION="1.10.4" SCIPY_VERSION="0.16.1" CYTHON_VERSION="0.25.2" - PILLOW_VERSION="4.0.0" COVERAGE=true - if: type != cron # Python 3.5 build - env: DISTRIB="conda" PYTHON_VERSION="3.5" INSTALL_MKL="false" NUMPY_VERSION="1.10.4" SCIPY_VERSION="0.16.1" CYTHON_VERSION="0.25.2" diff --git a/README.rst b/README.rst index 70537af856f61..b2370fbde06a1 100644 --- a/README.rst +++ b/README.rst @@ -50,7 +50,7 @@ Dependencies scikit-learn requires: -- Python (>= 2.7 or >= 3.4) +- Python (>= 3.5) - NumPy (>= 1.8.2) - SciPy (>= 0.13.3) diff --git a/appveyor.yml b/appveyor.yml index e26a02c90cd39..531aaca31aec5 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -22,11 +22,6 @@ environment: PYTHON_ARCH: "64" CHECK_WARNINGS: "true" - - PYTHON: "C:\\Python27" - PYTHON_VERSION: "2.7.8" - PYTHON_ARCH: "32" - - # Because we only have a single worker, we don't want to waste precious # appveyor CI time and make other PRs wait for repeated failures in a failing # PR. The following option cancels pending jobs in a given PR after the first diff --git a/conftest.py b/conftest.py index 50a3d3470a47a..495d47a6afc9d 100644 --- a/conftest.py +++ b/conftest.py @@ -11,8 +11,6 @@ import pytest from _pytest.doctest import DoctestItem -from sklearn.utils.fixes import PY3_OR_LATER - PYTEST_MIN_VERSION = '3.3.0' if LooseVersion(pytest.__version__) < PYTEST_MIN_VERSION: @@ -50,8 +48,6 @@ def pytest_collection_modifyitems(config, items): # run doctests only for numpy >= 1.14. We want to skip the doctest for # python 2 due to unicode. skip_doctests = False - if not PY3_OR_LATER: - skip_doctests = True try: import numpy as np if LooseVersion(np.__version__) < LooseVersion('1.14'): diff --git a/doc/developers/contributing.rst b/doc/developers/contributing.rst index 0bb8b1fc39f59..53fa23bafac7a 100644 --- a/doc/developers/contributing.rst +++ b/doc/developers/contributing.rst @@ -891,19 +891,7 @@ in the examples. Python versions supported ------------------------- -All scikit-learn code should work unchanged in both Python 2.7 and 3.4 or -newer. 
Since Python 3.x is not backwards compatible, that may require changes -to code and it certainly requires testing on both 2.7 and 3.4 or newer. - -For most numerical algorithms, Python 3.x support is easy: -just remember that ``print`` is a function and -integer division is written ``//``. -String handling has been overhauled, though, as have parts of -the Python standard library. -The `six `_ package helps with -cross-compatibility and is included in scikit-learn as -``sklearn.externals.six``. - +Since scikit-learn 0.21, only Python 3.5 and newer is supported. .. _code_review: diff --git a/doc/install.rst b/doc/install.rst index bb6b67af3e3cb..7ac8eb5f077ec 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -17,7 +17,7 @@ Installing the latest release Scikit-learn requires: -- Python (>= 2.7 or >= 3.4), +- Python (>= 3.5), - NumPy (>= 1.8.2), - SciPy (>= 0.13.3). diff --git a/setup.py b/setup.py index e25c50a114a33..1e421acfabea6 100755 --- a/setup.py +++ b/setup.py @@ -183,10 +183,7 @@ def setup_package(): 'Operating System :: POSIX', 'Operating System :: Unix', 'Operating System :: MacOS', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', diff --git a/sklearn/datasets/species_distributions.py b/sklearn/datasets/species_distributions.py index da158eb24aa33..062dceeabbbf2 100644 --- a/sklearn/datasets/species_distributions.py +++ b/sklearn/datasets/species_distributions.py @@ -41,7 +41,6 @@ from os import makedirs, remove from os.path import exists -import sys import logging import numpy as np @@ -53,8 +52,6 @@ from sklearn.datasets.base import _pkl_filepath from sklearn.utils import _joblib -PY3_OR_LATER = sys.version_info[0] >= 3 - # The original data can be found at: # https://biodiversityinformatics.amnh.org/open_source/maxent/samples.zip SAMPLES = RemoteFileMetadata( @@ -106,12 +103,8 @@ def _load_csv(F): rec : np.ndarray record array representing the data """ - if PY3_OR_LATER: - # Numpy recarray wants Python 3 str but not bytes... - names = F.readline().decode('ascii').strip().split(',') - else: - # Numpy recarray wants Python 2 str but not unicode - names = F.readline().strip().split(',') + # Numpy recarray wants Python 3 str but not bytes... + names = F.readline().decode('ascii').strip().split(',') rec = np.loadtxt(F, skiprows=0, delimiter=',', dtype='a22,f4,f4') rec.dtype.names = names diff --git a/sklearn/feature_extraction/_hashing.pyx b/sklearn/feature_extraction/_hashing.pyx index c462dd8a24719..24b38a081faf5 100644 --- a/sklearn/feature_extraction/_hashing.pyx +++ b/sklearn/feature_extraction/_hashing.pyx @@ -35,13 +35,9 @@ def transform(raw_X, Py_ssize_t n_features, dtype, bint alternate_sign=1): cdef array.array indices cdef array.array indptr indices = array.array("i") - if sys.version_info >= (3, 3): - indices_array_dtype = "q" - indices_np_dtype = np.longlong - else: - # On Windows with PY2.7 long int would still correspond to 32 bit. 
- indices_array_dtype = "l" - indices_np_dtype = np.int_ + indices_array_dtype = "q" + indices_np_dtype = np.longlong + indptr = array.array(indices_array_dtype, [0]) diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index 2eccb50fcc976..356c018c58c7d 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -66,11 +66,6 @@ class BaseCrossValidator(with_metaclass(ABCMeta)): Implementations must define `_iter_test_masks` or `_iter_test_indices`. """ - def __init__(self): - # We need this for the build_repr to work properly in py2.7 - # see #6304 - pass - def split(self, X, y=None, groups=None): """Generate indices to split data into training and test set. diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index dfdcb504912f1..95f8ac1bd9929 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -13,7 +13,6 @@ import pytest from sklearn.utils.fixes import sp_version -from sklearn.utils.fixes import PY3_OR_LATER from sklearn.utils.fixes import _Iterable as Iterable, _Sized as Sized from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_not_equal @@ -429,10 +428,8 @@ def test_grid_search_when_param_grid_includes_range(): # Test that the best estimator contains the right value for foo_param clf = MockClassifier() grid_search = None - if PY3_OR_LATER: - grid_search = GridSearchCV(clf, {'foo_param': range(1, 4)}) - else: - grid_search = GridSearchCV(clf, {'foo_param': xrange(1, 4)}) + grid_search = GridSearchCV(clf, {'foo_param': range(1, 4)}) + grid_search.fit(X, y) assert_equal(grid_search.best_estimator_.foo_param, 2) diff --git a/sklearn/utils/bench.py b/sklearn/utils/bench.py deleted file mode 100644 index 3ea26ec6b395f..0000000000000 --- a/sklearn/utils/bench.py +++ /dev/null @@ -1,26 +0,0 @@ -""" -Helper functions for benchmarking -""" - - -def total_seconds(delta): - """ - helper function to emulate function total_seconds, - introduced in python2.7 - - https://docs.python.org/library/datetime.html\ -#datetime.timedelta.total_seconds - - Parameters - ---------- - delta : datetime object - - Returns - ------- - int - The number of seconds contained in delta - """ - - mu_sec = 1e-6 # number of seconds in one microseconds - - return delta.seconds + delta.microseconds * mu_sec diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index c92a91ad0a0d1..d966b1c7a4875 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -12,7 +12,6 @@ import os import errno -import sys from distutils.version import LooseVersion @@ -42,7 +41,6 @@ def _parse_version(version_string): np_version = _parse_version(np.__version__) sp_version = _parse_version(scipy.__version__) -PY3_OR_LATER = sys.version_info[0] >= 3 # Remove when minimum required NumPy >= 1.10 From b25487637b5c034f65ac2b9412a16db2cb612104 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 20 Nov 2018 17:55:52 -0500 Subject: [PATCH 02/29] remove xrange --- benchmarks/bench_random_projections.py | 3 +- .../bench_sample_without_replacement.py | 13 ++++--- doc/conf.py | 9 +++-- doc/developers/performance.rst | 4 +-- .../plot_out_of_core_classification.py | 8 ++--- examples/applications/svm_gui.py | 3 +- sklearn/cluster/birch.py | 2 +- sklearn/cluster/hierarchical.py | 8 ++--- sklearn/covariance/shrunk_covariance_.py | 6 ++-- sklearn/datasets/_svmlight_format.pyx | 2 +- sklearn/decomposition/factor_analysis.py | 2 +- 
sklearn/decomposition/fastica_.py | 4 +-- sklearn/decomposition/online_lda.py | 8 ++--- sklearn/decomposition/tests/test_fastica.py | 2 +- .../decomposition/tests/test_online_lda.py | 2 +- sklearn/discriminant_analysis.py | 2 +- sklearn/externals/_arff.py | 4 +-- sklearn/feature_extraction/dict_vectorizer.py | 2 +- sklearn/feature_extraction/tests/test_text.py | 6 +--- sklearn/feature_extraction/text.py | 12 +++---- sklearn/linear_model/coordinate_descent.py | 2 +- sklearn/linear_model/least_angle.py | 4 +-- sklearn/model_selection/tests/test_search.py | 2 +- sklearn/neural_network/rbm.py | 2 +- sklearn/random_projection.py | 2 +- sklearn/svm/tests/test_svm.py | 12 ------- sklearn/utils/_random.pyx | 4 +-- sklearn/utils/extmath.py | 4 +-- sklearn/utils/sparsefuncs_fast.pyx | 36 +++++++++---------- sklearn/utils/tests/test_fast_dict.py | 2 +- sklearn/utils/tests/test_multiclass.py | 4 +-- 31 files changed, 78 insertions(+), 98 deletions(-) diff --git a/benchmarks/bench_random_projections.py b/benchmarks/bench_random_projections.py index 4379e50e98ab5..dc313a3e9f762 100644 --- a/benchmarks/bench_random_projections.py +++ b/benchmarks/bench_random_projections.py @@ -19,7 +19,6 @@ import scipy.sparse as sp from sklearn import clone -from sklearn.externals.six.moves import xrange from sklearn.random_projection import (SparseRandomProjection, GaussianRandomProjection, johnson_lindenstrauss_min_dim) @@ -212,7 +211,7 @@ def print_row(clf_type, time_fit, time_transform): for name in selected_transformers: print("Perform benchmarks for %s..." % name) - for iteration in xrange(opts.n_times): + for iteration in range(opts.n_times): print("\titer %s..." % iteration, end="") time_to_fit, time_to_transform = bench_scikit_transformer(X_dense, transformers[name]) diff --git a/benchmarks/bench_sample_without_replacement.py b/benchmarks/bench_sample_without_replacement.py index 90c382e906bcc..4fb23efb7f24f 100644 --- a/benchmarks/bench_sample_without_replacement.py +++ b/benchmarks/bench_sample_without_replacement.py @@ -15,7 +15,6 @@ import numpy as np import random -from sklearn.externals.six.moves import xrange from sklearn.utils.random import sample_without_replacement @@ -90,9 +89,9 @@ def bench_sample(sampling, n_population, n_samples): # Set Python core input sampling_algorithm["python-core-sample"] = \ lambda n_population, n_sample: \ - random.sample(xrange(n_population), n_sample) + random.sample(range(n_population), n_sample) - ########################################################################### + ########################################################################### # Set custom automatic method selection sampling_algorithm["custom-auto"] = \ lambda n_population, n_samples, random_state=None: \ @@ -156,11 +155,11 @@ def bench_sample(sampling, n_population, n_samples): print("Perform benchmarks for %s..." 
% name, end="") time[name] = np.zeros(shape=(opts.n_steps, opts.n_times)) - for step in xrange(opts.n_steps): - for it in xrange(opts.n_times): + for step in range(opts.n_steps): + for it in range(opts.n_times): time[name][step, it] = bench_sample(sampling_algorithm[name], - opts.n_population, - n_samples[step]) + opts.n_population, + n_samples[step]) print("done") diff --git a/doc/conf.py b/doc/conf.py index e829a429a4b7b..d19b43ddcb955 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -15,7 +15,6 @@ from __future__ import print_function import sys import os -from sklearn.externals.six import u # If extensions (or modules to document with autodoc) are in another # directory, add these directories to sys.path here. If the directory @@ -74,8 +73,8 @@ master_doc = 'index' # General information about the project. -project = u('scikit-learn') -copyright = u('2007 - 2018, scikit-learn developers (BSD License)') +project = 'scikit-learn' +copyright = '2007 - 2018, scikit-learn developers (BSD License)' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -214,8 +213,8 @@ # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass # [howto/manual]). -latex_documents = [('index', 'user_guide.tex', u('scikit-learn user guide'), - u('scikit-learn developers'), 'manual'), ] +latex_documents = [('index', 'user_guide.tex', 'scikit-learn user guide', + 'scikit-learn developers', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. diff --git a/doc/developers/performance.rst b/doc/developers/performance.rst index 325199a464fab..80ef0b210f7f1 100644 --- a/doc/developers/performance.rst +++ b/doc/developers/performance.rst @@ -227,13 +227,13 @@ Now restart IPython and let us use this new toy:: 178 # values justified in the paper 179 48 144 3.0 0.0 alpha = 1 180 48 113 2.4 0.0 beta = 0.1 - 181 638 1880 2.9 0.1 for n_iter in xrange(1, max_iter + 1): + 181 638 1880 2.9 0.1 for n_iter in range(1, max_iter + 1): 182 638 195133 305.9 10.2 grad = np.dot(WtW, H) - WtV 183 638 495761 777.1 25.9 proj_gradient = norm(grad[np.logical_or(grad < 0, H > 0)]) 184 638 2449 3.8 0.1 if proj_gradient < tol: 185 48 130 2.7 0.0 break 186 - 187 1474 4474 3.0 0.2 for inner_iter in xrange(1, 20): + 187 1474 4474 3.0 0.2 for inner_iter in range(1, 20): 188 1474 83833 56.9 4.4 Hn = H - alpha * grad 189 # Hn = np.where(Hn > 0, Hn, 0) 190 1474 194239 131.8 10.1 Hn = _pos(Hn) diff --git a/examples/applications/plot_out_of_core_classification.py b/examples/applications/plot_out_of_core_classification.py index ecff621ce82bd..52495e2d0a423 100644 --- a/examples/applications/plot_out_of_core_classification.py +++ b/examples/applications/plot_out_of_core_classification.py @@ -40,8 +40,8 @@ import matplotlib.pyplot as plt from matplotlib import rcParams -from sklearn.externals.six.moves import html_parser -from sklearn.externals.six.moves.urllib.request import urlretrieve +from html.parser import HTMLParser +from urllib.request import urlretrieve from sklearn.datasets import get_data_home from sklearn.feature_extraction.text import HashingVectorizer from sklearn.linear_model import SGDClassifier @@ -60,11 +60,11 @@ def _not_in_sphinx(): # -class ReutersParser(html_parser.HTMLParser): +class ReutersParser(HTMLParser): """Utility class to parse a SGML file and yield documents one at a time.""" def __init__(self, 
encoding='latin-1'): - html_parser.HTMLParser.__init__(self) + HTMLParser.__init__(self) self._reset() self.encoding = encoding diff --git a/examples/applications/svm_gui.py b/examples/applications/svm_gui.py index 51a8a5bd48b34..83d290146eaf5 100644 --- a/examples/applications/svm_gui.py +++ b/examples/applications/svm_gui.py @@ -40,7 +40,6 @@ from sklearn import svm from sklearn.datasets import dump_svmlight_file -from sklearn.externals.six.moves import xrange y_min, y_max = -50, 50 x_min, x_max = -50, 50 @@ -188,7 +187,7 @@ def update_example(self, model, idx): def update(self, event, model): if event == "examples_loaded": - for i in xrange(len(model.data)): + for i in range(len(model.data)): self.update_example(model, i) if event == "example_added": diff --git a/sklearn/cluster/birch.py b/sklearn/cluster/birch.py index 188eff02b6f02..c08e7862d9b6c 100644 --- a/sklearn/cluster/birch.py +++ b/sklearn/cluster/birch.py @@ -29,7 +29,7 @@ def _iterate_sparse_X(X): X_data = X.data X_indptr = X.indptr - for i in xrange(n_samples): + for i in range(n_samples): row = np.zeros(X.shape[1]) startptr, endptr = X_indptr[i], X_indptr[i + 1] nonzero_indices = X_indices[startptr:endptr] diff --git a/sklearn/cluster/hierarchical.py b/sklearn/cluster/hierarchical.py index 3e5d2e8203ba8..2a44aa81912a1 100644 --- a/sklearn/cluster/hierarchical.py +++ b/sklearn/cluster/hierarchical.py @@ -64,10 +64,10 @@ def _fix_connectivity(X, connectivity, affinity): "stopping the tree early." % n_components, stacklevel=2) # XXX: Can we do without completing the matrix? - for i in xrange(n_components): + for i in range(n_components): idx_i = np.where(labels == i)[0] Xi = X[idx_i] - for j in xrange(i): + for j in range(i): idx_j = np.where(labels == j)[0] Xj = X[idx_j] D = pairwise_distances(Xi, Xj, metric=affinity) @@ -527,7 +527,7 @@ def linkage_tree(X, connectivity=None, n_clusters=None, linkage='complete', children = [] # recursive merge loop - for k in xrange(n_samples, n_nodes): + for k in range(n_samples, n_nodes): # identify the merge while True: edge = heappop(inertia) @@ -632,7 +632,7 @@ def _hc_cut(n_clusters, children, n_leaves): # are interested in largest elements # children[-1] is the root of the tree nodes = [-(max(children[-1]) + 1)] - for _ in xrange(n_clusters - 1): + for _ in range(n_clusters - 1): # As we have a heap, nodes[0] is the smallest element these_children = children[-nodes[0] - n_leaves] # Insert the 2 children and remove the largest node diff --git a/sklearn/covariance/shrunk_covariance_.py b/sklearn/covariance/shrunk_covariance_.py index 892d04c94d082..eed39a45bddc4 100644 --- a/sklearn/covariance/shrunk_covariance_.py +++ b/sklearn/covariance/shrunk_covariance_.py @@ -227,8 +227,8 @@ def ledoit_wolf_shrinkage(X, assume_centered=False, block_size=1000): beta_ = 0. # sum of the coefficients of delta_ = 0. 
# sum of the *squared* coefficients of # starting block computation - for i in xrange(n_splits): - for j in xrange(n_splits): + for i in range(n_splits): + for j in range(n_splits): rows = slice(block_size * i, block_size * (i + 1)) cols = slice(block_size * j, block_size * (j + 1)) beta_ += np.sum(np.dot(X2.T[rows], X2[:, cols])) @@ -237,7 +237,7 @@ def ledoit_wolf_shrinkage(X, assume_centered=False, block_size=1000): beta_ += np.sum(np.dot(X2.T[rows], X2[:, block_size * n_splits:])) delta_ += np.sum( np.dot(X.T[rows], X[:, block_size * n_splits:]) ** 2) - for j in xrange(n_splits): + for j in range(n_splits): cols = slice(block_size * j, block_size * (j + 1)) beta_ += np.sum(np.dot(X2.T[block_size * n_splits:], X2[:, cols])) delta_ += np.sum( diff --git a/sklearn/datasets/_svmlight_format.pyx b/sklearn/datasets/_svmlight_format.pyx index 152bd4325dbfb..bba5db9d3cf50 100644 --- a/sklearn/datasets/_svmlight_format.pyx +++ b/sklearn/datasets/_svmlight_format.pyx @@ -92,7 +92,7 @@ def _load_svmlight_file(f, dtype, bint multilabel, bint zero_based, features.pop(0) n_features -= 1 - for i in xrange(0, n_features): + for i in range(0, n_features): idx_s, value = features[i].split(COLON, 1) idx = int(idx_s) if idx < 0 or not zero_based and idx == 0: diff --git a/sklearn/decomposition/factor_analysis.py b/sklearn/decomposition/factor_analysis.py index eea477937e149..5ede30bca42bc 100644 --- a/sklearn/decomposition/factor_analysis.py +++ b/sklearn/decomposition/factor_analysis.py @@ -211,7 +211,7 @@ def my_svd(X): raise ValueError('SVD method %s is not supported. Please consider' ' the documentation' % self.svd_method) - for i in xrange(self.max_iter): + for i in range(self.max_iter): # SMALL helps numerics sqrt_psi = np.sqrt(psi) + SMALL s, V, unexp_var = my_svd(X / (sqrt_psi * nsqrt)) diff --git a/sklearn/decomposition/fastica_.py b/sklearn/decomposition/fastica_.py index 693d46d31fab5..7de65c9829fd8 100644 --- a/sklearn/decomposition/fastica_.py +++ b/sklearn/decomposition/fastica_.py @@ -75,7 +75,7 @@ def _ica_def(X, tol, g, fun_args, max_iter, w_init): w = w_init[j, :].copy() w /= np.sqrt((w ** 2).sum()) - for i in moves.xrange(max_iter): + for i in moves.range(max_iter): gwtx, g_wtx = g(np.dot(w.T, X), fun_args) w1 = (X * gwtx).mean(axis=1) - g_wtx.mean() * w @@ -104,7 +104,7 @@ def _ica_par(X, tol, g, fun_args, max_iter, w_init): W = _sym_decorrelation(w_init) del w_init p_ = float(X.shape[1]) - for ii in moves.xrange(max_iter): + for ii in moves.range(max_iter): gwtx, g_wtx = g(np.dot(W, X), fun_args) W1 = _sym_decorrelation(np.dot(gwtx, X.T) / p_ - g_wtx[:, np.newaxis] * W) diff --git a/sklearn/decomposition/online_lda.py b/sklearn/decomposition/online_lda.py index e35f40ec05c14..b9bf1025e315e 100644 --- a/sklearn/decomposition/online_lda.py +++ b/sklearn/decomposition/online_lda.py @@ -93,7 +93,7 @@ def _update_doc_distribution(X, exp_topic_word_distr, doc_topic_prior, X_indices = X.indices X_indptr = X.indptr - for idx_d in xrange(n_samples): + for idx_d in range(n_samples): if is_sparse_x: ids = X_indices[X_indptr[idx_d]:X_indptr[idx_d + 1]] cnts = X_data[X_indptr[idx_d]:X_indptr[idx_d + 1]] @@ -107,7 +107,7 @@ def _update_doc_distribution(X, exp_topic_word_distr, doc_topic_prior, exp_topic_word_d = exp_topic_word_distr[:, ids] # Iterate between `doc_topic_d` and `norm_phi` until convergence - for _ in xrange(0, max_iters): + for _ in range(0, max_iters): last_d = doc_topic_d # The optimal phi_{dwk} is proportional to @@ -544,7 +544,7 @@ def fit(self, X, y=None): n_jobs = 
effective_n_jobs(self.n_jobs) with Parallel(n_jobs=n_jobs, verbose=max(0, self.verbose - 1)) as parallel: - for i in xrange(max_iter): + for i in range(max_iter): if learning_method == 'online': for idx_slice in gen_batches(n_samples, batch_size): self._em_step(X[idx_slice, :], total_samples=n_samples, @@ -682,7 +682,7 @@ def _loglikelihood(prior, distr, dirichlet_distr, size): X_indptr = X.indptr # E[log p(docs | theta, beta)] - for idx_d in xrange(0, n_samples): + for idx_d in range(0, n_samples): if is_sparse_x: ids = X_indices[X_indptr[idx_d]:X_indptr[idx_d + 1]] cnts = X_data[X_indptr[idx_d]:X_indptr[idx_d + 1]] diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py index e834a00b03118..d5a329c7340c8 100644 --- a/sklearn/decomposition/tests/test_fastica.py +++ b/sklearn/decomposition/tests/test_fastica.py @@ -130,7 +130,7 @@ def g_test(x): ica = FastICA(fun=fn, algorithm=algo, random_state=0) assert_raises(ValueError, ica.fit, m.T) - assert_raises(TypeError, FastICA(fun=moves.xrange(10)).fit, m.T) + assert_raises(TypeError, FastICA(fun=moves.range(10)).fit, m.T) def test_fastica_nowhiten(): diff --git a/sklearn/decomposition/tests/test_online_lda.py b/sklearn/decomposition/tests/test_online_lda.py index 7e06d28faefcf..63346f3dbddbd 100644 --- a/sklearn/decomposition/tests/test_online_lda.py +++ b/sklearn/decomposition/tests/test_online_lda.py @@ -90,7 +90,7 @@ def test_lda_partial_fit(): lda = LatentDirichletAllocation(n_components=n_components, learning_offset=10., total_samples=100, random_state=rng) - for i in xrange(3): + for i in range(3): lda.partial_fit(X) correct_idx_grps = [(0, 1, 2), (3, 4, 5), (6, 7, 8)] diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index a2919c7ff7a69..0418ebb62dc9a 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -660,7 +660,7 @@ def fit(self, X, y): means = [] scalings = [] rotations = [] - for ind in xrange(n_classes): + for ind in range(n_classes): Xg = X[y == ind, :] meang = Xg.mean(0) means.append(meang) diff --git a/sklearn/externals/_arff.py b/sklearn/externals/_arff.py index 82f504542f9a9..2552e77b894a6 100644 --- a/sklearn/externals/_arff.py +++ b/sklearn/externals/_arff.py @@ -431,7 +431,7 @@ def decode_data(self, s, conversors): raise BadDataFormat(s) # XXX: int 0 is used for implicit values, not '0' values = [values[i] if i in values else 0 for i in - xrange(len(conversors))] + range(len(conversors))] else: if len(values) != len(conversors): raise BadDataFormat(s) @@ -524,7 +524,7 @@ def encode_data(self, data, attributes): data = data.data # Check if the rows are sorted - if not all(row[i] <= row[i + 1] for i in xrange(len(row) - 1)): + if not all(row[i] <= row[i + 1] for i in range(len(row) - 1)): raise ValueError("liac-arff can only output COO matrices with " "sorted rows.") diff --git a/sklearn/feature_extraction/dict_vectorizer.py b/sklearn/feature_extraction/dict_vectorizer.py index d078a325b69a6..f37241975645f 100644 --- a/sklearn/feature_extraction/dict_vectorizer.py +++ b/sklearn/feature_extraction/dict_vectorizer.py @@ -258,7 +258,7 @@ def inverse_transform(self, X, dict_type=dict): n_samples = X.shape[0] names = self.feature_names_ - dicts = [dict_type() for _ in xrange(n_samples)] + dicts = [dict_type() for _ in range(n_samples)] if sp.issparse(X): for i, j in zip(*X.nonzero()): diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py index 
9798175e4d5bc..503a167c5fe35 100644 --- a/sklearn/feature_extraction/tests/test_text.py +++ b/sklearn/feature_extraction/tests/test_text.py @@ -5,7 +5,6 @@ import pytest from scipy import sparse -from sklearn.externals.six import PY2 from sklearn.feature_extraction.text import strip_tags from sklearn.feature_extraction.text import strip_accents_unicode from sklearn.feature_extraction.text import strip_accents_ascii @@ -1132,10 +1131,7 @@ def _check_stop_words_consistency(estimator): @fails_if_pypy def test_vectorizer_stop_words_inconsistent(): - if PY2: - lstr = "[u'and', u'll', u've']" - else: - lstr = "['and', 'll', 've']" + lstr = "['and', 'll', 've']" message = ('Your stop_words may be inconsistent with your ' 'preprocessing. Tokenizing the stop words generated ' 'tokens %s not in stop_words.' % lstr) diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index 6120c1d4a8f3a..1162b942e5515 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -170,9 +170,9 @@ def _word_ngrams(self, tokens, stop_words=None): tokens_append = tokens.append space_join = " ".join - for n in xrange(min_n, + for n in range(min_n, min(max_n + 1, n_original_tokens + 1)): - for i in xrange(n_original_tokens - n + 1): + for i in range(n_original_tokens - n + 1): tokens_append(space_join(original_tokens[i: i + n])) return tokens @@ -195,8 +195,8 @@ def _char_ngrams(self, text_document): # bind method outside of loop to reduce overhead ngrams_append = ngrams.append - for n in xrange(min_n, min(max_n + 1, text_len + 1)): - for i in xrange(text_len - n + 1): + for n in range(min_n, min(max_n + 1, text_len + 1)): + for i in range(text_len - n + 1): ngrams_append(text_document[i: i + n]) return ngrams @@ -218,7 +218,7 @@ def _char_wb_ngrams(self, text_document): for w in text_document.split(): w = ' ' + w + ' ' w_len = len(w) - for n in xrange(min_n, max_n + 1): + for n in range(min_n, max_n + 1): offset = 0 ngrams_append(w[offset:offset + n]) while offset + n < w_len: @@ -348,7 +348,7 @@ def _validate_vocabulary(self): indices = set(six.itervalues(vocabulary)) if len(indices) != len(vocabulary): raise ValueError("Vocabulary contains repeated indices.") - for i in xrange(len(vocabulary)): + for i in range(len(vocabulary)): if i not in indices: msg = ("Vocabulary of size %d doesn't contain index " "%d." 
% (len(vocabulary), i)) diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py index c8907574121a0..2d046332889d3 100644 --- a/sklearn/linear_model/coordinate_descent.py +++ b/sklearn/linear_model/coordinate_descent.py @@ -742,7 +742,7 @@ def fit(self, X, y, check_input=True): dual_gaps_ = np.zeros(n_targets, dtype=X.dtype) self.n_iter_ = [] - for k in xrange(n_targets): + for k in range(n_targets): if Xy is not None: this_Xy = Xy[:, k] else: diff --git a/sklearn/linear_model/least_angle.py b/sklearn/linear_model/least_angle.py index 5cc05961abd7e..d0b5f6ea25720 100644 --- a/sklearn/linear_model/least_angle.py +++ b/sklearn/linear_model/least_angle.py @@ -636,7 +636,7 @@ def _fit(self, X, y, max_iter, alpha, fit_path, Xy=None): if fit_path: self.active_ = [] self.coef_path_ = [] - for k in xrange(n_targets): + for k in range(n_targets): this_Xy = None if Xy is None else Xy[:, k] alphas, active, coef_path, n_iter_ = lars_path( X, y[:, k], Gram=Gram, Xy=this_Xy, copy_X=self.copy_X, @@ -656,7 +656,7 @@ def _fit(self, X, y, max_iter, alpha, fit_path, Xy=None): self.coef_)] self.n_iter_ = self.n_iter_[0] else: - for k in xrange(n_targets): + for k in range(n_targets): this_Xy = None if Xy is None else Xy[:, k] alphas, _, self.coef_[k], n_iter_ = lars_path( X, y[:, k], Gram=Gram, Xy=this_Xy, copy_X=self.copy_X, diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index 95f8ac1bd9929..b0dcc11c9003a 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -156,7 +156,7 @@ def test_parameter_grid(): assert_equal(len(grid2), 6) # loop to assert we can iterate over the grid multiple times - for i in xrange(2): + for i in range(2): # tuple + chain transforms {"a": 1, "b": 2} to ("a", 1, "b", 2) points = set(tuple(chain(*(sorted(p.items())))) for p in grid2) assert_equal(points, diff --git a/sklearn/neural_network/rbm.py b/sklearn/neural_network/rbm.py index 1361bffe0d240..cbf4906782d7c 100644 --- a/sklearn/neural_network/rbm.py +++ b/sklearn/neural_network/rbm.py @@ -349,7 +349,7 @@ def fit(self, X, y=None): n_batches, n_samples)) verbose = self.verbose begin = time.time() - for iteration in xrange(1, self.n_iter + 1): + for iteration in range(1, self.n_iter + 1): for batch_slice in batch_slices: self._fit(X[batch_slice], rng) diff --git a/sklearn/random_projection.py b/sklearn/random_projection.py index f8b516eb61957..6f6a8b2ba8341 100644 --- a/sklearn/random_projection.py +++ b/sklearn/random_projection.py @@ -271,7 +271,7 @@ def sparse_random_matrix(n_components, n_features, density='auto', indices = [] offset = 0 indptr = [offset] - for _ in xrange(n_components): + for _ in range(n_components): # find the indices of the non-zero components for row i n_nonzero_i = rng.binomial(n_features, density) indices_i = sample_without_replacement(n_features, n_nonzero_i, diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index 86d3c8d327ce0..90072a307e88e 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -26,7 +26,6 @@ from sklearn.exceptions import ConvergenceWarning from sklearn.exceptions import NotFittedError, UndefinedMetricWarning from sklearn.multiclass import OneVsRestClassifier -from sklearn.externals import six # toy sample X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] @@ -523,17 +522,6 @@ def test_bad_input(): def test_unicode_kernel(): # Test that a unicode kernel name does not 
cause a TypeError - if six.PY2: - # Test unicode (same as str on python3) - clf = svm.SVC(kernel=u'linear', probability=True) - clf.fit(X, Y) - clf.predict_proba(T) - svm.libsvm.cross_validation(iris.data, - iris.target.astype(np.float64), 5, - kernel=u'linear', - random_seed=0) - - # Test default behavior on both versions clf = svm.SVC(gamma='scale', kernel='linear', probability=True) clf.fit(X, Y) clf.predict_proba(T) diff --git a/sklearn/utils/_random.pyx b/sklearn/utils/_random.pyx index 7913684d3107a..9687c583b9a3c 100644 --- a/sklearn/utils/_random.pyx +++ b/sklearn/utils/_random.pyx @@ -149,12 +149,12 @@ cpdef _sample_without_replacement_with_pool(np.int_t n_population, rng_randint = rng.randint # Initialize the pool - for i in xrange(n_population): + for i in range(n_population): pool[i] = i # The following line of code are heavily inspired from python core, # more precisely of random.sample. - for i in xrange(n_samples): + for i in range(n_samples): j = rng_randint(n_population - i) # invariant: non-selected at [0,n-i) out[i] = pool[j] pool[j] = pool[n_population - i - 1] # move non-selected item into diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index 6cdca1bda1d1f..73a719da56551 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -514,13 +514,13 @@ def svd_flip(u, v, u_based_decision=True): if u_based_decision: # columns of u, rows of v max_abs_cols = np.argmax(np.abs(u), axis=0) - signs = np.sign(u[max_abs_cols, xrange(u.shape[1])]) + signs = np.sign(u[max_abs_cols, range(u.shape[1])]) u *= signs v *= signs[:, np.newaxis] else: # rows of v, columns of u max_abs_rows = np.argmax(np.abs(v), axis=1) - signs = np.sign(v[xrange(v.shape[0]), max_abs_rows]) + signs = np.sign(v[range(v.shape[0]), max_abs_rows]) u *= signs v *= signs[:, np.newaxis] return u, v diff --git a/sklearn/utils/sparsefuncs_fast.pyx b/sklearn/utils/sparsefuncs_fast.pyx index b40b843e94322..4e13fce315c57 100644 --- a/sklearn/utils/sparsefuncs_fast.pyx +++ b/sklearn/utils/sparsefuncs_fast.pyx @@ -111,24 +111,24 @@ def _csr_mean_variance_axis0(np.ndarray[floating, ndim=1, mode="c"] X_data, np.ndarray[np.int64_t, ndim=1] counts_nan = np.zeros(n_features, dtype=np.int64) - for i in xrange(non_zero): + for i in range(non_zero): col_ind = X_indices[i] if not isnan(X_data[i]): means[col_ind] += X_data[i] else: counts_nan[col_ind] += 1 - for i in xrange(n_features): + for i in range(n_features): means[i] /= (n_samples - counts_nan[i]) - for i in xrange(non_zero): + for i in range(non_zero): col_ind = X_indices[i] if not isnan(X_data[i]): diff = X_data[i] - means[col_ind] variances[col_ind] += diff * diff counts[col_ind] += 1 - for i in xrange(n_features): + for i in range(n_features): variances[i] += (n_samples - counts_nan[i] - counts[i]) * means[i]**2 variances[i] /= (n_samples - counts_nan[i]) @@ -189,13 +189,13 @@ def _csc_mean_variance_axis0(np.ndarray[floating, ndim=1] X_data, cdef np.ndarray[np.int64_t, ndim=1] counts_nan = np.zeros(n_features, dtype=np.int64) - for i in xrange(n_features): + for i in range(n_features): startptr = X_indptr[i] endptr = X_indptr[i + 1] counts = endptr - startptr - for j in xrange(startptr, endptr): + for j in range(startptr, endptr): if not isnan(X_data[j]): means[i] += X_data[j] else: @@ -203,7 +203,7 @@ def _csc_mean_variance_axis0(np.ndarray[floating, ndim=1] X_data, counts -= counts_nan[i] means[i] /= (n_samples - counts_nan[i]) - for j in xrange(startptr, endptr): + for j in range(startptr, endptr): if not isnan(X_data[j]): diff = 
X_data[j] - means[i] variances[i] += diff * diff @@ -321,12 +321,12 @@ def _incr_mean_variance_axis0(np.ndarray[floating, ndim=1] X_data, new_mean, new_var, counts_nan = _csc_mean_variance_axis0( X_data, n_samples, n_features, X_indices, X_indptr) - for i in xrange(n_features): + for i in range(n_features): new_n[i] -= counts_nan[i] # First pass cdef bint is_first_pass = True - for i in xrange(n_features): + for i in range(n_features): if last_n[i] > 0: is_first_pass = False break @@ -334,19 +334,19 @@ def _incr_mean_variance_axis0(np.ndarray[floating, ndim=1] X_data, return new_mean, new_var, new_n # Next passes - for i in xrange(n_features): + for i in range(n_features): updated_n[i] = last_n[i] + new_n[i] last_over_new_n[i] = last_n[i] / new_n[i] # Unnormalized stats - for i in xrange(n_features): + for i in range(n_features): last_mean[i] *= last_n[i] last_var[i] *= last_n[i] new_mean[i] *= new_n[i] new_var[i] *= new_n[i] # Update stats - for i in xrange(n_features): + for i in range(n_features): updated_var[i] = (last_var[i] + new_var[i] + last_over_new_n[i] / updated_n[i] * (last_mean[i] / last_over_new_n[i] - new_mean[i])**2) @@ -375,10 +375,10 @@ def _inplace_csr_row_normalize_l1(np.ndarray[floating, ndim=1] X_data, cdef np.npy_intp i, j cdef double sum_ - for i in xrange(n_samples): + for i in range(n_samples): sum_ = 0.0 - for j in xrange(X_indptr[i], X_indptr[i + 1]): + for j in range(X_indptr[i], X_indptr[i + 1]): sum_ += fabs(X_data[j]) if sum_ == 0.0: @@ -386,7 +386,7 @@ def _inplace_csr_row_normalize_l1(np.ndarray[floating, ndim=1] X_data, # correctly) continue - for j in xrange(X_indptr[i], X_indptr[i + 1]): + for j in range(X_indptr[i], X_indptr[i + 1]): X_data[j] /= sum_ @@ -405,10 +405,10 @@ def _inplace_csr_row_normalize_l2(np.ndarray[floating, ndim=1] X_data, cdef np.npy_intp i, j cdef double sum_ - for i in xrange(n_samples): + for i in range(n_samples): sum_ = 0.0 - for j in xrange(X_indptr[i], X_indptr[i + 1]): + for j in range(X_indptr[i], X_indptr[i + 1]): sum_ += (X_data[j] * X_data[j]) if sum_ == 0.0: @@ -418,7 +418,7 @@ def _inplace_csr_row_normalize_l2(np.ndarray[floating, ndim=1] X_data, sum_ = sqrt(sum_) - for j in xrange(X_indptr[i], X_indptr[i + 1]): + for j in range(X_indptr[i], X_indptr[i + 1]): X_data[j] /= sum_ diff --git a/sklearn/utils/tests/test_fast_dict.py b/sklearn/utils/tests/test_fast_dict.py index 1b0b0fd68e086..77c1259aa8bf4 100644 --- a/sklearn/utils/tests/test_fast_dict.py +++ b/sklearn/utils/tests/test_fast_dict.py @@ -20,7 +20,7 @@ def test_int_float_dict(): d.append(120, 3.) 
assert_equal(d[120], 3.0) assert_equal(len(d), len(keys) + 1) - for i in xrange(2000): + for i in range(2000): d.append(i + 1000, 4.0) assert_equal(d[1100], 4.0) diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py index deef9a802dd1c..d33778c848c2a 100644 --- a/sklearn/utils/tests/test_multiclass.py +++ b/sklearn/utils/tests/test_multiclass.py @@ -166,7 +166,7 @@ def test_unique_labels(): assert_raises(ValueError, unique_labels) # Multiclass problem - assert_array_equal(unique_labels(xrange(10)), np.arange(10)) + assert_array_equal(unique_labels(range(10)), np.arange(10)) assert_array_equal(unique_labels(np.arange(10)), np.arange(10)) assert_array_equal(unique_labels([4, 0, 2]), np.array([0, 2, 4])) @@ -181,7 +181,7 @@ def test_unique_labels(): np.arange(3)) # Several arrays passed - assert_array_equal(unique_labels([4, 0, 2], xrange(5)), + assert_array_equal(unique_labels([4, 0, 2], range(5)), np.arange(5)) assert_array_equal(unique_labels((0, 1, 2), (0,), (2, 1)), np.arange(3)) From f01b60fdd811a4e8e37247bd26c894efbc908350 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 20 Nov 2018 18:02:30 -0500 Subject: [PATCH 03/29] removing xrange and more six stuff --- .../applications/wikipedia_principal_eigenvector.py | 5 ++--- sklearn/base.py | 5 ++--- sklearn/cluster/bicluster.py | 4 +--- sklearn/cluster/birch.py | 1 - sklearn/cluster/hierarchical.py | 5 +---- sklearn/cluster/k_means_.py | 5 ++--- sklearn/compose/_column_transformer.py | 10 +++++----- sklearn/compose/tests/test_column_transformer.py | 2 +- sklearn/datasets/openml.py | 4 ++-- sklearn/datasets/svmlight_format.py | 2 +- sklearn/datasets/tests/test_openml.py | 4 ++-- sklearn/decomposition/fastica_.py | 2 +- sklearn/decomposition/pca.py | 2 +- sklearn/discriminant_analysis.py | 4 ++-- sklearn/ensemble/forest.py | 2 +- sklearn/ensemble/gradient_boosting.py | 6 +++--- sklearn/ensemble/iforest.py | 2 +- sklearn/ensemble/partial_dependence.py | 4 ++-- sklearn/externals/six.py | 4 ++-- sklearn/feature_extraction/dict_vectorizer.py | 6 +++--- sklearn/feature_extraction/text.py | 10 +++++----- sklearn/feature_selection/from_model.py | 2 +- sklearn/gaussian_process/kernels.py | 4 ++-- sklearn/impute.py | 2 +- sklearn/linear_model/base.py | 2 +- sklearn/linear_model/coordinate_descent.py | 2 +- sklearn/linear_model/least_angle.py | 4 ++-- sklearn/linear_model/logistic.py | 4 ++-- sklearn/manifold/spectral_embedding_.py | 2 +- sklearn/manifold/t_sne.py | 4 ++-- sklearn/metrics/regression.py | 12 ++++++------ sklearn/metrics/scorer.py | 10 +++++----- sklearn/model_selection/_search.py | 4 ++-- sklearn/preprocessing/_encoders.py | 8 ++++---- sklearn/preprocessing/base.py | 2 +- sklearn/tree/export.py | 4 ++-- sklearn/tree/tree.py | 2 +- sklearn/utils/class_weight.py | 4 ++-- sklearn/utils/multiclass.py | 10 +++++----- sklearn/utils/validation.py | 8 ++++---- 40 files changed, 85 insertions(+), 94 deletions(-) diff --git a/examples/applications/wikipedia_principal_eigenvector.py b/examples/applications/wikipedia_principal_eigenvector.py index 3c91479f48ec3..27f3844a23094 100644 --- a/examples/applications/wikipedia_principal_eigenvector.py +++ b/examples/applications/wikipedia_principal_eigenvector.py @@ -47,8 +47,7 @@ from joblib import Memory from sklearn.decomposition import randomized_svd -from sklearn.externals.six.moves.urllib.request import urlopen -from sklearn.externals.six import iteritems +from urllib.request import urlopen print(__doc__) @@ -173,7 +172,7 @@ def 
get_adjacency_matrix(redirects_filename, page_links_filename, limit=None): # stop after 5M links to make it possible to work in RAM X, redirects, index_map = get_adjacency_matrix( redirects_filename, page_links_filename, limit=5000000) -names = dict((i, name) for name, i in iteritems(index_map)) +names = dict((i, name) for name, i in index_map.items()) print("Computing the principal singular vectors using randomized_svd") t0 = time() diff --git a/sklearn/base.py b/sklearn/base.py index 34998270cea88..6f2eaf062d9be 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -9,7 +9,6 @@ import numpy as np from scipy import sparse -from .externals import six from .utils.fixes import signature from . import __version__ @@ -58,7 +57,7 @@ def clone(estimator, safe=True): % (repr(estimator), type(estimator))) klass = estimator.__class__ new_object_params = estimator.get_params(deep=False) - for name, param in six.iteritems(new_object_params): + for name, param in new_object_params.items(): new_object_params[name] = clone(param, safe=False) new_object = klass(**new_object_params) params_set = new_object.get_params(deep=False) @@ -97,7 +96,7 @@ def _pprint(params, offset=0, printer=repr): params_list = list() this_line_length = offset line_sep = ',\n' + (1 + offset // 2) * ' ' - for i, (k, v) in enumerate(sorted(six.iteritems(params))): + for i, (k, v) in enumerate(sorted(params.items())): if type(v) is float: # use str for representing floating point numbers # this way we get consistent representation across diff --git a/sklearn/cluster/bicluster.py b/sklearn/cluster/bicluster.py index 18260a0f3b1c2..567b9c063f189 100644 --- a/sklearn/cluster/bicluster.py +++ b/sklearn/cluster/bicluster.py @@ -14,7 +14,6 @@ from . import KMeans, MiniBatchKMeans from ..base import BaseEstimator, BiclusterMixin -from ..externals import six from ..utils import check_random_state from ..utils.extmath import (make_nonnegative, randomized_svd, @@ -85,8 +84,7 @@ def _log_normalize(X): return L - row_avg - col_avg + avg -class BaseSpectral(six.with_metaclass(ABCMeta, BaseEstimator, - BiclusterMixin)): +class BaseSpectral(BaseEstimator, BiclusterMixin, metaclass=ABCMeta): """Base class for spectral biclustering.""" @abstractmethod diff --git a/sklearn/cluster/birch.py b/sklearn/cluster/birch.py index c08e7862d9b6c..4b5f72ada9ad3 100644 --- a/sklearn/cluster/birch.py +++ b/sklearn/cluster/birch.py @@ -11,7 +11,6 @@ from ..metrics.pairwise import euclidean_distances from ..base import TransformerMixin, ClusterMixin, BaseEstimator -from ..externals.six.moves import xrange from ..utils import check_array from ..utils.extmath import row_norms, safe_sparse_dot from ..utils.validation import check_is_fitted diff --git a/sklearn/cluster/hierarchical.py b/sklearn/cluster/hierarchical.py index 2a44aa81912a1..37c9550d94d63 100644 --- a/sklearn/cluster/hierarchical.py +++ b/sklearn/cluster/hierarchical.py @@ -15,7 +15,6 @@ from scipy.sparse.csgraph import connected_components from ..base import BaseEstimator, ClusterMixin -from ..externals import six from ..metrics.pairwise import paired_distances, pairwise_distances from ..utils import check_array from ..utils.validation import check_memory @@ -24,8 +23,6 @@ from ._feature_agglomeration import AgglomerationTransform from ..utils.fast_dict import IntFloatDict -from ..externals.six.moves import xrange - ############################################################################### # For non fully-connected graphs @@ -274,7 +271,7 @@ def ward_tree(X, connectivity=None, n_clusters=None, 
return_distance=False): inertia = np.empty(len(coord_row), dtype=np.float64, order='C') _hierarchical.compute_ward_dist(moments_1, moments_2, coord_row, coord_col, inertia) - inertia = list(six.moves.zip(inertia, coord_row, coord_col)) + inertia = list(zip(inertia, coord_row, coord_col)) heapify(inertia) # prepare the main fields diff --git a/sklearn/cluster/k_means_.py b/sklearn/cluster/k_means_.py index e0d520f09fd18..ea31f042c0419 100644 --- a/sklearn/cluster/k_means_.py +++ b/sklearn/cluster/k_means_.py @@ -32,7 +32,6 @@ from ..utils._joblib import Parallel from ..utils._joblib import delayed from ..utils._joblib import effective_n_jobs -from ..externals.six import string_types from ..exceptions import ConvergenceWarning from . import _k_means from ._k_means_elkan import k_means_elkan @@ -743,10 +742,10 @@ def _init_centroids(X, k, init, random_state=None, x_squared_norms=None, raise ValueError( "n_samples=%d should be larger than k=%d" % (n_samples, k)) - if isinstance(init, string_types) and init == 'k-means++': + if isinstance(init, str) and init == 'k-means++': centers = _k_init(X, k, random_state=random_state, x_squared_norms=x_squared_norms) - elif isinstance(init, string_types) and init == 'random': + elif isinstance(init, str) and init == 'random': seeds = random_state.permutation(n_samples)[:k] centers = X[seeds] elif hasattr(init, '__array__'): diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index 68b9c65e42299..29b11a5a445a7 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -545,7 +545,7 @@ def _check_key_type(key, superclass): ---------- key : scalar, list, slice, array-like The column specification to check - superclass : int or six.string_types + superclass : int or six.str The type for which to check the `key` """ @@ -560,7 +560,7 @@ def _check_key_type(key, superclass): if superclass is int: return key.dtype.kind == 'i' else: - # superclass = six.string_types + # superclass = six.str return key.dtype.kind in ('O', 'U', 'S') return False @@ -589,7 +589,7 @@ def _get_column(X, key): # check whether we have string column names or integers if _check_key_type(key, int): column_names = False - elif _check_key_type(key, six.string_types): + elif _check_key_type(key, six.str): column_names = True elif hasattr(key, 'dtype') and np.issubdtype(key.dtype, np.bool_): # boolean mask @@ -635,13 +635,13 @@ def _get_column_indices(X, key): else: return list(key) - elif _check_key_type(key, six.string_types): + elif _check_key_type(key, six.str): try: all_columns = list(X.columns) except AttributeError: raise ValueError("Specifying the columns using strings is only " "supported for pandas DataFrames") - if isinstance(key, six.string_types): + if isinstance(key, six.str): columns = [key] elif isinstance(key, slice): start, stop = key.start, key.stop diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py index b7631336ef3dd..fce40c11422a6 100644 --- a/sklearn/compose/tests/test_column_transformer.py +++ b/sklearn/compose/tests/test_column_transformer.py @@ -785,7 +785,7 @@ def test_column_transformer_remainder_numpy(key): def test_column_transformer_remainder_pandas(key): # test different ways that columns are specified with passthrough pd = pytest.importorskip('pandas') - if isinstance(key, six.string_types) and key == 'pd-index': + if isinstance(key, six.str) and key == 'pd-index': key = pd.Index(['first']) X_array = np.array([[0, 1, 2], 
[2, 4, 6]]).T diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py index 1aec0aafab140..d1d240cad8301 100644 --- a/sklearn/datasets/openml.py +++ b/sklearn/datasets/openml.py @@ -21,7 +21,7 @@ from sklearn.externals import _arff from .base import get_data_home -from ..externals.six import string_types, PY2, BytesIO +from ..externals.six import str, PY2, BytesIO from ..externals.six.moves.urllib.error import HTTPError from ..utils import Bunch @@ -567,7 +567,7 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None, # see issue: https://github.com/openml/OpenML/issues/768) target_column = [feature['name'] for feature in features_list if feature['is_target'] == 'true'] - elif isinstance(target_column, string_types): + elif isinstance(target_column, str): # for code-simplicity, make target_column by default a list target_column = [target_column] elif target_column is None: diff --git a/sklearn/datasets/svmlight_format.py b/sklearn/datasets/svmlight_format.py index dfcc5431ebd97..81c0d7c1ac1c5 100644 --- a/sklearn/datasets/svmlight_format.py +++ b/sklearn/datasets/svmlight_format.py @@ -159,7 +159,7 @@ def get_data(): def _gen_open(f): if isinstance(f, int): # file descriptor return io.open(f, "rb", closefd=False) - elif not isinstance(f, six.string_types): + elif not isinstance(f, six.str): raise TypeError("expected {str, int, file-like}, got %s" % type(f)) _, ext = os.path.splitext(f) diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py index fdf6506a30405..c192d50d9caf5 100644 --- a/sklearn/datasets/tests/test_openml.py +++ b/sklearn/datasets/tests/test_openml.py @@ -17,7 +17,7 @@ _retry_with_clean_cache) from sklearn.utils.testing import (assert_warns_message, assert_raise_message) -from sklearn.externals.six import string_types +from sklearn.externals.six import str from sklearn.externals.six.moves.urllib.error import HTTPError from sklearn.datasets.tests.test_common import check_return_X_y from functools import partial @@ -101,7 +101,7 @@ def _fetch_dataset_from_openml(data_id, data_name, data_version, assert data_by_id.target.dtype == expected_target_dtype assert len(data_by_id.feature_names) == expected_features for feature in data_by_id.feature_names: - assert isinstance(feature, string_types) + assert isinstance(feature, str) # TODO: pass in a list of expected nominal features for feature, categories in data_by_id.categories.items(): diff --git a/sklearn/decomposition/fastica_.py b/sklearn/decomposition/fastica_.py index 7de65c9829fd8..c7e5fbe6fd685 100644 --- a/sklearn/decomposition/fastica_.py +++ b/sklearn/decomposition/fastica_.py @@ -286,7 +286,7 @@ def my_g(x): def g(x, fun_args): return fun(x, **fun_args) else: - exc = ValueError if isinstance(fun, six.string_types) else TypeError + exc = ValueError if isinstance(fun, six.str) else TypeError raise exc("Unknown function %r;" " should be one of 'logcosh', 'exp', 'cube' or callable" % fun) diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py index db183af45af0c..eff2a6653401c 100644 --- a/sklearn/decomposition/pca.py +++ b/sklearn/decomposition/pca.py @@ -479,7 +479,7 @@ def _fit_truncated(self, X, n_components, svd_solver): """ n_samples, n_features = X.shape - if isinstance(n_components, six.string_types): + if isinstance(n_components, six.str): raise ValueError("n_components=%r cannot be a string " "with svd_solver='%s'" % (n_components, svd_solver)) diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py 
index 0418ebb62dc9a..7f8d9808cc0e2 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -13,7 +13,7 @@ import warnings import numpy as np from scipy import linalg -from .externals.six import string_types +from .externals.six import str from .externals.six.moves import xrange from .base import BaseEstimator, TransformerMixin, ClassifierMixin @@ -49,7 +49,7 @@ def _cov(X, shrinkage=None): Estimated covariance matrix. """ shrinkage = "empirical" if shrinkage is None else shrinkage - if isinstance(shrinkage, string_types): + if isinstance(shrinkage, str): if shrinkage == 'auto': sc = StandardScaler() # standardize features X = sc.fit_transform(X) diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index 33d166d00969e..db793a494e594 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -493,7 +493,7 @@ def _validate_y_class_weight(self, y): if self.class_weight is not None: valid_presets = ('balanced', 'balanced_subsample') - if isinstance(self.class_weight, six.string_types): + if isinstance(self.class_weight, six.str): if self.class_weight not in valid_presets: raise ValueError('Valid presets for class_weight include ' '"balanced" and "balanced_subsample". Given "%s".' diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index b15ed82833fd6..9dcdcfa934881 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -1234,7 +1234,7 @@ def _check_params(self): "was %r" % self.subsample) if self.init is not None: - if isinstance(self.init, six.string_types): + if isinstance(self.init, six.str): if self.init not in INIT_ESTIMATORS: raise ValueError('init="%s" is not supported' % self.init) else: @@ -1248,7 +1248,7 @@ def _check_params(self): raise ValueError("alpha must be in (0.0, 1.0) but " "was %r" % self.alpha) - if isinstance(self.max_features, six.string_types): + if isinstance(self.max_features, six.str): if self.max_features == "auto": # if is_classification if self.n_classes_ > 1: @@ -1293,7 +1293,7 @@ def _init_state(self): if self.init is None: self.init_ = self.loss_.init_estimator() - elif isinstance(self.init, six.string_types): + elif isinstance(self.init, six.str): self.init_ = INIT_ESTIMATORS[self.init]() else: self.init_ = self.init diff --git a/sklearn/ensemble/iforest.py b/sklearn/ensemble/iforest.py index bb66e55ed32df..61000b05e79dc 100644 --- a/sklearn/ensemble/iforest.py +++ b/sklearn/ensemble/iforest.py @@ -242,7 +242,7 @@ def fit(self, X, y=None, sample_weight=None): # ensure that max_sample is in [1, n_samples]: n_samples = X.shape[0] - if isinstance(self.max_samples, six.string_types): + if isinstance(self.max_samples, six.str): if self.max_samples == 'auto': max_samples = min(256, n_samples) else: diff --git a/sklearn/ensemble/partial_dependence.py b/sklearn/ensemble/partial_dependence.py index 2d669d413c054..b9ec635278ce4 100644 --- a/sklearn/ensemble/partial_dependence.py +++ b/sklearn/ensemble/partial_dependence.py @@ -275,7 +275,7 @@ def plot_partial_dependence(gbrt, X, features, feature_names=None, feature_names = feature_names.tolist() def convert_feature(fx): - if isinstance(fx, six.string_types): + if isinstance(fx, six.str): try: fx = feature_names.index(fx) except ValueError: @@ -285,7 +285,7 @@ def convert_feature(fx): # convert features into a seq of int tuples tmp_features = [] for fxs in features: - if isinstance(fxs, (numbers.Integral,) + six.string_types): + if isinstance(fxs, (numbers.Integral,) + 
six.str): fxs = (fxs,) try: fxs = np.array([convert_feature(fx) for fx in fxs], dtype=np.int32) diff --git a/sklearn/externals/six.py b/sklearn/externals/six.py index 85898ec71275f..5b7dc61f98e7e 100644 --- a/sklearn/externals/six.py +++ b/sklearn/externals/six.py @@ -33,7 +33,7 @@ PY3 = sys.version_info[0] == 3 if PY3: - string_types = str, + str = str, integer_types = int, class_types = type, text_type = str @@ -41,7 +41,7 @@ MAXSIZE = sys.maxsize else: - string_types = basestring, + str = basestring, integer_types = (int, long) class_types = (type, types.ClassType) text_type = unicode diff --git a/sklearn/feature_extraction/dict_vectorizer.py b/sklearn/feature_extraction/dict_vectorizer.py index f37241975645f..6393914f76e7a 100644 --- a/sklearn/feature_extraction/dict_vectorizer.py +++ b/sklearn/feature_extraction/dict_vectorizer.py @@ -119,7 +119,7 @@ def fit(self, X, y=None): for x in X: for f, v in six.iteritems(x): - if isinstance(v, six.string_types): + if isinstance(v, six.str): f = "%s%s%s" % (f, self.separator, v) if f not in vocab: feature_names.append(f) @@ -165,7 +165,7 @@ def _transform(self, X, fitting): # same time for x in X: for f, v in six.iteritems(x): - if isinstance(v, six.string_types): + if isinstance(v, six.str): f = "%s%s%s" % (f, self.separator, v) v = 1 if f in vocab: @@ -299,7 +299,7 @@ def transform(self, X): for i, x in enumerate(X): for f, v in six.iteritems(x): - if isinstance(v, six.string_types): + if isinstance(v, six.str): f = "%s%s%s" % (f, self.separator, v) v = 1 try: diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index 1162b942e5515..d5a66f6e1462b 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -107,7 +107,7 @@ def strip_tags(s): def _check_stop_list(stop): if stop == "english": return ENGLISH_STOP_WORDS - elif isinstance(stop, six.string_types): + elif isinstance(stop, six.str): raise ValueError("not a built-in stop list: %s" % stop) elif stop is None: return None @@ -588,7 +588,7 @@ def fit(self, X, y=None): Training data. """ # triggers a parameter validation - if isinstance(X, six.string_types): + if isinstance(X, six.str): raise ValueError( "Iterable over raw text documents expected, " "string object received.") @@ -613,7 +613,7 @@ def transform(self, X): X : scipy.sparse matrix, shape = (n_samples, self.n_features) Document-term matrix. """ - if isinstance(X, six.string_types): + if isinstance(X, six.str): raise ValueError( "Iterable over raw text documents expected, " "string object received.") @@ -1018,7 +1018,7 @@ def fit_transform(self, raw_documents, y=None): # We intentionally don't call the transform method to make # fit_transform overridable without unwanted side effects in # TfidfVectorizer. - if isinstance(raw_documents, six.string_types): + if isinstance(raw_documents, six.str): raise ValueError( "Iterable over raw text documents expected, " "string object received.") @@ -1073,7 +1073,7 @@ def transform(self, raw_documents): X : sparse matrix, [n_samples, n_features] Document-term matrix. 
""" - if isinstance(raw_documents, six.string_types): + if isinstance(raw_documents, six.str): raise ValueError( "Iterable over raw text documents expected, " "string object received.") diff --git a/sklearn/feature_selection/from_model.py b/sklearn/feature_selection/from_model.py index 3e2efdbeb1e73..38455d1f9224c 100644 --- a/sklearn/feature_selection/from_model.py +++ b/sklearn/feature_selection/from_model.py @@ -48,7 +48,7 @@ def _calculate_threshold(estimator, importances, threshold): else: threshold = "mean" - if isinstance(threshold, six.string_types): + if isinstance(threshold, six.str): if "*" in threshold: scale, reference = threshold.split("*") scale = float(scale.strip()) diff --git a/sklearn/gaussian_process/kernels.py b/sklearn/gaussian_process/kernels.py index 79d913bca1cb5..b48113c4a75ec 100644 --- a/sklearn/gaussian_process/kernels.py +++ b/sklearn/gaussian_process/kernels.py @@ -91,7 +91,7 @@ class Hyperparameter(namedtuple('Hyperparameter', __slots__ = () def __new__(cls, name, value_type, bounds, n_elements=1, fixed=None): - if not isinstance(bounds, six.string_types) or bounds != "fixed": + if not isinstance(bounds, six.str) or bounds != "fixed": bounds = np.atleast_2d(bounds) if n_elements > 1: # vector-valued parameter if bounds.shape[0] == 1: @@ -102,7 +102,7 @@ def __new__(cls, name, value_type, bounds, n_elements=1, fixed=None): % (name, n_elements, bounds.shape[0])) if fixed is None: - fixed = isinstance(bounds, six.string_types) and bounds == "fixed" + fixed = isinstance(bounds, six.str) and bounds == "fixed" return super(Hyperparameter, cls).__new__( cls, name, value_type, bounds, n_elements, fixed) diff --git a/sklearn/impute.py b/sklearn/impute.py index a10f6c9eb947f..aa5fa90030f17 100644 --- a/sklearn/impute.py +++ b/sklearn/impute.py @@ -565,7 +565,7 @@ def fit(self, X, y=None): raise ValueError("'features' has to be either 'missing-only' or " "'all'. Got {} instead.".format(self.features)) - if not ((isinstance(self.sparse, six.string_types) and + if not ((isinstance(self.sparse, six.str) and self.sparse == "auto") or isinstance(self.sparse, bool)): raise ValueError("'sparse' has to be a boolean or 'auto'. " "Got {!r} instead.".format(self.sparse)) diff --git a/sklearn/linear_model/base.py b/sklearn/linear_model/base.py index 51abed0384806..539473b57cf4b 100644 --- a/sklearn/linear_model/base.py +++ b/sklearn/linear_model/base.py @@ -519,7 +519,7 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy, Xy = None # precompute if n_samples > n_features - if isinstance(precompute, six.string_types) and precompute == 'auto': + if isinstance(precompute, six.str) and precompute == 'auto': precompute = (n_samples > n_features) if precompute is True: diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py index 2d046332889d3..d70f85a6d3caf 100644 --- a/sklearn/linear_model/coordinate_descent.py +++ b/sklearn/linear_model/coordinate_descent.py @@ -698,7 +698,7 @@ def fit(self, X, y, check_input=True): "well. You are advised to use the LinearRegression " "estimator", stacklevel=2) - if isinstance(self.precompute, six.string_types): + if isinstance(self.precompute, six.str): raise ValueError('precompute should be one of True, False or' ' array-like. 
Got %r' % self.precompute) diff --git a/sklearn/linear_model/least_angle.py b/sklearn/linear_model/least_angle.py index d0b5f6ea25720..3c5b192fd709c 100644 --- a/sklearn/linear_model/least_angle.py +++ b/sklearn/linear_model/least_angle.py @@ -25,7 +25,7 @@ from ..exceptions import ConvergenceWarning from ..utils._joblib import Parallel, delayed from ..externals.six.moves import xrange -from ..externals.six import string_types +from ..externals.six import str solve_triangular_args = {'check_finite': False} @@ -181,7 +181,7 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500, # and allows to easily swap columns X = X.copy('F') - elif isinstance(Gram, string_types) and Gram == 'auto' or Gram is True: + elif isinstance(Gram, str) and Gram == 'auto' or Gram is True: if Gram is True or X.shape[0] > X.shape[1]: Gram = np.dot(X.T, X) else: diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 2f24c76397ccd..e1cfbc5cc246f 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -976,7 +976,7 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10, scores = list() - if isinstance(scoring, six.string_types): + if isinstance(scoring, six.str): scoring = get_scorer(scoring) for w in coefs: if multi_class == 'ovr': @@ -1919,7 +1919,7 @@ def score(self, X, y, sample_weight=None): "This warning will disappear in version 0.22.", ChangedBehaviorWarning) scoring = self.scoring or 'accuracy' - if isinstance(scoring, six.string_types): + if isinstance(scoring, six.str): scoring = get_scorer(scoring) return scoring(self, X, y, sample_weight=sample_weight) diff --git a/sklearn/manifold/spectral_embedding_.py b/sklearn/manifold/spectral_embedding_.py index d0c226b51ca5e..442b79c6a399e 100644 --- a/sklearn/manifold/spectral_embedding_.py +++ b/sklearn/manifold/spectral_embedding_.py @@ -510,7 +510,7 @@ def fit(self, X, y=None): X = check_array(X, ensure_min_samples=2, estimator=self) random_state = check_random_state(self.random_state) - if isinstance(self.affinity, six.string_types): + if isinstance(self.affinity, six.str): if self.affinity not in set(("nearest_neighbors", "rbf", "precomputed")): raise ValueError(("%s is not a valid affinity. Expected " diff --git a/sklearn/manifold/t_sne.py b/sklearn/manifold/t_sne.py index fe7268515abfd..649b44f0916b7 100644 --- a/sklearn/manifold/t_sne.py +++ b/sklearn/manifold/t_sne.py @@ -25,7 +25,7 @@ from ..metrics.pairwise import pairwise_distances from . import _utils from . 
import _barnes_hut_tsne -from ..externals.six import string_types +from ..externals.six import str MACHINE_EPSILON = np.finfo(np.double).eps @@ -671,7 +671,7 @@ def _fit(self, X, skip_num_points=0): if self.angle < 0.0 or self.angle > 1.0: raise ValueError("'angle' must be between 0.0 - 1.0") if self.metric == "precomputed": - if isinstance(self.init, string_types) and self.init == 'pca': + if isinstance(self.init, str) and self.init == 'pca': raise ValueError("The parameter init=\"pca\" cannot be " "used with metric=\"precomputed\".") if X.shape[0] != X.shape[1]: diff --git a/sklearn/metrics/regression.py b/sklearn/metrics/regression.py index f4854ff244bc4..22e97d399fd96 100644 --- a/sklearn/metrics/regression.py +++ b/sklearn/metrics/regression.py @@ -27,7 +27,7 @@ from ..utils.validation import check_array, check_consistent_length from ..utils.validation import column_or_1d -from ..externals.six import string_types +from ..externals.six import str __ALL__ = [ @@ -90,7 +90,7 @@ def _check_reg_targets(y_true, y_pred, multioutput): n_outputs = y_true.shape[1] allowed_multioutput_str = ('raw_values', 'uniform_average', 'variance_weighted') - if isinstance(multioutput, string_types): + if isinstance(multioutput, str): if multioutput not in allowed_multioutput_str: raise ValueError("Allowed 'multioutput' string values are {}. " "You provided multioutput={!r}".format( @@ -172,7 +172,7 @@ def mean_absolute_error(y_true, y_pred, check_consistent_length(y_true, y_pred, sample_weight) output_errors = np.average(np.abs(y_pred - y_true), weights=sample_weight, axis=0) - if isinstance(multioutput, string_types): + if isinstance(multioutput, str): if multioutput == 'raw_values': return output_errors elif multioutput == 'uniform_average': @@ -241,7 +241,7 @@ def mean_squared_error(y_true, y_pred, check_consistent_length(y_true, y_pred, sample_weight) output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight) - if isinstance(multioutput, string_types): + if isinstance(multioutput, str): if multioutput == 'raw_values': return output_errors elif multioutput == 'uniform_average': @@ -431,7 +431,7 @@ def explained_variance_score(y_true, y_pred, output_scores[valid_score] = 1 - (numerator[valid_score] / denominator[valid_score]) output_scores[nonzero_numerator & ~nonzero_denominator] = 0. - if isinstance(multioutput, string_types): + if isinstance(multioutput, str): if multioutput == 'raw_values': # return scores individually return output_scores @@ -555,7 +555,7 @@ def r2_score(y_true, y_pred, sample_weight=None, # arbitrary set to zero to avoid -inf scores, having a constant # y_true is not interesting for scoring a regression anyway output_scores[nonzero_numerator & ~nonzero_denominator] = 0. - if isinstance(multioutput, string_types): + if isinstance(multioutput, str): if multioutput == 'raw_values': # return scores individually return output_scores diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 3907e2439dfeb..d5dfac2ef65e3 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -224,7 +224,7 @@ def get_scorer(scoring): scorer : callable The scorer. 
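# Illustrative usage sketch (assumes a scikit-learn install): a scoring
# argument may be either a builtin str name or a callable, which is all the
# isinstance checks in scorer.py need to distinguish on Python 3.
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import get_scorer
from sklearn.metrics.scorer import check_scoring

accuracy_scorer = get_scorer("accuracy")                    # str -> registry lookup
scorer = check_scoring(LogisticRegression(), scoring="accuracy")
same_scorer = check_scoring(LogisticRegression(), scoring=accuracy_scorer)  # callables pass through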
""" - if isinstance(scoring, six.string_types): + if isinstance(scoring, six.str): try: scorer = SCORERS[scoring] except KeyError: @@ -269,7 +269,7 @@ def check_scoring(estimator, scoring=None, allow_none=False): if not hasattr(estimator, 'fit'): raise TypeError("estimator should be an estimator implementing " "'fit' method, %r was passed" % estimator) - if isinstance(scoring, six.string_types): + if isinstance(scoring, six.str): return get_scorer(scoring) elif callable(scoring): # Heuristic to ensure user has not passed a metric @@ -339,7 +339,7 @@ def _check_multimetric_scoring(estimator, scoring=None): False if scorer is None/str/callable """ if callable(scoring) or scoring is None or isinstance(scoring, - six.string_types): + six.str): scorers = {"score": check_scoring(estimator, scoring=scoring)} return scorers, False else: @@ -365,7 +365,7 @@ def _check_multimetric_scoring(estimator, scoring=None): raise ValueError(err_msg + "Duplicate elements were found in" " the given list. %r" % repr(scoring)) elif len(keys) > 0: - if not all(isinstance(k, six.string_types) for k in keys): + if not all(isinstance(k, six.str) for k in keys): if any(callable(k) for k in keys): raise ValueError(err_msg + "One or more of the elements were " @@ -385,7 +385,7 @@ def _check_multimetric_scoring(estimator, scoring=None): elif isinstance(scoring, dict): keys = set(scoring) - if not all(isinstance(k, six.string_types) for k in keys): + if not all(isinstance(k, six.str) for k in keys): raise ValueError("Non-string types were found in the keys of " "the given dict. scoring=%r" % repr(scoring)) if len(keys) == 0: diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index d08d96e31c6da..6c54afb7a9ee8 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -371,7 +371,7 @@ def _check_param_grid(param_grid): if isinstance(v, np.ndarray) and v.ndim > 1: raise ValueError("Parameter array should be one-dimensional.") - if (isinstance(v, six.string_types) or + if (isinstance(v, six.str) or not isinstance(v, (np.ndarray, Sequence))): raise ValueError("Parameter values for parameter ({0}) need " "to be a sequence(but not a string) or" @@ -619,7 +619,7 @@ def fit(self, X, y=None, groups=None, **fit_params): if self.multimetric_: if self.refit is not False and ( - not isinstance(self.refit, six.string_types) or + not isinstance(self.refit, six.str) or # This will work for both dict / list (tuple) self.refit not in scorers): raise ValueError("For multi-metric scoring, the parameter " diff --git a/sklearn/preprocessing/_encoders.py b/sklearn/preprocessing/_encoders.py index 089488bdbaad9..cffe18e6f17e8 100644 --- a/sklearn/preprocessing/_encoders.py +++ b/sklearn/preprocessing/_encoders.py @@ -371,7 +371,7 @@ def _handle_deprecations(self, X): # if user specified categorical_features -> always use legacy mode if self.categorical_features is not None: - if (isinstance(self.categorical_features, six.string_types) + if (isinstance(self.categorical_features, six.str) and self.categorical_features == 'all'): warnings.warn( "The 'categorical_features' keyword is deprecated in " @@ -438,7 +438,7 @@ def _legacy_fit_transform(self, X): "be able to use arbitrary integer values as " "category identifiers.") n_samples, n_features = X.shape - if (isinstance(self._n_values, six.string_types) and + if (isinstance(self._n_values, six.str) and self._n_values == 'auto'): n_values = np.max(X, axis=0) + 1 elif isinstance(self._n_values, numbers.Integral): @@ -473,7 +473,7 @@ 
def _legacy_fit_transform(self, X): shape=(n_samples, indices[-1]), dtype=self.dtype).tocsr() - if (isinstance(self._n_values, six.string_types) and + if (isinstance(self._n_values, six.str) and self._n_values == 'auto'): mask = np.array(out.sum(axis=0)).ravel() != 0 active_features = np.where(mask)[0] @@ -553,7 +553,7 @@ def _legacy_transform(self, X): out = sparse.coo_matrix((data, (row_indices, column_indices)), shape=(n_samples, indices[-1]), dtype=self.dtype).tocsr() - if (isinstance(self._n_values, six.string_types) and + if (isinstance(self._n_values, six.str) and self._n_values == 'auto'): out = out[:, self._active_features_] diff --git a/sklearn/preprocessing/base.py b/sklearn/preprocessing/base.py index 4b0cdbc35e1e5..45e39a2bfcbc5 100644 --- a/sklearn/preprocessing/base.py +++ b/sklearn/preprocessing/base.py @@ -48,7 +48,7 @@ def _transform_selected(X, transform, dtype, selected="all", copy=True, raise ValueError("The retain_order option can only be set to True " "for dense matrices.") - if isinstance(selected, six.string_types) and selected == "all": + if isinstance(selected, six.str) and selected == "all": return transform(X) if len(selected) == 0: diff --git a/sklearn/tree/export.py b/sklearn/tree/export.py index fe127d77302b6..81900da12e1a3 100644 --- a/sklearn/tree/export.py +++ b/sklearn/tree/export.py @@ -281,7 +281,7 @@ def node_to_str(self, tree, node_id, criterion): if self.impurity: if isinstance(criterion, _criterion.FriedmanMSE): criterion = "friedman_mse" - elif not isinstance(criterion, six.string_types): + elif not isinstance(criterion, six.str): criterion = "impurity" if labels: node_string += '%s = ' % criterion @@ -755,7 +755,7 @@ def export_graphviz(decision_tree, out_file=None, max_depth=None, own_file = False return_string = False try: - if isinstance(out_file, six.string_types): + if isinstance(out_file, six.str): if six.PY3: out_file = open(out_file, "w", encoding="utf-8") else: diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index faa83efbb7703..f31858979cf4f 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -219,7 +219,7 @@ def fit(self, X, y, sample_weight=None, check_input=True, min_samples_split = max(min_samples_split, 2 * min_samples_leaf) - if isinstance(self.max_features, six.string_types): + if isinstance(self.max_features, six.str): if self.max_features == "auto": if is_classification: max_features = max(1, int(np.sqrt(self.n_features_))) diff --git a/sklearn/utils/class_weight.py b/sklearn/utils/class_weight.py index cd2a91601cf9b..88afa758f69c9 100644 --- a/sklearn/utils/class_weight.py +++ b/sklearn/utils/class_weight.py @@ -114,12 +114,12 @@ def compute_sample_weight(class_weight, y, indices=None): y = np.reshape(y, (-1, 1)) n_outputs = y.shape[1] - if isinstance(class_weight, six.string_types): + if isinstance(class_weight, six.str): if class_weight not in ['balanced']: raise ValueError('The only valid preset for class_weight is ' '"balanced". Given "%s".' % class_weight) elif (indices is not None and - not isinstance(class_weight, six.string_types)): + not isinstance(class_weight, six.str)): raise ValueError('The only valid class_weight for subsampling is ' '"balanced". Given "%s".' 
% class_weight) elif n_outputs > 1: diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py index f4d28ec227bab..3c25de040e2a2 100644 --- a/sklearn/utils/multiclass.py +++ b/sklearn/utils/multiclass.py @@ -16,7 +16,7 @@ import numpy as np -from ..externals.six import string_types +from ..externals.six import str from ..utils.fixes import _Sequence as Sequence from .validation import check_array @@ -98,7 +98,7 @@ def unique_labels(*ys): ys_labels = set(chain.from_iterable(_unique_labels(y) for y in ys)) # Check that we don't mix string type with number type - if (len(set(isinstance(label, string_types) for label in ys_labels)) > 1): + if (len(set(isinstance(label, str) for label in ys_labels)) > 1): raise ValueError("Mix of label input types (string and number)") return np.array(sorted(ys_labels)) @@ -236,7 +236,7 @@ def type_of_target(y): 'multilabel-indicator' """ valid = ((isinstance(y, (Sequence, spmatrix)) or hasattr(y, '__array__')) - and not isinstance(y, string_types)) + and not isinstance(y, str)) if not valid: raise ValueError('Expected array-like (array or non-string sequence), ' @@ -258,7 +258,7 @@ def type_of_target(y): # The old sequence of sequences format try: if (not hasattr(y[0], '__array__') and isinstance(y[0], Sequence) - and not isinstance(y[0], string_types)): + and not isinstance(y[0], str)): raise ValueError('You appear to be using a legacy multi-label data' ' representation. Sequence of sequences are no' ' longer supported; use a binary array or sparse' @@ -268,7 +268,7 @@ def type_of_target(y): # Invalid inputs if y.ndim > 2 or (y.dtype == object and len(y) and - not isinstance(y.flat[0], string_types)): + not isinstance(y.flat[0], str)): return 'unknown' # [[[1, 2]]] or [obj_1] and not ["label_1"] if y.ndim == 2 and y.shape[1] == 0: diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 3181b925ba83a..cf5588a5506d5 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -205,7 +205,7 @@ def check_memory(memory): If ``memory`` is not joblib.Memory-like. """ - if memory is None or isinstance(memory, six.string_types): + if memory is None or isinstance(memory, six.str): if LooseVersion(joblib_version) < '0.12': memory = Memory(cachedir=memory, verbose=0) else: @@ -308,7 +308,7 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, changed_format = False - if isinstance(accept_sparse, six.string_types): + if isinstance(accept_sparse, six.str): accept_sparse = [accept_sparse] # Indices dtype validation @@ -467,7 +467,7 @@ def check_array(array, accept_sparse=False, accept_large_sparse=True, array_orig = array # store whether originally we wanted numeric dtype - dtype_numeric = isinstance(dtype, six.string_types) and dtype == "numeric" + dtype_numeric = isinstance(dtype, six.str) and dtype == "numeric" dtype_orig = getattr(array, "dtype", None) if not hasattr(dtype_orig, 'kind'): @@ -501,7 +501,7 @@ def check_array(array, accept_sparse=False, accept_large_sparse=True, '. 
Got {!r} instead'.format(force_all_finite)) if estimator is not None: - if isinstance(estimator, six.string_types): + if isinstance(estimator, six.str): estimator_name = estimator else: estimator_name = estimator.__class__.__name__ From d0dd1d1eb49ac3b6a81553af38f0c4a8f7bb7bdb Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 20 Nov 2018 18:12:26 -0500 Subject: [PATCH 04/29] remove string_types and six imports --- examples/ensemble/plot_adaboost_multiclass.py | 2 -- sklearn/cluster/mean_shift_.py | 3 +-- sklearn/cluster/tests/test_k_means.py | 2 +- sklearn/cluster/tests/test_spectral.py | 4 ++-- sklearn/compose/_column_transformer.py | 11 +++++------ sklearn/compose/tests/test_column_transformer.py | 2 +- sklearn/cross_decomposition/pls_.py | 1 - sklearn/datasets/kddcup99.py | 1 - sklearn/datasets/openml.py | 2 +- sklearn/datasets/samples_generator.py | 1 - sklearn/datasets/svmlight_format.py | 6 ++---- sklearn/datasets/tests/test_lfw.py | 1 - sklearn/decomposition/base.py | 1 - sklearn/decomposition/factor_analysis.py | 1 - sklearn/decomposition/fastica_.py | 5 ++--- sklearn/decomposition/pca.py | 4 +--- sklearn/decomposition/tests/test_online_lda.py | 1 - sklearn/discriminant_analysis.py | 1 - sklearn/ensemble/forest.py | 3 +-- sklearn/ensemble/gradient_boosting.py | 7 +++---- sklearn/ensemble/iforest.py | 3 +-- sklearn/ensemble/partial_dependence.py | 6 ++---- sklearn/ensemble/tests/test_gradient_boosting.py | 5 +++-- sklearn/ensemble/weight_boosting.py | 4 +--- sklearn/feature_extraction/dict_vectorizer.py | 8 +++----- sklearn/feature_extraction/text.py | 12 +++++------- sklearn/feature_selection/base.py | 1 - sklearn/feature_selection/from_model.py | 3 +-- sklearn/feature_selection/tests/test_rfe.py | 2 +- sklearn/gaussian_process/kernels.py | 5 ++--- sklearn/impute.py | 6 +----- sklearn/linear_model/base.py | 3 +-- sklearn/linear_model/coordinate_descent.py | 4 +--- sklearn/linear_model/least_angle.py | 2 -- sklearn/linear_model/logistic.py | 5 ++--- sklearn/linear_model/ridge.py | 1 - sklearn/linear_model/stochastic_gradient.py | 1 - sklearn/linear_model/tests/test_least_angle.py | 2 +- sklearn/linear_model/theil_sen.py | 1 - sklearn/manifold/spectral_embedding_.py | 3 +-- sklearn/manifold/tests/test_t_sne.py | 2 +- sklearn/metrics/scorer.py | 11 +++++------ sklearn/mixture/base.py | 1 - sklearn/mixture/tests/test_gaussian_mixture.py | 2 +- sklearn/model_selection/_search.py | 5 ++--- sklearn/model_selection/tests/test_search.py | 3 +-- sklearn/model_selection/tests/test_split.py | 3 --- sklearn/model_selection/tests/test_validation.py | 2 +- sklearn/multioutput.py | 1 - sklearn/naive_bayes.py | 1 - sklearn/neighbors/base.py | 1 - sklearn/neural_network/multilayer_perceptron.py | 1 - sklearn/neural_network/rbm.py | 1 - sklearn/neural_network/tests/test_mlp.py | 2 +- sklearn/neural_network/tests/test_rbm.py | 2 +- sklearn/pipeline.py | 1 - sklearn/preprocessing/_encoders.py | 9 ++++----- sklearn/preprocessing/base.py | 3 +-- sklearn/preprocessing/data.py | 1 - sklearn/preprocessing/imputation.py | 4 ---- sklearn/preprocessing/label.py | 4 ---- sklearn/preprocessing/tests/test_discretization.py | 1 - sklearn/random_projection.py | 3 +-- sklearn/semi_supervised/label_propagation.py | 1 - sklearn/svm/base.py | 1 - sklearn/tree/export.py | 5 ++--- sklearn/tree/tree.py | 3 +-- sklearn/utils/class_weight.py | 5 ++--- sklearn/utils/extmath.py | 1 - sklearn/utils/metaestimators.py | 1 - sklearn/utils/tests/test_estimator_checks.py | 2 +- sklearn/utils/tests/test_fast_dict.py | 1 - 
sklearn/utils/tests/test_multiclass.py | 1 - sklearn/utils/validation.py | 9 ++++----- 74 files changed, 75 insertions(+), 155 deletions(-) diff --git a/examples/ensemble/plot_adaboost_multiclass.py b/examples/ensemble/plot_adaboost_multiclass.py index 906df85ccf645..941a2ab731769 100644 --- a/examples/ensemble/plot_adaboost_multiclass.py +++ b/examples/ensemble/plot_adaboost_multiclass.py @@ -29,8 +29,6 @@ # # License: BSD 3 clause -from sklearn.externals.six.moves import zip - import matplotlib.pyplot as plt from sklearn.datasets import make_gaussian_quantiles diff --git a/sklearn/cluster/mean_shift_.py b/sklearn/cluster/mean_shift_.py index b6fb0f8f09bd7..89117164a63f8 100644 --- a/sklearn/cluster/mean_shift_.py +++ b/sklearn/cluster/mean_shift_.py @@ -18,7 +18,6 @@ import warnings from collections import defaultdict -from ..externals import six from ..utils.validation import check_is_fitted from ..utils import check_random_state, gen_batches, check_array from ..base import BaseEstimator, ClusterMixin @@ -285,7 +284,7 @@ def get_bin_seeds(X, bin_size, min_bin_freq=1): bin_sizes[tuple(binned_point)] += 1 # Select only those bins as seeds which have enough members - bin_seeds = np.array([point for point, freq in six.iteritems(bin_sizes) if + bin_seeds = np.array([point for point, freq in bin_sizes.items()) if freq >= min_bin_freq], dtype=np.float32) if len(bin_seeds) == len(X): warnings.warn("Binning data failed with provided bin_size=%f," diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py index cec0fa2897546..0386665610915 100644 --- a/sklearn/cluster/tests/test_k_means.py +++ b/sklearn/cluster/tests/test_k_means.py @@ -30,7 +30,7 @@ from sklearn.cluster.k_means_ import _labels_inertia from sklearn.cluster.k_means_ import _mini_batch_step from sklearn.datasets.samples_generator import make_blobs -from sklearn.externals.six.moves import cStringIO as StringIO +from io import StringIO from sklearn.metrics.cluster import homogeneity_score diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py index 0c220e7615e67..4f14d3f72c0ae 100644 --- a/sklearn/cluster/tests/test_spectral.py +++ b/sklearn/cluster/tests/test_spectral.py @@ -6,7 +6,7 @@ import pytest -from sklearn.externals.six.moves import cPickle +import pickle from sklearn.utils import check_random_state from sklearn.utils.testing import assert_equal @@ -52,7 +52,7 @@ def test_spectral_clustering(eigen_solver, assign_labels): assert adjusted_rand_score(labels, [1, 1, 1, 0, 0, 0, 0]) == 1 - model_copy = cPickle.loads(cPickle.dumps(model)) + model_copy = pickle.loads(pickle.dumps(model)) assert model_copy.n_clusters == model.n_clusters assert model_copy.eigen_solver == model.eigen_solver assert_array_equal(model_copy.labels_, model.labels_) diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index 29b11a5a445a7..6ebbb8701658a 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -15,7 +15,6 @@ from ..base import clone, TransformerMixin from ..utils._joblib import Parallel, delayed -from ..externals import six from ..pipeline import _fit_transform_one, _transform_one, _name_estimators from ..preprocessing import FunctionTransformer from ..utils import Bunch @@ -545,7 +544,7 @@ def _check_key_type(key, superclass): ---------- key : scalar, list, slice, array-like The column specification to check - superclass : int or six.str + superclass : int or str The type for which to check 
the `key` """ @@ -560,7 +559,7 @@ def _check_key_type(key, superclass): if superclass is int: return key.dtype.kind == 'i' else: - # superclass = six.str + # superclass = str return key.dtype.kind in ('O', 'U', 'S') return False @@ -589,7 +588,7 @@ def _get_column(X, key): # check whether we have string column names or integers if _check_key_type(key, int): column_names = False - elif _check_key_type(key, six.str): + elif _check_key_type(key, str): column_names = True elif hasattr(key, 'dtype') and np.issubdtype(key.dtype, np.bool_): # boolean mask @@ -635,13 +634,13 @@ def _get_column_indices(X, key): else: return list(key) - elif _check_key_type(key, six.str): + elif _check_key_type(key, str): try: all_columns = list(X.columns) except AttributeError: raise ValueError("Specifying the columns using strings is only " "supported for pandas DataFrames") - if isinstance(key, six.str): + if isinstance(key, str): columns = [key] elif isinstance(key, slice): start, stop = key.start, key.stop diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py index fce40c11422a6..e39b6de4d0859 100644 --- a/sklearn/compose/tests/test_column_transformer.py +++ b/sklearn/compose/tests/test_column_transformer.py @@ -785,7 +785,7 @@ def test_column_transformer_remainder_numpy(key): def test_column_transformer_remainder_pandas(key): # test different ways that columns are specified with passthrough pd = pytest.importorskip('pandas') - if isinstance(key, six.str) and key == 'pd-index': + if isinstance(key, str) and key == 'pd-index': key = pd.Index(['first']) X_array = np.array([[0, 1, 2], [2, 4, 6]]).T diff --git a/sklearn/cross_decomposition/pls_.py b/sklearn/cross_decomposition/pls_.py index df7cb22b895f7..dcc614677f5f7 100644 --- a/sklearn/cross_decomposition/pls_.py +++ b/sklearn/cross_decomposition/pls_.py @@ -17,7 +17,6 @@ from ..utils.extmath import svd_flip from ..utils.validation import check_is_fitted, FLOAT_DTYPES from ..exceptions import ConvergenceWarning -from ..externals import six __all__ = ['PLSCanonical', 'PLSRegression', 'PLSSVD'] diff --git a/sklearn/datasets/kddcup99.py b/sklearn/datasets/kddcup99.py index 4fac89d7a65df..e460503474a9f 100644 --- a/sklearn/datasets/kddcup99.py +++ b/sklearn/datasets/kddcup99.py @@ -21,7 +21,6 @@ from .base import _fetch_remote from .base import get_data_home from .base import RemoteFileMetadata -from ..externals import six from ..utils import Bunch from ..utils import _joblib from ..utils import check_random_state diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py index d1d240cad8301..84385a6bb2700 100644 --- a/sklearn/datasets/openml.py +++ b/sklearn/datasets/openml.py @@ -574,7 +574,7 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None, target_column = [] elif not isinstance(target_column, list): raise TypeError("Did not recognize type of target_column" - "Should be six.string_type, list or None. Got: " + "Should be string_type, list or None. 
Got: " "{}".format(type(target_column))) data_columns = _valid_data_column_names(features_list, target_column) diff --git a/sklearn/datasets/samples_generator.py b/sklearn/datasets/samples_generator.py index 17a30b1ec9f37..b8ad97628cbe7 100644 --- a/sklearn/datasets/samples_generator.py +++ b/sklearn/datasets/samples_generator.py @@ -17,7 +17,6 @@ from ..utils import shuffle as util_shuffle from ..utils.fixes import _Iterable as Iterable from ..utils.random import sample_without_replacement -from ..externals import six map = six.moves.map zip = six.moves.zip diff --git a/sklearn/datasets/svmlight_format.py b/sklearn/datasets/svmlight_format.py index 81c0d7c1ac1c5..f5e2edfe53354 100644 --- a/sklearn/datasets/svmlight_format.py +++ b/sklearn/datasets/svmlight_format.py @@ -23,9 +23,7 @@ import scipy.sparse as sp from .. import __version__ -from ..externals import six -from ..externals.six import u, b -from ..externals.six.moves import range, zip + from ..utils import check_array, IS_PYPY if not IS_PYPY: @@ -159,7 +157,7 @@ def get_data(): def _gen_open(f): if isinstance(f, int): # file descriptor return io.open(f, "rb", closefd=False) - elif not isinstance(f, six.str): + elif not isinstance(f, str): raise TypeError("expected {str, int, file-like}, got %s" % type(f)) _, ext = os.path.splitext(f) diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py index be78480b6ce50..422969881fe86 100644 --- a/sklearn/datasets/tests/test_lfw.py +++ b/sklearn/datasets/tests/test_lfw.py @@ -14,7 +14,6 @@ import tempfile import numpy as np from functools import partial -from sklearn.externals import six from sklearn.externals._pilutil import pillow_installed, imsave from sklearn.datasets import fetch_lfw_pairs from sklearn.datasets import fetch_lfw_people diff --git a/sklearn/decomposition/base.py b/sklearn/decomposition/base.py index cc647e2658374..faebbd0c74ac6 100644 --- a/sklearn/decomposition/base.py +++ b/sklearn/decomposition/base.py @@ -14,7 +14,6 @@ from ..base import BaseEstimator, TransformerMixin from ..utils import check_array from ..utils.validation import check_is_fitted -from ..externals import six from abc import ABCMeta, abstractmethod diff --git a/sklearn/decomposition/factor_analysis.py b/sklearn/decomposition/factor_analysis.py index 5ede30bca42bc..f5b1834643c5d 100644 --- a/sklearn/decomposition/factor_analysis.py +++ b/sklearn/decomposition/factor_analysis.py @@ -26,7 +26,6 @@ from ..base import BaseEstimator, TransformerMixin -from ..externals.six.moves import xrange from ..utils import check_array, check_random_state from ..utils.extmath import fast_logdet, randomized_svd, squared_norm from ..utils.validation import check_is_fitted diff --git a/sklearn/decomposition/fastica_.py b/sklearn/decomposition/fastica_.py index c7e5fbe6fd685..5995357c4f4a9 100644 --- a/sklearn/decomposition/fastica_.py +++ b/sklearn/decomposition/fastica_.py @@ -16,8 +16,7 @@ from ..base import BaseEstimator, TransformerMixin from ..exceptions import ConvergenceWarning -from ..externals import six -from ..externals.six import moves + from ..utils import check_array, as_float_array, check_random_state from ..utils.validation import check_is_fitted from ..utils.validation import FLOAT_DTYPES @@ -286,7 +285,7 @@ def my_g(x): def g(x, fun_args): return fun(x, **fun_args) else: - exc = ValueError if isinstance(fun, six.str) else TypeError + exc = ValueError if isinstance(fun, str) else TypeError raise exc("Unknown function %r;" " should be one of 'logcosh', 'exp', 'cube' or callable" % 
fun) diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py index eff2a6653401c..887c508ead78c 100644 --- a/sklearn/decomposition/pca.py +++ b/sklearn/decomposition/pca.py @@ -19,8 +19,6 @@ from scipy.sparse import issparse from scipy.sparse.linalg import svds -from ..externals import six - from .base import _BasePCA from ..utils import check_random_state from ..utils import check_array @@ -479,7 +477,7 @@ def _fit_truncated(self, X, n_components, svd_solver): """ n_samples, n_features = X.shape - if isinstance(n_components, six.str): + if isinstance(n_components, str): raise ValueError("n_components=%r cannot be a string " "with svd_solver='%s'" % (n_components, svd_solver)) diff --git a/sklearn/decomposition/tests/test_online_lda.py b/sklearn/decomposition/tests/test_online_lda.py index 63346f3dbddbd..b7d95eeb6d899 100644 --- a/sklearn/decomposition/tests/test_online_lda.py +++ b/sklearn/decomposition/tests/test_online_lda.py @@ -20,7 +20,6 @@ from sklearn.utils.testing import if_safe_multiprocessing_with_blas from sklearn.exceptions import NotFittedError -from sklearn.externals.six.moves import xrange from sklearn.externals.six import StringIO diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index 7f8d9808cc0e2..184f0b9b530a1 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -14,7 +14,6 @@ import numpy as np from scipy import linalg from .externals.six import str -from .externals.six.moves import xrange from .base import BaseEstimator, TransformerMixin, ClassifierMixin from .linear_model.base import LinearClassifierMixin diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index db793a494e594..a5f539a8653a9 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -51,7 +51,6 @@ class calls the ``fit`` method of each sub-estimator on random samples from ..base import ClassifierMixin, RegressorMixin from ..utils._joblib import Parallel, delayed -from ..externals import six from ..metrics import r2_score from ..preprocessing import OneHotEncoder from ..tree import (DecisionTreeClassifier, DecisionTreeRegressor, @@ -493,7 +492,7 @@ def _validate_y_class_weight(self, y): if self.class_weight is not None: valid_presets = ('balanced', 'balanced_subsample') - if isinstance(self.class_weight, six.str): + if isinstance(self.class_weight, str): if self.class_weight not in valid_presets: raise ValueError('Valid presets for class_weight include ' '"balanced" and "balanced_subsample". Given "%s".' 
diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index 9dcdcfa934881..04d8dab7570a8 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -29,7 +29,6 @@ from .base import BaseEnsemble from ..base import ClassifierMixin from ..base import RegressorMixin -from ..externals import six from ._gradient_boosting import predict_stages from ._gradient_boosting import predict_stage @@ -1234,7 +1233,7 @@ def _check_params(self): "was %r" % self.subsample) if self.init is not None: - if isinstance(self.init, six.str): + if isinstance(self.init, str): if self.init not in INIT_ESTIMATORS: raise ValueError('init="%s" is not supported' % self.init) else: @@ -1248,7 +1247,7 @@ def _check_params(self): raise ValueError("alpha must be in (0.0, 1.0) but " "was %r" % self.alpha) - if isinstance(self.max_features, six.str): + if isinstance(self.max_features, str): if self.max_features == "auto": # if is_classification if self.n_classes_ > 1: @@ -1293,7 +1292,7 @@ def _init_state(self): if self.init is None: self.init_ = self.loss_.init_estimator() - elif isinstance(self.init, six.str): + elif isinstance(self.init, str): self.init_ = INIT_ESTIMATORS[self.init]() else: self.init_ = self.init diff --git a/sklearn/ensemble/iforest.py b/sklearn/ensemble/iforest.py index 61000b05e79dc..9ca7af6ab9eaf 100644 --- a/sklearn/ensemble/iforest.py +++ b/sklearn/ensemble/iforest.py @@ -11,7 +11,6 @@ from scipy.sparse import issparse import numbers -from ..externals import six from ..tree import ExtraTreeRegressor from ..utils import check_random_state, check_array from ..utils.fixes import _joblib_parallel_args @@ -242,7 +241,7 @@ def fit(self, X, y=None, sample_weight=None): # ensure that max_sample is in [1, n_samples]: n_samples = X.shape[0] - if isinstance(self.max_samples, six.str): + if isinstance(self.max_samples, str): if self.max_samples == 'auto': max_samples = min(256, n_samples) else: diff --git a/sklearn/ensemble/partial_dependence.py b/sklearn/ensemble/partial_dependence.py index b9ec635278ce4..74a6497e42191 100644 --- a/sklearn/ensemble/partial_dependence.py +++ b/sklearn/ensemble/partial_dependence.py @@ -11,8 +11,6 @@ from ..utils.extmath import cartesian from ..utils._joblib import Parallel, delayed -from ..externals import six -from ..externals.six.moves import map, range, zip from ..utils import check_array from ..utils.validation import check_is_fitted from ..tree._tree import DTYPE @@ -275,7 +273,7 @@ def plot_partial_dependence(gbrt, X, features, feature_names=None, feature_names = feature_names.tolist() def convert_feature(fx): - if isinstance(fx, six.str): + if isinstance(fx, str): try: fx = feature_names.index(fx) except ValueError: @@ -285,7 +283,7 @@ def convert_feature(fx): # convert features into a seq of int tuples tmp_features = [] for fxs in features: - if isinstance(fxs, (numbers.Integral,) + six.str): + if isinstance(fxs, (numbers.Integral,) + str): fxs = (fxs,) try: fxs = np.array([convert_feature(fx) for fx in fxs], dtype=np.int32) diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index dce50755ed674..ad29041e3d41e 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -738,7 +738,8 @@ def test_oob_multilcass_iris(): def test_verbose_output(): # Check verbose=1 does not cause error. 
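# Illustrative sketch of the stdout-capture idiom these tests keep after the
# six.moves cStringIO import is replaced by the stdlib io module:
import sys
from io import StringIO

old_stdout = sys.stdout
sys.stdout = StringIO()
try:
    print("verbose training output")
    captured = sys.stdout.getvalue()
finally:
    sys.stdout = old_stdout
assert "verbose" in captured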
- from sklearn.externals.six.moves import cStringIO as StringIO + from io import StringIO + import sys old_stdout = sys.stdout sys.stdout = StringIO() @@ -763,7 +764,7 @@ def test_verbose_output(): def test_more_verbose_output(): # Check verbose=2 does not cause error. - from sklearn.externals.six.moves import cStringIO as StringIO + from io import StringIO import sys old_stdout = sys.stdout sys.stdout = StringIO() diff --git a/sklearn/ensemble/weight_boosting.py b/sklearn/ensemble/weight_boosting.py index 80764fbf11eea..c55e50d5aed30 100644 --- a/sklearn/ensemble/weight_boosting.py +++ b/sklearn/ensemble/weight_boosting.py @@ -29,9 +29,7 @@ from .base import BaseEnsemble from ..base import ClassifierMixin, RegressorMixin, is_regressor, is_classifier -from ..externals import six -from ..externals.six.moves import zip -from ..externals.six.moves import xrange as range + from .forest import BaseForest from ..tree import DecisionTreeClassifier, DecisionTreeRegressor from ..tree.tree import BaseDecisionTree diff --git a/sklearn/feature_extraction/dict_vectorizer.py b/sklearn/feature_extraction/dict_vectorizer.py index 6393914f76e7a..9636580af18ee 100644 --- a/sklearn/feature_extraction/dict_vectorizer.py +++ b/sklearn/feature_extraction/dict_vectorizer.py @@ -9,8 +9,6 @@ import scipy.sparse as sp from ..base import BaseEstimator, TransformerMixin -from ..externals import six -from ..externals.six.moves import xrange from ..utils import check_array, tosequence from ..utils.fixes import _Mapping as Mapping @@ -119,7 +117,7 @@ def fit(self, X, y=None): for x in X: for f, v in six.iteritems(x): - if isinstance(v, six.str): + if isinstance(v, str): f = "%s%s%s" % (f, self.separator, v) if f not in vocab: feature_names.append(f) @@ -165,7 +163,7 @@ def _transform(self, X, fitting): # same time for x in X: for f, v in six.iteritems(x): - if isinstance(v, six.str): + if isinstance(v, str): f = "%s%s%s" % (f, self.separator, v) v = 1 if f in vocab: @@ -299,7 +297,7 @@ def transform(self, X): for i, x in enumerate(X): for f, v in six.iteritems(x): - if isinstance(v, six.str): + if isinstance(v, str): f = "%s%s%s" % (f, self.separator, v) v = 1 try: diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index d5a66f6e1462b..32b8a879cb70e 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -25,8 +25,6 @@ import scipy.sparse as sp from ..base import BaseEstimator, TransformerMixin -from ..externals import six -from ..externals.six.moves import xrange from ..preprocessing import normalize from .hashing import FeatureHasher from .stop_words import ENGLISH_STOP_WORDS @@ -107,7 +105,7 @@ def strip_tags(s): def _check_stop_list(stop): if stop == "english": return ENGLISH_STOP_WORDS - elif isinstance(stop, six.str): + elif isinstance(stop, str): raise ValueError("not a built-in stop list: %s" % stop) elif stop is None: return None @@ -588,7 +586,7 @@ def fit(self, X, y=None): Training data. """ # triggers a parameter validation - if isinstance(X, six.str): + if isinstance(X, str): raise ValueError( "Iterable over raw text documents expected, " "string object received.") @@ -613,7 +611,7 @@ def transform(self, X): X : scipy.sparse matrix, shape = (n_samples, self.n_features) Document-term matrix. 
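# Illustrative sketch of the behaviour guarded by the isinstance(X, str)
# checks in the surrounding text.py hunks: the vectorizers expect an iterable
# of documents and reject a bare string.
from sklearn.feature_extraction.text import CountVectorizer

CountVectorizer().fit(["one document", "another document"])    # accepted
try:
    CountVectorizer().fit("a single string")                   # rejected
except ValueError as exc:
    assert "Iterable over raw text documents expected" in str(exc)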
""" - if isinstance(X, six.str): + if isinstance(X, str): raise ValueError( "Iterable over raw text documents expected, " "string object received.") @@ -1018,7 +1016,7 @@ def fit_transform(self, raw_documents, y=None): # We intentionally don't call the transform method to make # fit_transform overridable without unwanted side effects in # TfidfVectorizer. - if isinstance(raw_documents, six.str): + if isinstance(raw_documents, str): raise ValueError( "Iterable over raw text documents expected, " "string object received.") @@ -1073,7 +1071,7 @@ def transform(self, raw_documents): X : sparse matrix, [n_samples, n_features] Document-term matrix. """ - if isinstance(raw_documents, six.str): + if isinstance(raw_documents, str): raise ValueError( "Iterable over raw text documents expected, " "string object received.") diff --git a/sklearn/feature_selection/base.py b/sklearn/feature_selection/base.py index 5bb0b3ea890c3..441b4f45a80fd 100644 --- a/sklearn/feature_selection/base.py +++ b/sklearn/feature_selection/base.py @@ -12,7 +12,6 @@ from ..base import TransformerMixin from ..utils import check_array, safe_mask -from ..externals import six class SelectorMixin(six.with_metaclass(ABCMeta, TransformerMixin)): diff --git a/sklearn/feature_selection/from_model.py b/sklearn/feature_selection/from_model.py index 38455d1f9224c..fb26f9d685688 100644 --- a/sklearn/feature_selection/from_model.py +++ b/sklearn/feature_selection/from_model.py @@ -6,7 +6,6 @@ from .base import SelectorMixin from ..base import BaseEstimator, clone, MetaEstimatorMixin -from ..externals import six from ..exceptions import NotFittedError from ..utils.metaestimators import if_delegate_has_method @@ -48,7 +47,7 @@ def _calculate_threshold(estimator, importances, threshold): else: threshold = "mean" - if isinstance(threshold, six.str): + if isinstance(threshold, str): if "*" in threshold: scale, reference = threshold.split("*") scale = float(scale.strip()) diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py index 30307fa28902b..c7d360f728b1a 100644 --- a/sklearn/feature_selection/tests/test_rfe.py +++ b/sklearn/feature_selection/tests/test_rfe.py @@ -214,7 +214,7 @@ def test_rfecv_mockclassifier(): def test_rfecv_verbose_output(): # Check verbose=1 is producing an output. 
- from sklearn.externals.six.moves import cStringIO as StringIO + from io import StringIO import sys sys.stdout = StringIO() diff --git a/sklearn/gaussian_process/kernels.py b/sklearn/gaussian_process/kernels.py index b48113c4a75ec..e5a14c5e1db17 100644 --- a/sklearn/gaussian_process/kernels.py +++ b/sklearn/gaussian_process/kernels.py @@ -28,7 +28,6 @@ from scipy.spatial.distance import pdist, cdist, squareform from ..metrics.pairwise import pairwise_kernels -from ..externals import six from ..base import clone from ..utils.fixes import signature @@ -91,7 +90,7 @@ class Hyperparameter(namedtuple('Hyperparameter', __slots__ = () def __new__(cls, name, value_type, bounds, n_elements=1, fixed=None): - if not isinstance(bounds, six.str) or bounds != "fixed": + if not isinstance(bounds, str) or bounds != "fixed": bounds = np.atleast_2d(bounds) if n_elements > 1: # vector-valued parameter if bounds.shape[0] == 1: @@ -102,7 +101,7 @@ def __new__(cls, name, value_type, bounds, n_elements=1, fixed=None): % (name, n_elements, bounds.shape[0])) if fixed is None: - fixed = isinstance(bounds, six.str) and bounds == "fixed" + fixed = isinstance(bounds, str) and bounds == "fixed" return super(Hyperparameter, cls).__new__( cls, name, value_type, bounds, n_elements, fixed) diff --git a/sklearn/impute.py b/sklearn/impute.py index aa5fa90030f17..6088d4d1853eb 100644 --- a/sklearn/impute.py +++ b/sklearn/impute.py @@ -19,10 +19,6 @@ from .utils.fixes import _object_dtype_isnan from .utils import is_scalar_nan -from .externals import six - -zip = six.moves.zip -map = six.moves.map __all__ = [ 'MissingIndicator', @@ -565,7 +561,7 @@ def fit(self, X, y=None): raise ValueError("'features' has to be either 'missing-only' or " "'all'. Got {} instead.".format(self.features)) - if not ((isinstance(self.sparse, six.str) and + if not ((isinstance(self.sparse, str) and self.sparse == "auto") or isinstance(self.sparse, bool)): raise ValueError("'sparse' has to be a boolean or 'auto'. " "Got {!r} instead.".format(self.sparse)) diff --git a/sklearn/linear_model/base.py b/sklearn/linear_model/base.py index 539473b57cf4b..d931c55f7e63a 100644 --- a/sklearn/linear_model/base.py +++ b/sklearn/linear_model/base.py @@ -23,7 +23,6 @@ from scipy import linalg from scipy import sparse -from ..externals import six from ..utils._joblib import Parallel, delayed from ..base import BaseEstimator, ClassifierMixin, RegressorMixin from ..utils import check_array, check_X_y @@ -519,7 +518,7 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy, Xy = None # precompute if n_samples > n_features - if isinstance(precompute, six.str) and precompute == 'auto': + if isinstance(precompute, str) and precompute == 'auto': precompute = (n_samples > n_features) if precompute is True: diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py index d70f85a6d3caf..c51d3c577f4e9 100644 --- a/sklearn/linear_model/coordinate_descent.py +++ b/sklearn/linear_model/coordinate_descent.py @@ -19,8 +19,6 @@ from ..utils.validation import check_random_state from ..model_selection import check_cv from ..utils._joblib import Parallel, delayed, effective_n_jobs -from ..externals import six -from ..externals.six.moves import xrange from ..utils.extmath import safe_sparse_dot from ..utils.fixes import _joblib_parallel_args from ..utils.validation import check_is_fitted @@ -698,7 +696,7 @@ def fit(self, X, y, check_input=True): "well. 
You are advised to use the LinearRegression " "estimator", stacklevel=2) - if isinstance(self.precompute, six.str): + if isinstance(self.precompute, str): raise ValueError('precompute should be one of True, False or' ' array-like. Got %r' % self.precompute) diff --git a/sklearn/linear_model/least_angle.py b/sklearn/linear_model/least_angle.py index 3c5b192fd709c..2079fe3d11379 100644 --- a/sklearn/linear_model/least_angle.py +++ b/sklearn/linear_model/least_angle.py @@ -24,8 +24,6 @@ from ..model_selection import check_cv from ..exceptions import ConvergenceWarning from ..utils._joblib import Parallel, delayed -from ..externals.six.moves import xrange -from ..externals.six import str solve_triangular_args = {'check_finite': False} diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index e1cfbc5cc246f..ec44613670ccd 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -35,7 +35,6 @@ from ..utils._joblib import Parallel, delayed, effective_n_jobs from ..utils.fixes import _joblib_parallel_args from ..model_selection import check_cv -from ..externals import six from ..metrics import get_scorer @@ -976,7 +975,7 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10, scores = list() - if isinstance(scoring, six.str): + if isinstance(scoring, str): scoring = get_scorer(scoring) for w in coefs: if multi_class == 'ovr': @@ -1919,7 +1918,7 @@ def score(self, X, y, sample_weight=None): "This warning will disappear in version 0.22.", ChangedBehaviorWarning) scoring = self.scoring or 'accuracy' - if isinstance(scoring, six.str): + if isinstance(scoring, str): scoring = get_scorer(scoring) return scoring(self, X, y, sample_weight=sample_weight) diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py index 5d53f2f200132..36402d340a30d 100644 --- a/sklearn/linear_model/ridge.py +++ b/sklearn/linear_model/ridge.py @@ -29,7 +29,6 @@ from ..utils import column_or_1d from ..preprocessing import LabelBinarizer from ..model_selection import GridSearchCV -from ..externals import six from ..metrics.scorer import check_scoring from ..exceptions import ConvergenceWarning diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py index 5eb4ee2c05d26..1ee974c0389ad 100644 --- a/sklearn/linear_model/stochastic_gradient.py +++ b/sklearn/linear_model/stochastic_gradient.py @@ -20,7 +20,6 @@ from ..utils.multiclass import _check_partial_fit_first_call from ..utils.validation import check_is_fitted from ..exceptions import ConvergenceWarning -from ..externals import six from ..model_selection import StratifiedShuffleSplit, ShuffleSplit from .sgd_fast import plain_sgd, average_sgd diff --git a/sklearn/linear_model/tests/test_least_angle.py b/sklearn/linear_model/tests/test_least_angle.py index f1b3a0c2de298..1397b1c0206eb 100644 --- a/sklearn/linear_model/tests/test_least_angle.py +++ b/sklearn/linear_model/tests/test_least_angle.py @@ -31,7 +31,7 @@ def test_simple(): # Principle of Lars is to keep covariances tied and decreasing # also test verbose output - from sklearn.externals.six.moves import cStringIO as StringIO + from io import StringIO import sys old_stdout = sys.stdout try: diff --git a/sklearn/linear_model/theil_sen.py b/sklearn/linear_model/theil_sen.py index 297614cd9a7f2..863a5d54672be 100644 --- a/sklearn/linear_model/theil_sen.py +++ b/sklearn/linear_model/theil_sen.py @@ -22,7 +22,6 @@ from ..utils import check_random_state from ..utils import check_X_y from 
..utils._joblib import Parallel, delayed, effective_n_jobs -from ..externals.six.moves import xrange as range from ..exceptions import ConvergenceWarning _EPSILON = np.finfo(np.double).eps diff --git a/sklearn/manifold/spectral_embedding_.py b/sklearn/manifold/spectral_embedding_.py index 442b79c6a399e..2eb2cac6e80ec 100644 --- a/sklearn/manifold/spectral_embedding_.py +++ b/sklearn/manifold/spectral_embedding_.py @@ -16,7 +16,6 @@ from scipy.sparse.csgraph import laplacian as csgraph_laplacian from ..base import BaseEstimator -from ..externals import six from ..utils import check_random_state, check_array, check_symmetric from ..utils.extmath import _deterministic_vector_sign_flip from ..metrics.pairwise import rbf_kernel @@ -510,7 +509,7 @@ def fit(self, X, y=None): X = check_array(X, ensure_min_samples=2, estimator=self) random_state = check_random_state(self.random_state) - if isinstance(self.affinity, six.str): + if isinstance(self.affinity, str): if self.affinity not in set(("nearest_neighbors", "rbf", "precomputed")): raise ValueError(("%s is not a valid affinity. Expected " diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py index cc692ae0d0cd0..c28481aa14187 100644 --- a/sklearn/manifold/tests/test_t_sne.py +++ b/sklearn/manifold/tests/test_t_sne.py @@ -1,5 +1,5 @@ import sys -from sklearn.externals.six.moves import cStringIO as StringIO +from io import StringIO import numpy as np import scipy.sparse as sp diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index d5dfac2ef65e3..89bf9a9ba8955 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -40,7 +40,6 @@ from ..utils.multiclass import type_of_target from ..utils.fixes import _Iterable as Iterable -from ..externals import six from ..base import is_regressor @@ -224,7 +223,7 @@ def get_scorer(scoring): scorer : callable The scorer. """ - if isinstance(scoring, six.str): + if isinstance(scoring, str): try: scorer = SCORERS[scoring] except KeyError: @@ -269,7 +268,7 @@ def check_scoring(estimator, scoring=None, allow_none=False): if not hasattr(estimator, 'fit'): raise TypeError("estimator should be an estimator implementing " "'fit' method, %r was passed" % estimator) - if isinstance(scoring, six.str): + if isinstance(scoring, str): return get_scorer(scoring) elif callable(scoring): # Heuristic to ensure user has not passed a metric @@ -339,7 +338,7 @@ def _check_multimetric_scoring(estimator, scoring=None): False if scorer is None/str/callable """ if callable(scoring) or scoring is None or isinstance(scoring, - six.str): + str): scorers = {"score": check_scoring(estimator, scoring=scoring)} return scorers, False else: @@ -365,7 +364,7 @@ def _check_multimetric_scoring(estimator, scoring=None): raise ValueError(err_msg + "Duplicate elements were found in" " the given list. %r" % repr(scoring)) elif len(keys) > 0: - if not all(isinstance(k, six.str) for k in keys): + if not all(isinstance(k, str) for k in keys): if any(callable(k) for k in keys): raise ValueError(err_msg + "One or more of the elements were " @@ -385,7 +384,7 @@ def _check_multimetric_scoring(estimator, scoring=None): elif isinstance(scoring, dict): keys = set(scoring) - if not all(isinstance(k, six.str) for k in keys): + if not all(isinstance(k, str) for k in keys): raise ValueError("Non-string types were found in the keys of " "the given dict. 
scoring=%r" % repr(scoring)) if len(keys) == 0: diff --git a/sklearn/mixture/base.py b/sklearn/mixture/base.py index 362a0baaa8175..bfdc75a0547e2 100644 --- a/sklearn/mixture/base.py +++ b/sklearn/mixture/base.py @@ -15,7 +15,6 @@ from .. import cluster from ..base import BaseEstimator from ..base import DensityMixin -from ..externals import six from ..exceptions import ConvergenceWarning from ..utils import check_array, check_random_state from ..utils.fixes import logsumexp diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 6cc2ffabc0e25..66d34f89d4be7 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -12,7 +12,7 @@ from sklearn.covariance import EmpiricalCovariance from sklearn.datasets.samples_generator import make_spd_matrix -from sklearn.externals.six.moves import cStringIO as StringIO +from io import StringIO from sklearn.metrics.cluster import adjusted_rand_score from sklearn.mixture.gaussian_mixture import GaussianMixture from sklearn.mixture.gaussian_mixture import ( diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 6c54afb7a9ee8..b0ca2eda863e1 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -30,7 +30,6 @@ from ._validation import _aggregate_score_dicts from ..exceptions import NotFittedError from ..utils._joblib import Parallel, delayed -from ..externals import six from ..utils import check_random_state from ..utils.fixes import sp_version from ..utils.fixes import MaskedArray @@ -371,7 +370,7 @@ def _check_param_grid(param_grid): if isinstance(v, np.ndarray) and v.ndim > 1: raise ValueError("Parameter array should be one-dimensional.") - if (isinstance(v, six.str) or + if (isinstance(v, str) or not isinstance(v, (np.ndarray, Sequence))): raise ValueError("Parameter values for parameter ({0}) need " "to be a sequence(but not a string) or" @@ -619,7 +618,7 @@ def fit(self, X, y=None, groups=None, **fit_params): if self.multimetric_: if self.refit is not False and ( - not isinstance(self.refit, six.str) or + not isinstance(self.refit, str) or # This will work for both dict / list (tuple) self.refit not in scorers): raise ValueError("For multi-metric scoring, the parameter " diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index b0dcc11c9003a..02c7616e5dace 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -1,7 +1,6 @@ """Test the search module""" -from sklearn.externals.six.moves import cStringIO as StringIO -from sklearn.externals.six.moves import xrange +from io import StringIO from itertools import chain, product import pickle import sys diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py index b7cba19688543..ab05e01f71351 100644 --- a/sklearn/model_selection/tests/test_split.py +++ b/sklearn/model_selection/tests/test_split.py @@ -56,9 +56,6 @@ from sklearn.datasets import load_digits from sklearn.datasets import make_classification -from sklearn.externals import six -from sklearn.externals.six.moves import zip - from sklearn.utils.fixes import comb from sklearn.svm import SVC diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index a7352972173fc..13ee749fd88c0 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ 
b/sklearn/model_selection/tests/test_validation.py @@ -69,7 +69,7 @@ from sklearn.preprocessing import LabelEncoder from sklearn.pipeline import Pipeline -from sklearn.externals.six.moves import cStringIO as StringIO +from io import StringIO from sklearn.base import BaseEstimator from sklearn.base import clone from sklearn.multiclass import OneVsRestClassifier diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index cddbcbfff2da5..deafc0e7302af 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -26,7 +26,6 @@ from .utils.validation import check_is_fitted, has_fit_parameter from .utils.multiclass import check_classification_targets from .utils._joblib import Parallel, delayed -from .externals import six __all__ = ["MultiOutputRegressor", "MultiOutputClassifier", "ClassifierChain", "RegressorChain"] diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index dced4fbdb3dd2..a0ac6a3105508 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -31,7 +31,6 @@ from .utils.fixes import logsumexp from .utils.multiclass import _check_partial_fit_first_call from .utils.validation import check_is_fitted -from .externals import six __all__ = ['BernoulliNB', 'GaussianNB', 'MultinomialNB', 'ComplementNB'] diff --git a/sklearn/neighbors/base.py b/sklearn/neighbors/base.py index a426335ae88dd..e0e7af71c79c0 100644 --- a/sklearn/neighbors/base.py +++ b/sklearn/neighbors/base.py @@ -24,7 +24,6 @@ from ..utils import check_X_y, check_array, gen_even_slices from ..utils.multiclass import check_classification_targets from ..utils.validation import check_is_fitted -from ..externals import six from ..exceptions import DataConversionWarning from ..utils._joblib import Parallel, delayed, effective_n_jobs from ..utils._joblib import __version__ as joblib_version diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py index a8fcc8e11cdb9..e1cb5e9446450 100644 --- a/sklearn/neural_network/multilayer_perceptron.py +++ b/sklearn/neural_network/multilayer_perceptron.py @@ -17,7 +17,6 @@ from ._base import ACTIVATIONS, DERIVATIVES, LOSS_FUNCTIONS from ._stochastic_optimizers import SGDOptimizer, AdamOptimizer from ..model_selection import train_test_split -from ..externals import six from ..preprocessing import LabelBinarizer from ..utils import gen_batches, check_random_state from ..utils import shuffle diff --git a/sklearn/neural_network/rbm.py b/sklearn/neural_network/rbm.py index cbf4906782d7c..5edee53984626 100644 --- a/sklearn/neural_network/rbm.py +++ b/sklearn/neural_network/rbm.py @@ -15,7 +15,6 @@ from ..base import BaseEstimator from ..base import TransformerMixin -from ..externals.six.moves import xrange from ..utils import check_array from ..utils import check_random_state from ..utils import gen_even_slices diff --git a/sklearn/neural_network/tests/test_mlp.py b/sklearn/neural_network/tests/test_mlp.py index b0d5ab587a087..9f809823a8c08 100644 --- a/sklearn/neural_network/tests/test_mlp.py +++ b/sklearn/neural_network/tests/test_mlp.py @@ -15,7 +15,7 @@ from sklearn.datasets import load_digits, load_boston, load_iris from sklearn.datasets import make_regression, make_multilabel_classification from sklearn.exceptions import ConvergenceWarning -from sklearn.externals.six.moves import cStringIO as StringIO +from io import StringIO from sklearn.metrics import roc_auc_score from sklearn.neural_network import MLPClassifier from sklearn.neural_network import MLPRegressor diff --git 
a/sklearn/neural_network/tests/test_rbm.py b/sklearn/neural_network/tests/test_rbm.py index 6298a085786db..e97b0c8f5e7e5 100644 --- a/sklearn/neural_network/tests/test_rbm.py +++ b/sklearn/neural_network/tests/test_rbm.py @@ -7,7 +7,7 @@ assert_true) from sklearn.datasets import load_digits -from sklearn.externals.six.moves import cStringIO as StringIO +from io import StringIO from sklearn.neural_network import BernoulliRBM from sklearn.utils.validation import assert_all_finite np.seterr(all='warn') diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index ef4470d91b2ae..c509597941d05 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -17,7 +17,6 @@ from .base import clone, TransformerMixin from .utils._joblib import Parallel, delayed -from .externals import six from .utils.metaestimators import if_delegate_has_method from .utils import Bunch from .utils.validation import check_memory diff --git a/sklearn/preprocessing/_encoders.py b/sklearn/preprocessing/_encoders.py index cffe18e6f17e8..7dfd168c146bb 100644 --- a/sklearn/preprocessing/_encoders.py +++ b/sklearn/preprocessing/_encoders.py @@ -12,7 +12,6 @@ from .. import get_config as _get_config from ..base import BaseEstimator, TransformerMixin -from ..externals import six from ..utils import check_array from ..utils import deprecated from ..utils.fixes import _argmax, _object_dtype_isnan @@ -371,7 +370,7 @@ def _handle_deprecations(self, X): # if user specified categorical_features -> always use legacy mode if self.categorical_features is not None: - if (isinstance(self.categorical_features, six.str) + if (isinstance(self.categorical_features, str) and self.categorical_features == 'all'): warnings.warn( "The 'categorical_features' keyword is deprecated in " @@ -438,7 +437,7 @@ def _legacy_fit_transform(self, X): "be able to use arbitrary integer values as " "category identifiers.") n_samples, n_features = X.shape - if (isinstance(self._n_values, six.str) and + if (isinstance(self._n_values, str) and self._n_values == 'auto'): n_values = np.max(X, axis=0) + 1 elif isinstance(self._n_values, numbers.Integral): @@ -473,7 +472,7 @@ def _legacy_fit_transform(self, X): shape=(n_samples, indices[-1]), dtype=self.dtype).tocsr() - if (isinstance(self._n_values, six.str) and + if (isinstance(self._n_values, str) and self._n_values == 'auto'): mask = np.array(out.sum(axis=0)).ravel() != 0 active_features = np.where(mask)[0] @@ -553,7 +552,7 @@ def _legacy_transform(self, X): out = sparse.coo_matrix((data, (row_indices, column_indices)), shape=(n_samples, indices[-1]), dtype=self.dtype).tocsr() - if (isinstance(self._n_values, six.str) and + if (isinstance(self._n_values, str) and self._n_values == 'auto'): out = out[:, self._active_features_] diff --git a/sklearn/preprocessing/base.py b/sklearn/preprocessing/base.py index 45e39a2bfcbc5..29a1bd87dc8ee 100644 --- a/sklearn/preprocessing/base.py +++ b/sklearn/preprocessing/base.py @@ -5,7 +5,6 @@ from ..utils import check_array from ..utils.validation import FLOAT_DTYPES -from ..externals import six def _transform_selected(X, transform, dtype, selected="all", copy=True, @@ -48,7 +47,7 @@ def _transform_selected(X, transform, dtype, selected="all", copy=True, raise ValueError("The retain_order option can only be set to True " "for dense matrices.") - if isinstance(selected, six.str) and selected == "all": + if isinstance(selected, str) and selected == "all": return transform(X) if len(selected) == 0: diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py index 
77c2d2cc970fc..de2396dda5312 100644 --- a/sklearn/preprocessing/data.py +++ b/sklearn/preprocessing/data.py @@ -20,7 +20,6 @@ from scipy import optimize from ..base import BaseEstimator, TransformerMixin -from ..externals import six from ..utils import check_array from ..utils.extmath import row_norms from ..utils.extmath import _incremental_mean_and_var diff --git a/sklearn/preprocessing/imputation.py b/sklearn/preprocessing/imputation.py index 4318122d4be6c..380af58cc1d40 100644 --- a/sklearn/preprocessing/imputation.py +++ b/sklearn/preprocessing/imputation.py @@ -15,10 +15,6 @@ from ..utils.validation import check_is_fitted from ..utils.validation import FLOAT_DTYPES -from ..externals import six - -zip = six.moves.zip -map = six.moves.map __all__ = [ 'Imputer', diff --git a/sklearn/preprocessing/label.py b/sklearn/preprocessing/label.py index 12f95b5e2cb4f..1da320efc2762 100644 --- a/sklearn/preprocessing/label.py +++ b/sklearn/preprocessing/label.py @@ -24,10 +24,6 @@ from ..utils.multiclass import unique_labels from ..utils.multiclass import type_of_target -from ..externals import six - -zip = six.moves.zip -map = six.moves.map __all__ = [ 'label_binarize', diff --git a/sklearn/preprocessing/tests/test_discretization.py b/sklearn/preprocessing/tests/test_discretization.py index 09bb25ac49c3e..953243ea42cf2 100644 --- a/sklearn/preprocessing/tests/test_discretization.py +++ b/sklearn/preprocessing/tests/test_discretization.py @@ -5,7 +5,6 @@ import scipy.sparse as sp import warnings -from sklearn.externals.six.moves import xrange as range from sklearn.preprocessing import KBinsDiscretizer from sklearn.preprocessing import OneHotEncoder from sklearn.utils.testing import ( diff --git a/sklearn/random_projection.py b/sklearn/random_projection.py index 6f6a8b2ba8341..7581847d00c58 100644 --- a/sklearn/random_projection.py +++ b/sklearn/random_projection.py @@ -36,8 +36,7 @@ import scipy.sparse as sp from .base import BaseEstimator, TransformerMixin -from .externals import six -from .externals.six.moves import xrange + from .utils import check_random_state from .utils.extmath import safe_sparse_dot from .utils.random import sample_without_replacement diff --git a/sklearn/semi_supervised/label_propagation.py b/sklearn/semi_supervised/label_propagation.py index 081e54fbb0dfb..04aa6714e0711 100644 --- a/sklearn/semi_supervised/label_propagation.py +++ b/sklearn/semi_supervised/label_propagation.py @@ -64,7 +64,6 @@ from scipy.sparse import csgraph from ..base import BaseEstimator, ClassifierMixin -from ..externals import six from ..metrics.pairwise import rbf_kernel from ..neighbors.unsupervised import NearestNeighbors from ..utils.extmath import safe_sparse_dot diff --git a/sklearn/svm/base.py b/sklearn/svm/base.py index 70a6cd58a671a..2105a6130e7f0 100644 --- a/sklearn/svm/base.py +++ b/sklearn/svm/base.py @@ -16,7 +16,6 @@ from ..utils.extmath import safe_sparse_dot from ..utils.validation import check_is_fitted, _check_large_sparse from ..utils.multiclass import check_classification_targets -from ..externals import six from ..exceptions import ConvergenceWarning from ..exceptions import NotFittedError diff --git a/sklearn/tree/export.py b/sklearn/tree/export.py index 81900da12e1a3..017181f7cba38 100644 --- a/sklearn/tree/export.py +++ b/sklearn/tree/export.py @@ -16,7 +16,6 @@ import numpy as np -from ..externals import six from ..utils.validation import check_is_fitted from . 
import _criterion @@ -281,7 +280,7 @@ def node_to_str(self, tree, node_id, criterion): if self.impurity: if isinstance(criterion, _criterion.FriedmanMSE): criterion = "friedman_mse" - elif not isinstance(criterion, six.str): + elif not isinstance(criterion, str): criterion = "impurity" if labels: node_string += '%s = ' % criterion @@ -755,7 +754,7 @@ def export_graphviz(decision_tree, out_file=None, max_depth=None, own_file = False return_string = False try: - if isinstance(out_file, six.str): + if isinstance(out_file, str): if six.PY3: out_file = open(out_file, "w", encoding="utf-8") else: diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index f31858979cf4f..5cf30b08a103a 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -30,7 +30,6 @@ from ..base import ClassifierMixin from ..base import RegressorMixin from ..base import is_classifier -from ..externals import six from ..utils import check_array from ..utils import check_random_state from ..utils import compute_sample_weight @@ -219,7 +218,7 @@ def fit(self, X, y, sample_weight=None, check_input=True, min_samples_split = max(min_samples_split, 2 * min_samples_leaf) - if isinstance(self.max_features, six.str): + if isinstance(self.max_features, str): if self.max_features == "auto": if is_classification: max_features = max(1, int(np.sqrt(self.n_features_))) diff --git a/sklearn/utils/class_weight.py b/sklearn/utils/class_weight.py index 88afa758f69c9..efca1fef0d12d 100644 --- a/sklearn/utils/class_weight.py +++ b/sklearn/utils/class_weight.py @@ -3,7 +3,6 @@ # License: BSD 3 clause import numpy as np -from ..externals import six def compute_class_weight(class_weight, classes, y): @@ -114,12 +113,12 @@ def compute_sample_weight(class_weight, y, indices=None): y = np.reshape(y, (-1, 1)) n_outputs = y.shape[1] - if isinstance(class_weight, six.str): + if isinstance(class_weight, str): if class_weight not in ['balanced']: raise ValueError('The only valid preset for class_weight is ' '"balanced". Given "%s".' % class_weight) elif (indices is not None and - not isinstance(class_weight, six.str)): + not isinstance(class_weight, str)): raise ValueError('The only valid class_weight for subsampling is ' '"balanced". Given "%s".' % class_weight) elif n_outputs > 1: diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index 73a719da56551..8ccbe31e1989d 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -20,7 +20,6 @@ from . 
import check_random_state from .fixes import np_version from ._logistic_sigmoid import _log_logistic_sigmoid -from ..externals.six.moves import xrange from .sparsefuncs_fast import csr_row_norms from .validation import check_array diff --git a/sklearn/utils/metaestimators.py b/sklearn/utils/metaestimators.py index 49b059b324595..606173560dd75 100644 --- a/sklearn/utils/metaestimators.py +++ b/sklearn/utils/metaestimators.py @@ -9,7 +9,6 @@ import numpy as np from ..utils import safe_indexing -from ..externals import six from ..base import BaseEstimator __all__ = ['if_delegate_has_method'] diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index 49c9c4b1604fc..a1e9de98e7c71 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -4,7 +4,7 @@ import numpy as np import scipy.sparse as sp -from sklearn.externals.six.moves import cStringIO as StringIO +from io import StringIO from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.utils import deprecated diff --git a/sklearn/utils/tests/test_fast_dict.py b/sklearn/utils/tests/test_fast_dict.py index 77c1259aa8bf4..1131257330dcf 100644 --- a/sklearn/utils/tests/test_fast_dict.py +++ b/sklearn/utils/tests/test_fast_dict.py @@ -4,7 +4,6 @@ from sklearn.utils.fast_dict import IntFloatDict, argmin from sklearn.utils.testing import assert_equal -from sklearn.externals.six.moves import xrange def test_int_float_dict(): diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py index d33778c848c2a..6a224e7761a35 100644 --- a/sklearn/utils/tests/test_multiclass.py +++ b/sklearn/utils/tests/test_multiclass.py @@ -4,7 +4,6 @@ import scipy.sparse as sp from itertools import product -from sklearn.externals.six.moves import xrange from sklearn.externals.six import iteritems from scipy.sparse import issparse diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index cf5588a5506d5..7e64d21e8f613 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -18,7 +18,6 @@ from numpy.core.numeric import ComplexWarning -from ..externals import six from .fixes import signature from .. import get_config as _get_config from ..exceptions import NonBLASDotWarning @@ -205,7 +204,7 @@ def check_memory(memory): If ``memory`` is not joblib.Memory-like. """ - if memory is None or isinstance(memory, six.str): + if memory is None or isinstance(memory, str): if LooseVersion(joblib_version) < '0.12': memory = Memory(cachedir=memory, verbose=0) else: @@ -308,7 +307,7 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, changed_format = False - if isinstance(accept_sparse, six.str): + if isinstance(accept_sparse, str): accept_sparse = [accept_sparse] # Indices dtype validation @@ -467,7 +466,7 @@ def check_array(array, accept_sparse=False, accept_large_sparse=True, array_orig = array # store whether originally we wanted numeric dtype - dtype_numeric = isinstance(dtype, six.str) and dtype == "numeric" + dtype_numeric = isinstance(dtype, str) and dtype == "numeric" dtype_orig = getattr(array, "dtype", None) if not hasattr(dtype_orig, 'kind'): @@ -501,7 +500,7 @@ def check_array(array, accept_sparse=False, accept_large_sparse=True, '. 
Got {!r} instead'.format(force_all_finite)) if estimator is not None: - if isinstance(estimator, six.str): + if isinstance(estimator, str): estimator_name = estimator else: estimator_name = estimator.__class__.__name__ From 4d1e71ef870ba011d8da80ca22a2a4db68ece0f5 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 20 Nov 2018 18:21:14 -0500 Subject: [PATCH 05/29] six no more --- sklearn/compose/tests/test_column_transformer.py | 1 - sklearn/covariance/shrunk_covariance_.py | 1 - sklearn/covariance/tests/test_graph_lasso.py | 2 +- sklearn/covariance/tests/test_graphical_lasso.py | 2 +- sklearn/datasets/base.py | 2 +- sklearn/datasets/kddcup99.py | 3 +-- sklearn/datasets/lfw.py | 3 +-- sklearn/datasets/openml.py | 2 +- sklearn/datasets/samples_generator.py | 2 -- sklearn/datasets/tests/test_openml.py | 2 +- sklearn/datasets/tests/test_samples_generator.py | 1 - sklearn/decomposition/dict_learning.py | 1 - sklearn/decomposition/online_lda.py | 1 - sklearn/decomposition/tests/test_dict_learning.py | 2 +- sklearn/decomposition/tests/test_fastica.py | 1 - sklearn/decomposition/tests/test_online_lda.py | 2 +- sklearn/discriminant_analysis.py | 1 - sklearn/ensemble/bagging.py | 1 - sklearn/feature_selection/mutual_info_.py | 1 - sklearn/manifold/t_sne.py | 1 - sklearn/metrics/regression.py | 1 - sklearn/mixture/gaussian_mixture.py | 1 - sklearn/model_selection/_search.py | 2 +- sklearn/model_selection/_split.py | 1 - sklearn/model_selection/_validation.py | 1 - sklearn/multiclass.py | 5 ++--- sklearn/preprocessing/_encoders.py | 3 +-- sklearn/preprocessing/data.py | 5 ----- sklearn/tests/test_metaestimators.py | 1 - sklearn/tests/test_multiclass.py | 1 - sklearn/tests/test_naive_bayes.py | 1 - sklearn/tests/test_pipeline.py | 1 - sklearn/tree/export.py | 8 +++----- sklearn/tree/tests/test_export.py | 2 +- sklearn/utils/estimator_checks.py | 1 - sklearn/utils/multiclass.py | 1 - sklearn/utils/testing.py | 2 -- sklearn/utils/tests/test_multiclass.py | 1 - sklearn/utils/tests/test_murmurhash.py | 1 - 39 files changed, 17 insertions(+), 54 deletions(-) diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py index e39b6de4d0859..f15188e8754fb 100644 --- a/sklearn/compose/tests/test_column_transformer.py +++ b/sklearn/compose/tests/test_column_transformer.py @@ -15,7 +15,6 @@ from sklearn.utils.testing import assert_allclose_dense_sparse from sklearn.base import BaseEstimator -from sklearn.externals import six from sklearn.compose import ColumnTransformer, make_column_transformer from sklearn.exceptions import NotFittedError, DataConversionWarning from sklearn.preprocessing import StandardScaler, Normalizer, OneHotEncoder diff --git a/sklearn/covariance/shrunk_covariance_.py b/sklearn/covariance/shrunk_covariance_.py index eed39a45bddc4..94804ccac60d6 100644 --- a/sklearn/covariance/shrunk_covariance_.py +++ b/sklearn/covariance/shrunk_covariance_.py @@ -18,7 +18,6 @@ import numpy as np from .empirical_covariance_ import empirical_covariance, EmpiricalCovariance -from ..externals.six.moves import xrange from ..utils import check_array diff --git a/sklearn/covariance/tests/test_graph_lasso.py b/sklearn/covariance/tests/test_graph_lasso.py index d368356100a4f..8e7b399cd8144 100644 --- a/sklearn/covariance/tests/test_graph_lasso.py +++ b/sklearn/covariance/tests/test_graph_lasso.py @@ -14,7 +14,7 @@ from sklearn.covariance import (graph_lasso, GraphLasso, GraphLassoCV, empirical_covariance) from sklearn.datasets.samples_generator import 
make_sparse_spd_matrix -from sklearn.externals.six.moves import StringIO +from io import StringIO from sklearn.utils import check_random_state from sklearn import datasets diff --git a/sklearn/covariance/tests/test_graphical_lasso.py b/sklearn/covariance/tests/test_graphical_lasso.py index 47f15f4a762ac..239863925f921 100644 --- a/sklearn/covariance/tests/test_graphical_lasso.py +++ b/sklearn/covariance/tests/test_graphical_lasso.py @@ -12,7 +12,7 @@ from sklearn.covariance import (graphical_lasso, GraphicalLasso, GraphicalLassoCV, empirical_covariance) from sklearn.datasets.samples_generator import make_sparse_spd_matrix -from sklearn.externals.six.moves import StringIO +from io import StringIO from sklearn.utils import check_random_state from sklearn import datasets diff --git a/sklearn/datasets/base.py b/sklearn/datasets/base.py index eb06f133ec488..e4580b56dc181 100644 --- a/sklearn/datasets/base.py +++ b/sklearn/datasets/base.py @@ -22,7 +22,7 @@ import numpy as np -from sklearn.externals.six.moves.urllib.request import urlretrieve +from urllib.request import urlretrieve RemoteFileMetadata = namedtuple('RemoteFileMetadata', ['filename', 'url', 'checksum']) diff --git a/sklearn/datasets/kddcup99.py b/sklearn/datasets/kddcup99.py index e460503474a9f..397640117c3d7 100644 --- a/sklearn/datasets/kddcup99.py +++ b/sklearn/datasets/kddcup99.py @@ -276,8 +276,7 @@ def _fetch_brute_kddcup99(data_home=None, file_ = GzipFile(filename=archive_path, mode='r') Xy = [] for line in file_.readlines(): - if six.PY3: - line = line.decode() + line = line.decode() Xy.append(line.replace('\n', '').split(',')) file_.close() logger.debug('extraction done') diff --git a/sklearn/datasets/lfw.py b/sklearn/datasets/lfw.py index 13c3725a506c4..756a8045773bb 100644 --- a/sklearn/datasets/lfw.py +++ b/sklearn/datasets/lfw.py @@ -20,7 +20,6 @@ from ..utils import deprecated from ..utils import Bunch from ..utils._joblib import Memory -from ..externals.six import b from ..utils import _joblib logger = logging.getLogger(__name__) @@ -369,7 +368,7 @@ def _fetch_lfw_pairs(index_file_path, data_folder_path, slice_=None, # parse the index file to find the number of pairs to be able to allocate # the right amount of memory before starting to decode the jpeg files with open(index_file_path, 'rb') as index_file: - split_lines = [ln.strip().split(b('\t')) for ln in index_file] + split_lines = [ln.decode().strip().split('\t') for ln in index_file] pair_specs = [sl for sl in split_lines if len(sl) > 2] n_pairs = len(pair_specs) diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py index 84385a6bb2700..1f1fc158553fd 100644 --- a/sklearn/datasets/openml.py +++ b/sklearn/datasets/openml.py @@ -22,7 +22,7 @@ from sklearn.externals import _arff from .base import get_data_home from ..externals.six import str, PY2, BytesIO -from ..externals.six.moves.urllib.error import HTTPError +from urllib.error import HTTPError from ..utils import Bunch __all__ = ['fetch_openml'] diff --git a/sklearn/datasets/samples_generator.py b/sklearn/datasets/samples_generator.py index b8ad97628cbe7..35d8ea6a05589 100644 --- a/sklearn/datasets/samples_generator.py +++ b/sklearn/datasets/samples_generator.py @@ -17,8 +17,6 @@ from ..utils import shuffle as util_shuffle from ..utils.fixes import _Iterable as Iterable from ..utils.random import sample_without_replacement -map = six.moves.map -zip = six.moves.zip def _generate_hypercube(samples, dimensions, rng): diff --git a/sklearn/datasets/tests/test_openml.py 
b/sklearn/datasets/tests/test_openml.py index c192d50d9caf5..4a858899e2c31 100644 --- a/sklearn/datasets/tests/test_openml.py +++ b/sklearn/datasets/tests/test_openml.py @@ -18,7 +18,7 @@ from sklearn.utils.testing import (assert_warns_message, assert_raise_message) from sklearn.externals.six import str -from sklearn.externals.six.moves.urllib.error import HTTPError +from urllib.error import HTTPError from sklearn.datasets.tests.test_common import check_return_X_y from functools import partial diff --git a/sklearn/datasets/tests/test_samples_generator.py b/sklearn/datasets/tests/test_samples_generator.py index 2cf6900442feb..c8b0fbd571145 100644 --- a/sklearn/datasets/tests/test_samples_generator.py +++ b/sklearn/datasets/tests/test_samples_generator.py @@ -6,7 +6,6 @@ import numpy as np import pytest import scipy.sparse as sp -from sklearn.externals.six.moves import zip from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_equal diff --git a/sklearn/decomposition/dict_learning.py b/sklearn/decomposition/dict_learning.py index 65ae605b6c19b..eb5e4f2588612 100644 --- a/sklearn/decomposition/dict_learning.py +++ b/sklearn/decomposition/dict_learning.py @@ -15,7 +15,6 @@ from ..base import BaseEstimator, TransformerMixin from ..utils._joblib import Parallel, delayed, effective_n_jobs -from ..externals.six.moves import zip from ..utils import (check_array, check_random_state, gen_even_slices, gen_batches) from ..utils.extmath import randomized_svd, row_norms diff --git a/sklearn/decomposition/online_lda.py b/sklearn/decomposition/online_lda.py index b9bf1025e315e..1c8933d2b719d 100644 --- a/sklearn/decomposition/online_lda.py +++ b/sklearn/decomposition/online_lda.py @@ -21,7 +21,6 @@ from ..utils.fixes import logsumexp from ..utils.validation import check_non_negative from ..utils._joblib import Parallel, delayed, effective_n_jobs -from ..externals.six.moves import xrange from ..exceptions import NotFittedError from ._online_lda import (mean_change, _dirichlet_expectation_1d, diff --git a/sklearn/decomposition/tests/test_dict_learning.py b/sklearn/decomposition/tests/test_dict_learning.py index fd2937ed8f25d..042af84eaef03 100644 --- a/sklearn/decomposition/tests/test_dict_learning.py +++ b/sklearn/decomposition/tests/test_dict_learning.py @@ -223,7 +223,7 @@ def test_dict_learning_online_positivity(transform_algorithm, def test_dict_learning_online_verbosity(): n_components = 5 # test verbosity - from sklearn.externals.six.moves import cStringIO as StringIO + from io import StringIO import sys old_stdout = sys.stdout diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py index d5a329c7340c8..313a13ad8333b 100644 --- a/sklearn/decomposition/tests/test_fastica.py +++ b/sklearn/decomposition/tests/test_fastica.py @@ -17,7 +17,6 @@ from sklearn.decomposition import FastICA, fastica, PCA from sklearn.decomposition.fastica_ import _gs_decorrelation -from sklearn.externals.six import moves from sklearn.exceptions import ConvergenceWarning diff --git a/sklearn/decomposition/tests/test_online_lda.py b/sklearn/decomposition/tests/test_online_lda.py index b7d95eeb6d899..76708f30a3dcd 100644 --- a/sklearn/decomposition/tests/test_online_lda.py +++ b/sklearn/decomposition/tests/test_online_lda.py @@ -20,7 +20,7 @@ from sklearn.utils.testing import if_safe_multiprocessing_with_blas from sklearn.exceptions import NotFittedError -from sklearn.externals.six import StringIO +from io import StringIO def _build_sparse_mtx(): 
diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index 184f0b9b530a1..bd3202d409c86 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -13,7 +13,6 @@ import warnings import numpy as np from scipy import linalg -from .externals.six import str from .base import BaseEstimator, TransformerMixin, ClassifierMixin from .linear_model.base import LinearClassifierMixin diff --git a/sklearn/ensemble/bagging.py b/sklearn/ensemble/bagging.py index 8be282580b254..719f198a958c8 100644 --- a/sklearn/ensemble/bagging.py +++ b/sklearn/ensemble/bagging.py @@ -15,7 +15,6 @@ from ..base import ClassifierMixin, RegressorMixin from ..utils._joblib import Parallel, delayed from ..externals.six import with_metaclass -from ..externals.six.moves import zip from ..metrics import r2_score, accuracy_score from ..tree import DecisionTreeClassifier, DecisionTreeRegressor from ..utils import check_random_state, check_X_y, check_array, column_or_1d diff --git a/sklearn/feature_selection/mutual_info_.py b/sklearn/feature_selection/mutual_info_.py index 0637f784c5f95..7c5c247eb36ef 100644 --- a/sklearn/feature_selection/mutual_info_.py +++ b/sklearn/feature_selection/mutual_info_.py @@ -6,7 +6,6 @@ from scipy.sparse import issparse from scipy.special import digamma -from ..externals.six import moves from ..metrics.cluster.supervised import mutual_info_score from ..neighbors import NearestNeighbors from ..preprocessing import scale diff --git a/sklearn/manifold/t_sne.py b/sklearn/manifold/t_sne.py index 649b44f0916b7..9e13920d49e00 100644 --- a/sklearn/manifold/t_sne.py +++ b/sklearn/manifold/t_sne.py @@ -25,7 +25,6 @@ from ..metrics.pairwise import pairwise_distances from . import _utils from . import _barnes_hut_tsne -from ..externals.six import str MACHINE_EPSILON = np.finfo(np.double).eps diff --git a/sklearn/metrics/regression.py b/sklearn/metrics/regression.py index 22e97d399fd96..485c0ad9f9172 100644 --- a/sklearn/metrics/regression.py +++ b/sklearn/metrics/regression.py @@ -27,7 +27,6 @@ from ..utils.validation import check_array, check_consistent_length from ..utils.validation import column_or_1d -from ..externals.six import str __ALL__ = [ diff --git a/sklearn/mixture/gaussian_mixture.py b/sklearn/mixture/gaussian_mixture.py index 2c5f9b6cf151a..4e9b5d5dc904c 100644 --- a/sklearn/mixture/gaussian_mixture.py +++ b/sklearn/mixture/gaussian_mixture.py @@ -9,7 +9,6 @@ from scipy import linalg from .base import BaseMixture, _check_shape -from ..externals.six.moves import zip from ..utils import check_array from ..utils.validation import check_is_fitted from ..utils.extmath import row_norms diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index b0ca2eda863e1..b6a90f8b54284 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -275,7 +275,7 @@ def __iter__(self): else: # Always sort the keys of a dictionary, for reproducibility items = sorted(self.param_distributions.items()) - for _ in six.moves.range(self.n_iter): + for _ in range(self.n_iter): params = dict() for k, v in items: if hasattr(v, "rvs"): diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index 356c018c58c7d..d3f5ab0f5ba72 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -26,7 +26,6 @@ from ..utils.validation import check_array from ..utils.multiclass import type_of_target from ..externals.six import with_metaclass -from ..externals.six.moves 
import zip from ..utils.fixes import signature, comb from ..utils.fixes import _Iterable as Iterable from ..base import _pprint diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index 125b610098404..f2f3f91a4a26e 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -26,7 +26,6 @@ from ..utils.metaestimators import _safe_split from ..utils._joblib import Parallel, delayed from ..utils._joblib import logger -from ..externals.six.moves import zip from ..metrics.scorer import check_scoring, _check_multimetric_scoring from ..exceptions import FitFailedWarning from ._split import check_cv diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py index fa7e87da6c255..fdfe1bed0ca9f 100644 --- a/sklearn/multiclass.py +++ b/sklearn/multiclass.py @@ -54,7 +54,6 @@ from .utils._joblib import Parallel from .utils._joblib import delayed -from .externals.six.moves import zip as izip __all__ = [ "OneVsRestClassifier", @@ -268,7 +267,7 @@ def partial_fit(self, X, y, classes=None): self.estimators_ = Parallel(n_jobs=self.n_jobs)( delayed(_partial_fit_binary)(estimator, X, column) - for estimator, column in izip(self.estimators_, columns)) + for estimator, column in zip(self.estimators_, columns)) return self @@ -557,7 +556,7 @@ def partial_fit(self, X, y, classes=None): n_jobs=self.n_jobs)( delayed(_partial_fit_ovo_binary)( estimator, X, y, self.classes_[i], self.classes_[j]) - for estimator, (i, j) in izip(self.estimators_, + for estimator, (i, j) in zip(self.estimators_, (combinations))) self.pairwise_indices_ = None diff --git a/sklearn/preprocessing/_encoders.py b/sklearn/preprocessing/_encoders.py index 7dfd168c146bb..430c14067f5f8 100644 --- a/sklearn/preprocessing/_encoders.py +++ b/sklearn/preprocessing/_encoders.py @@ -20,7 +20,6 @@ from .base import _transform_selected from .label import _encode, _encode_check_unknown -range = six.moves.range __all__ = [ 'OneHotEncoder', @@ -703,7 +702,7 @@ def get_feature_names(self, input_features=None): feature_names = [] for i in range(len(cats)): names = [ - input_features[i] + '_' + six.text_type(t) for t in cats[i]] + input_features[i] + '_' + str(t) for t in cats[i]] feature_names.extend(names) return np.array(feature_names, dtype=object) diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py index de2396dda5312..ae04e48070a36 100644 --- a/sklearn/preprocessing/data.py +++ b/sklearn/preprocessing/data.py @@ -38,11 +38,6 @@ BOUNDS_THRESHOLD = 1e-7 - -zip = six.moves.zip -map = six.moves.map -range = six.moves.range - __all__ = [ 'Binarizer', 'KernelCenterer', diff --git a/sklearn/tests/test_metaestimators.py b/sklearn/tests/test_metaestimators.py index e1cbe09e43a94..f09017c2426f6 100644 --- a/sklearn/tests/test_metaestimators.py +++ b/sklearn/tests/test_metaestimators.py @@ -5,7 +5,6 @@ import numpy as np from sklearn.base import BaseEstimator -from sklearn.externals.six import iterkeys from sklearn.datasets import make_classification from sklearn.utils.testing import assert_true, assert_false, assert_raises diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py index 7b46fa0bf14f5..99ec745d90902 100644 --- a/sklearn/tests/test_multiclass.py +++ b/sklearn/tests/test_multiclass.py @@ -34,7 +34,6 @@ from sklearn.pipeline import Pipeline from sklearn import svm from sklearn import datasets -from sklearn.externals.six.moves import zip iris = datasets.load_iris() rng = np.random.RandomState(0) diff --git 
a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index 9533cff66662d..06e72f3f3eff1 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -11,7 +11,6 @@ from sklearn.model_selection import train_test_split from sklearn.model_selection import cross_val_score -from sklearn.externals.six.moves import zip from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 17793e35998a4..ecccb3a50da5f 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -10,7 +10,6 @@ import numpy as np from scipy import sparse -from sklearn.externals.six.moves import zip from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regex from sklearn.utils.testing import assert_raise_message diff --git a/sklearn/tree/export.py b/sklearn/tree/export.py index 017181f7cba38..18052d6233d09 100644 --- a/sklearn/tree/export.py +++ b/sklearn/tree/export.py @@ -11,6 +11,7 @@ # Li Li # License: BSD 3 clause import warnings +from io import StringIO from numbers import Integral @@ -755,15 +756,12 @@ def export_graphviz(decision_tree, out_file=None, max_depth=None, return_string = False try: if isinstance(out_file, str): - if six.PY3: - out_file = open(out_file, "w", encoding="utf-8") - else: - out_file = open(out_file, "wb") + out_file = open(out_file, "w", encoding="utf-8") own_file = True if out_file is None: return_string = True - out_file = six.StringIO() + out_file = StringIO() exporter = _DOTTreeExporter( out_file=out_file, max_depth=max_depth, diff --git a/sklearn/tree/tests/test_export.py b/sklearn/tree/tests/test_export.py index 2471914fa44ce..6c765675faf76 100644 --- a/sklearn/tree/tests/test_export.py +++ b/sklearn/tree/tests/test_export.py @@ -11,7 +11,7 @@ from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.ensemble import GradientBoostingClassifier from sklearn.tree import export_graphviz, plot_tree -from sklearn.externals.six import StringIO +from io import StringIO from sklearn.utils.testing import (assert_in, assert_equal, assert_raises, assert_less_equal, assert_raises_regex, assert_raise_message) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index cc17cc3c2300b..ecf2a58edeb16 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -12,7 +12,6 @@ from scipy import sparse from scipy.stats import rankdata -from sklearn.externals.six.moves import zip from sklearn.utils import IS_PYPY, _IS_32BIT from sklearn.utils import _joblib from sklearn.utils._joblib import Memory diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py index 3c25de040e2a2..a61b3e8263e79 100644 --- a/sklearn/utils/multiclass.py +++ b/sklearn/utils/multiclass.py @@ -16,7 +16,6 @@ import numpy as np -from ..externals.six import str from ..utils.fixes import _Sequence as Sequence from .validation import check_array diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py index db43cce6fbaf9..e12394d5982f0 100644 --- a/sklearn/utils/testing.py +++ b/sklearn/utils/testing.py @@ -802,8 +802,6 @@ def clean_warning_registry(): """ reg = "__warningregistry__" for mod_name, mod in list(sys.modules.items()): - if 'six.moves' in mod_name: - continue if hasattr(mod, reg): getattr(mod, reg).clear() diff --git 
a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py index 6a224e7761a35..3d4622795e95c 100644 --- a/sklearn/utils/tests/test_multiclass.py +++ b/sklearn/utils/tests/test_multiclass.py @@ -4,7 +4,6 @@ import scipy.sparse as sp from itertools import product -from sklearn.externals.six import iteritems from scipy.sparse import issparse from scipy.sparse import csc_matrix diff --git a/sklearn/utils/tests/test_murmurhash.py b/sklearn/utils/tests/test_murmurhash.py index d59ec6cecad77..cb7899af88ab3 100644 --- a/sklearn/utils/tests/test_murmurhash.py +++ b/sklearn/utils/tests/test_murmurhash.py @@ -3,7 +3,6 @@ # License: BSD 3 clause import numpy as np -from sklearn.externals.six import b, u from sklearn.utils.murmurhash import murmurhash3_32 from numpy.testing import assert_array_almost_equal from numpy.testing import assert_array_equal From cb844cdae64da38d0c0e60344d98169497a1870a Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 21 Nov 2018 12:20:32 -0500 Subject: [PATCH 06/29] six iteritems --- benchmarks/bench_plot_fastkmeans.py | 8 ++++---- benchmarks/bench_plot_omp_lars.py | 2 +- benchmarks/bench_plot_svd.py | 2 +- sklearn/datasets/tests/test_lfw.py | 2 +- sklearn/feature_extraction/dict_vectorizer.py | 8 ++++---- sklearn/feature_extraction/text.py | 4 ++-- sklearn/gaussian_process/kernels.py | 2 +- sklearn/pipeline.py | 4 ++-- 8 files changed, 16 insertions(+), 16 deletions(-) diff --git a/benchmarks/bench_plot_fastkmeans.py b/benchmarks/bench_plot_fastkmeans.py index a7f9a017ad09f..d40d211dd1846 100644 --- a/benchmarks/bench_plot_fastkmeans.py +++ b/benchmarks/bench_plot_fastkmeans.py @@ -104,15 +104,15 @@ def compute_bench_2(chunks): results = compute_bench(samples_range, features_range) results_2 = compute_bench_2(chunks) - max_time = max([max(i) for i in [t for (label, t) in six.iteritems(results) + max_time = max([max(i) for i in [t for (label, t) in results.items() if "speed" in label]]) max_inertia = max([max(i) for i in [ - t for (label, t) in six.iteritems(results) + t for (label, t) in results.items() if "speed" not in label]]) fig = plt.figure('scikit-learn K-Means benchmark results') for c, (label, timings) in zip('brcy', - sorted(six.iteritems(results))): + sorted(results.items())): if 'speed' in label: ax = fig.add_subplot(2, 2, 1, projection='3d') ax.set_zlim3d(0.0, max_time * 1.1) @@ -129,7 +129,7 @@ def compute_bench_2(chunks): i = 0 for c, (label, timings) in zip('br', - sorted(six.iteritems(results_2))): + sorted(results_2.items())): i += 1 ax = fig.add_subplot(2, 2, i + 2) y = np.asarray(timings) diff --git a/benchmarks/bench_plot_omp_lars.py b/benchmarks/bench_plot_omp_lars.py index a9b2c97aa6a78..a9cc87e9d22f8 100644 --- a/benchmarks/bench_plot_omp_lars.py +++ b/benchmarks/bench_plot_omp_lars.py @@ -109,7 +109,7 @@ def compute_bench(samples_range, features_range): import matplotlib.pyplot as plt fig = plt.figure('scikit-learn OMP vs. 
LARS benchmark results') - for i, (label, timings) in enumerate(sorted(six.iteritems(results))): + for i, (label, timings) in enumerate(sorted(results.items())): ax = fig.add_subplot(1, 2, i+1) vmax = max(1 - timings.min(), -1 + timings.max()) plt.matshow(timings, fignum=False, vmin=1 - vmax, vmax=1 + vmax) diff --git a/benchmarks/bench_plot_svd.py b/benchmarks/bench_plot_svd.py index 4901ae13f1243..7f96696a33c51 100644 --- a/benchmarks/bench_plot_svd.py +++ b/benchmarks/bench_plot_svd.py @@ -66,7 +66,7 @@ def compute_bench(samples_range, features_range, n_iter=3, rank=50): label = 'scikit-learn singular value decomposition benchmark results' fig = plt.figure(label) ax = fig.gca(projection='3d') - for c, (label, timings) in zip('rbg', sorted(six.iteritems(results))): + for c, (label, timings) in zip('rbg', sorted(results.items())): X, Y = np.meshgrid(samples_range, features_range) Z = np.asarray(timings).reshape(samples_range.shape[0], features_range.shape[0]) diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py index 422969881fe86..75aecdfb999f1 100644 --- a/sklearn/datasets/tests/test_lfw.py +++ b/sklearn/datasets/tests/test_lfw.py @@ -75,7 +75,7 @@ def setup_module(): # generate some pairing metadata files using the same format as LFW with open(os.path.join(LFW_HOME, 'pairsDevTrain.txt'), 'wb') as f: f.write(six.b("10\n")) - more_than_two = [name for name, count in six.iteritems(counts) + more_than_two = [name for name, count in counts.items() if count >= 2] for i in range(5): name = random_state.choice(more_than_two) diff --git a/sklearn/feature_extraction/dict_vectorizer.py b/sklearn/feature_extraction/dict_vectorizer.py index 9636580af18ee..2c7587dd3eeeb 100644 --- a/sklearn/feature_extraction/dict_vectorizer.py +++ b/sklearn/feature_extraction/dict_vectorizer.py @@ -116,7 +116,7 @@ def fit(self, X, y=None): vocab = {} for x in X: - for f, v in six.iteritems(x): + for f, v in x.items(): if isinstance(v, str): f = "%s%s%s" % (f, self.separator, v) if f not in vocab: @@ -162,7 +162,7 @@ def _transform(self, X, fitting): # collect all the possible feature names and build sparse matrix at # same time for x in X: - for f, v in six.iteritems(x): + for f, v in x.items(): if isinstance(v, str): f = "%s%s%s" % (f, self.separator, v) v = 1 @@ -296,7 +296,7 @@ def transform(self, X): Xa = np.zeros((len(X), len(vocab)), dtype=dtype) for i, x in enumerate(X): - for f, v in six.iteritems(x): + for f, v in x.items(): if isinstance(v, str): f = "%s%s%s" % (f, self.separator, v) v = 1 @@ -357,7 +357,7 @@ def restrict(self, support, indices=False): new_vocab[names[i]] = len(new_vocab) self.vocabulary_ = new_vocab - self.feature_names_ = [f for f, i in sorted(six.iteritems(new_vocab), + self.feature_names_ = [f for f, i in sorted(new_vocab.items(), key=itemgetter(1))] return self diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index 32b8a879cb70e..fbbfaed5ef9db 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -870,7 +870,7 @@ def _sort_features(self, X, vocabulary): Returns a reordered matrix and modifies the vocabulary in place """ - sorted_features = sorted(six.iteritems(vocabulary)) + sorted_features = sorted(vocabulary.items()) map_index = np.empty(len(sorted_features), dtype=np.int32) for new_val, (term, old_val) in enumerate(sorted_features): vocabulary[term] = new_val @@ -908,7 +908,7 @@ def _limit_features(self, X, vocabulary, high=None, low=None, new_indices = np.cumsum(mask) - 1 # 
maps old indices to new removed_terms = set() - for term, old_index in list(six.iteritems(vocabulary)): + for term, old_index in list(vocabulary.items()): if mask[old_index]: vocabulary[term] = new_indices[old_index] else: diff --git a/sklearn/gaussian_process/kernels.py b/sklearn/gaussian_process/kernels.py index e5a14c5e1db17..0df0d1197dde9 100644 --- a/sklearn/gaussian_process/kernels.py +++ b/sklearn/gaussian_process/kernels.py @@ -175,7 +175,7 @@ def set_params(self, **params): # Simple optimisation to gain speed (inspect is slow) return self valid_params = self.get_params(deep=True) - for key, value in six.iteritems(params): + for key, value in params.items(): split = key.split('__', 1) if len(split) > 1: # nested objects case diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index c509597941d05..32ad908d5bf45 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -214,7 +214,7 @@ def _fit(self, X, y=None, **fit_params): fit_params_steps = dict((name, {}) for name, step in self.steps if step is not None) - for pname, pval in six.iteritems(fit_params): + for pname, pval in fit_params.items(): step, param = pname.split('__', 1) fit_params_steps[step][param] = pval Xt = X @@ -543,7 +543,7 @@ def _name_estimators(estimators): for est, name in zip(estimators, names): namecount[name] += 1 - for k, v in list(six.iteritems(namecount)): + for k, v in list(namecount.items()): if v == 1: del namecount[k] From 067a1e380a082c18e6fd1490ae3f0d061aa89774 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 21 Nov 2018 12:23:46 -0500 Subject: [PATCH 07/29] six metaclass --- sklearn/ensemble/forest.py | 2 +- sklearn/ensemble/gradient_boosting.py | 8 ++++---- sklearn/ensemble/weight_boosting.py | 2 +- sklearn/feature_selection/base.py | 2 +- sklearn/linear_model/base.py | 2 +- sklearn/linear_model/coordinate_descent.py | 2 +- sklearn/linear_model/ridge.py | 2 +- sklearn/metrics/scorer.py | 2 +- sklearn/multioutput.py | 2 +- sklearn/neighbors/base.py | 2 +- sklearn/neural_network/multilayer_perceptron.py | 2 +- sklearn/svm/base.py | 2 +- sklearn/tree/tree.py | 2 +- sklearn/utils/metaestimators.py | 2 +- 14 files changed, 17 insertions(+), 17 deletions(-) diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index a5f539a8653a9..5c193df2eb2d0 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -122,7 +122,7 @@ def _parallel_build_trees(tree, forest, X, y, sample_weight, tree_idx, n_trees, return tree -class BaseForest(six.with_metaclass(ABCMeta, BaseEnsemble)): +class BaseForest(BaseEnsemble, metaclass=ABCMeta): """Base class for forests of trees. Warning: This class should not be used directly. Use derived classes diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index 04d8dab7570a8..8b5ab415141c6 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -299,7 +299,7 @@ def predict(self, X): return y -class LossFunction(six.with_metaclass(ABCMeta, object)): +class LossFunction(object, metaclass=ABCMeta): """Abstract base class for various loss functions. Parameters @@ -406,7 +406,7 @@ def _update_terminal_region(self, tree, terminal_regions, leaf, X, y, """Template method for updating terminal regions (=leaves). """ -class RegressionLossFunction(six.with_metaclass(ABCMeta, LossFunction)): +class RegressionLossFunction(LossFunction, metaclass=ABCMeta): """Base class for regression loss functions. 
Parameters @@ -740,7 +740,7 @@ def _update_terminal_region(self, tree, terminal_regions, leaf, X, y, tree.value[leaf, 0] = val -class ClassificationLossFunction(six.with_metaclass(ABCMeta, LossFunction)): +class ClassificationLossFunction(LossFunction, metaclass=ABCMeta): """Base class for classification loss functions. """ def _score_to_proba(self, score): @@ -1118,7 +1118,7 @@ def update(self, j, est): self.verbose_mod *= 10 -class BaseGradientBoosting(six.with_metaclass(ABCMeta, BaseEnsemble)): +class BaseGradientBoosting(BaseEnsemble, metaclass=ABCMeta): """Abstract base class for Gradient Boosting. """ @abstractmethod diff --git a/sklearn/ensemble/weight_boosting.py b/sklearn/ensemble/weight_boosting.py index c55e50d5aed30..d5e3f3a912407 100644 --- a/sklearn/ensemble/weight_boosting.py +++ b/sklearn/ensemble/weight_boosting.py @@ -45,7 +45,7 @@ ] -class BaseWeightBoosting(six.with_metaclass(ABCMeta, BaseEnsemble)): +class BaseWeightBoosting(BaseEnsemble, metaclass=ABCMeta): """Base class for AdaBoost estimators. Warning: This class should not be used directly. Use derived classes diff --git a/sklearn/feature_selection/base.py b/sklearn/feature_selection/base.py index 441b4f45a80fd..5add330188f78 100644 --- a/sklearn/feature_selection/base.py +++ b/sklearn/feature_selection/base.py @@ -14,7 +14,7 @@ from ..utils import check_array, safe_mask -class SelectorMixin(six.with_metaclass(ABCMeta, TransformerMixin)): +class SelectorMixin(TransformerMixin, metaclass=ABCMeta): """ Transformer mixin that performs feature selection given a support mask diff --git a/sklearn/linear_model/base.py b/sklearn/linear_model/base.py index d931c55f7e63a..e5747d8f04925 100644 --- a/sklearn/linear_model/base.py +++ b/sklearn/linear_model/base.py @@ -182,7 +182,7 @@ def _rescale_data(X, y, sample_weight): return X, y -class LinearModel(six.with_metaclass(ABCMeta, BaseEstimator)): +class LinearModel(BaseEstimator, metaclass=ABCMeta): """Base class for Linear Models""" @abstractmethod diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py index c51d3c577f4e9..78ae74182700d 100644 --- a/sklearn/linear_model/coordinate_descent.py +++ b/sklearn/linear_model/coordinate_descent.py @@ -1047,7 +1047,7 @@ def _path_residuals(X, y, train, test, path, path_params, alphas=None, return this_mses -class LinearModelCV(six.with_metaclass(ABCMeta, LinearModel)): +class LinearModelCV(LinearModel, metaclass=ABCMeta): """Base class for iterative model fitting along a regularization path""" @abstractmethod diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py index 36402d340a30d..06028f441900c 100644 --- a/sklearn/linear_model/ridge.py +++ b/sklearn/linear_model/ridge.py @@ -463,7 +463,7 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto', return coef -class _BaseRidge(six.with_metaclass(ABCMeta, LinearModel)): +class _BaseRidge(LinearModel, metaclass=ABCMeta): @abstractmethod def __init__(self, alpha=1.0, fit_intercept=True, normalize=False, diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 89bf9a9ba8955..9def4d484803b 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -43,7 +43,7 @@ from ..base import is_regressor -class _BaseScorer(six.with_metaclass(ABCMeta, object)): +class _BaseScorer(object, metaclass=ABCMeta): def __init__(self, score_func, sign, kwargs): self._kwargs = kwargs self._score_func = score_func diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 
deafc0e7302af..69f05183f2fe8 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -367,7 +367,7 @@ def score(self, X, y): return np.mean(np.all(y == y_pred, axis=1)) -class _BaseChain(six.with_metaclass(ABCMeta, BaseEstimator)): +class _BaseChain(BaseEstimator, metaclass=ABCMeta): def __init__(self, base_estimator, order=None, cv=None, random_state=None): self.base_estimator = base_estimator self.order = order diff --git a/sklearn/neighbors/base.py b/sklearn/neighbors/base.py index e0e7af71c79c0..8f3799adfffe4 100644 --- a/sklearn/neighbors/base.py +++ b/sklearn/neighbors/base.py @@ -102,7 +102,7 @@ def _get_weights(dist, weights): "'distance', or a callable function") -class NeighborsBase(six.with_metaclass(ABCMeta, BaseEstimator)): +class NeighborsBase(BaseEstimator, metaclass=ABCMeta): """Base class for nearest neighbors estimators.""" @abstractmethod diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py index e1cb5e9446450..765ff953223a2 100644 --- a/sklearn/neural_network/multilayer_perceptron.py +++ b/sklearn/neural_network/multilayer_perceptron.py @@ -36,7 +36,7 @@ def _pack(coefs_, intercepts_): return np.hstack([l.ravel() for l in coefs_ + intercepts_]) -class BaseMultilayerPerceptron(six.with_metaclass(ABCMeta, BaseEstimator)): +class BaseMultilayerPerceptron(BaseEstimator, metaclass=ABCMeta): """Base class for MLP classification and regression. Warning: This class should not be used directly. diff --git a/sklearn/svm/base.py b/sklearn/svm/base.py index 2105a6130e7f0..73a3f50f68c30 100644 --- a/sklearn/svm/base.py +++ b/sklearn/svm/base.py @@ -56,7 +56,7 @@ def _one_vs_one_coef(dual_coef, n_support, support_vectors): return coef -class BaseLibSVM(six.with_metaclass(ABCMeta, BaseEstimator)): +class BaseLibSVM(BaseEstimator, metaclass=ABCMeta): """Base class for estimators that use libsvm as backing library This implements support vector machine classification and regression. diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index 5cf30b08a103a..f8982590f7671 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -71,7 +71,7 @@ # ============================================================================= -class BaseDecisionTree(six.with_metaclass(ABCMeta, BaseEstimator)): +class BaseDecisionTree(BaseEstimator, metaclass=ABCMeta): """Base class for decision trees. Warning: This class should not be used directly. diff --git a/sklearn/utils/metaestimators.py b/sklearn/utils/metaestimators.py index 606173560dd75..25e1fe825717f 100644 --- a/sklearn/utils/metaestimators.py +++ b/sklearn/utils/metaestimators.py @@ -14,7 +14,7 @@ __all__ = ['if_delegate_has_method'] -class _BaseComposition(six.with_metaclass(ABCMeta, BaseEstimator)): +class _BaseComposition(BaseEstimator, metaclass=ABCMeta): """Handles parameter management for classifiers composed of named estimators. 
""" @abstractmethod From 44ec241436ce3d3ad75d50ff692af8a5ea68e229 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 21 Nov 2018 12:24:49 -0500 Subject: [PATCH 08/29] metaclass with two base classes --- sklearn/decomposition/base.py | 2 +- sklearn/ensemble/forest.py | 2 +- sklearn/linear_model/stochastic_gradient.py | 2 +- sklearn/mixture/base.py | 2 +- sklearn/naive_bayes.py | 2 +- sklearn/svm/base.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sklearn/decomposition/base.py b/sklearn/decomposition/base.py index faebbd0c74ac6..b318de0cd0daf 100644 --- a/sklearn/decomposition/base.py +++ b/sklearn/decomposition/base.py @@ -17,7 +17,7 @@ from abc import ABCMeta, abstractmethod -class _BasePCA(six.with_metaclass(ABCMeta, BaseEstimator, TransformerMixin)): +class _BasePCA(BaseEstimator, TransformerMixin, metaclass=ABCMeta): """Base class for PCA methods. Warning: This class should not be used directly. diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index 5c193df2eb2d0..f19bd8bbe3b44 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -635,7 +635,7 @@ def predict_log_proba(self, X): return proba -class ForestRegressor(six.with_metaclass(ABCMeta, BaseForest, RegressorMixin)): +class ForestRegressor(BaseForest, RegressorMixin, metaclass=ABCMeta): """Base class for forest of trees-based regressors. Warning: This class should not be used directly. Use derived classes diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py index 1ee974c0389ad..3fc7e8a0fbf56 100644 --- a/sklearn/linear_model/stochastic_gradient.py +++ b/sklearn/linear_model/stochastic_gradient.py @@ -64,7 +64,7 @@ def __call__(self, coef, intercept): return est.score(self.X_val, self.y_val, self.sample_weight_val) -class BaseSGD(six.with_metaclass(ABCMeta, BaseEstimator, SparseCoefMixin)): +class BaseSGD(BaseEstimator, SparseCoefMixin, metaclass=ABCMeta): """Base class for SGD classification and regression.""" def __init__(self, loss, penalty='l2', alpha=0.0001, C=1.0, diff --git a/sklearn/mixture/base.py b/sklearn/mixture/base.py index bfdc75a0547e2..afcec0b94b2d9 100644 --- a/sklearn/mixture/base.py +++ b/sklearn/mixture/base.py @@ -63,7 +63,7 @@ def _check_X(X, n_components=None, n_features=None, ensure_min_samples=1): return X -class BaseMixture(six.with_metaclass(ABCMeta, DensityMixin, BaseEstimator)): +class BaseMixture(DensityMixin, BaseEstimator, metaclass=ABCMeta): """Base class for mixture models. 
This abstract class specifies an interface for all mixture classes and diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index a0ac6a3105508..1a3771807f3cb 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -35,7 +35,7 @@ __all__ = ['BernoulliNB', 'GaussianNB', 'MultinomialNB', 'ComplementNB'] -class BaseNB(six.with_metaclass(ABCMeta, BaseEstimator, ClassifierMixin)): +class BaseNB(BaseEstimator, ClassifierMixin, metaclass=ABCMeta): """Abstract base class for naive Bayes estimators""" @abstractmethod diff --git a/sklearn/svm/base.py b/sklearn/svm/base.py index 73a3f50f68c30..d08f21c2fd374 100644 --- a/sklearn/svm/base.py +++ b/sklearn/svm/base.py @@ -498,7 +498,7 @@ def _get_coef(self): return safe_sparse_dot(self._dual_coef_, self.support_vectors_) -class BaseSVC(six.with_metaclass(ABCMeta, BaseLibSVM, ClassifierMixin)): +class BaseSVC(BaseLibSVM, ClassifierMixin, metaclass=ABCMeta): """ABC for LibSVM-based classifiers.""" @abstractmethod def __init__(self, kernel, degree, gamma, coef0, tol, C, nu, From 185e0bfc2940851f1b16e1617ae72e4ae43aaaa9 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 21 Nov 2018 12:27:48 -0500 Subject: [PATCH 09/29] multi-line metaclasses --- sklearn/linear_model/stochastic_gradient.py | 3 +-- sklearn/model_selection/_search.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py index 3fc7e8a0fbf56..17aad4521e54e 100644 --- a/sklearn/linear_model/stochastic_gradient.py +++ b/sklearn/linear_model/stochastic_gradient.py @@ -468,8 +468,7 @@ def fit_binary(est, i, X, y, alpha, C, learning_rate, max_iter, return result -class BaseSGDClassifier(six.with_metaclass(ABCMeta, BaseSGD, - LinearClassifierMixin)): +class BaseSGDClassifier(BaseSGD, LinearClassifierMixin, metaclass=ABCMeta): loss_functions = { "hinge": (Hinge, 1.0), diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index b6a90f8b54284..157d4ede09dff 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -381,8 +381,7 @@ def _check_param_grid(param_grid): "to be a non-empty sequence.".format(name)) -class BaseSearchCV(six.with_metaclass(ABCMeta, BaseEstimator, - MetaEstimatorMixin)): +class BaseSearchCV(BaseEstimator, MetaEstimatorMixin, metaclass=ABCMeta): """Abstract base class for hyper parameter search with cross-validation. 
""" From 6791cbf46ddb5b0ceac003dac38ccb6e335b00de Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 21 Nov 2018 12:40:50 -0500 Subject: [PATCH 10/29] more metaclass and b fun --- sklearn/cross_decomposition/pls_.py | 4 +-- sklearn/datasets/svmlight_format.py | 28 +++++++-------- sklearn/datasets/tests/test_base.py | 7 ++-- .../datasets/tests/test_svmlight_format.py | 34 +++++++++---------- sklearn/ensemble/bagging.py | 2 +- sklearn/ensemble/base.py | 4 +-- sklearn/ensemble/forest.py | 5 ++- sklearn/feature_extraction/text.py | 4 +-- sklearn/gaussian_process/kernels.py | 2 +- sklearn/metrics/tests/test_pairwise.py | 2 -- sklearn/model_selection/_split.py | 9 +++-- sklearn/multioutput.py | 3 +- sklearn/random_projection.py | 3 +- sklearn/semi_supervised/label_propagation.py | 3 +- sklearn/utils/metaestimators.py | 5 ++- 15 files changed, 51 insertions(+), 64 deletions(-) diff --git a/sklearn/cross_decomposition/pls_.py b/sklearn/cross_decomposition/pls_.py index dcc614677f5f7..ea35089de6637 100644 --- a/sklearn/cross_decomposition/pls_.py +++ b/sklearn/cross_decomposition/pls_.py @@ -116,8 +116,8 @@ def _center_scale_xy(X, Y, scale=True): return X, Y, x_mean, y_mean, x_std, y_std -class _PLS(six.with_metaclass(ABCMeta), BaseEstimator, TransformerMixin, - RegressorMixin): +class _PLS(BaseEstimator, TransformerMixin, RegressorMixin, + metaclass=ABCMeta): """Partial Least Squares (PLS) This class implements the generic PLS algorithm, constructors' parameters diff --git a/sklearn/datasets/svmlight_format.py b/sklearn/datasets/svmlight_format.py index f5e2edfe53354..60e3a3961655e 100644 --- a/sklearn/datasets/svmlight_format.py +++ b/sklearn/datasets/svmlight_format.py @@ -326,28 +326,28 @@ def _dump_svmlight(X, y, f, multilabel, one_based, comment, query_id): X_is_sp = int(hasattr(X, "tocsr")) y_is_sp = int(hasattr(y, "tocsr")) if X.dtype.kind == 'i': - value_pattern = u("%d:%d") + value_pattern = "%d:%d" else: - value_pattern = u("%d:%.16g") + value_pattern = "%d:%.16g" if y.dtype.kind == 'i': - label_pattern = u("%d") + label_pattern = "%d" else: - label_pattern = u("%.16g") + label_pattern = "%.16g" - line_pattern = u("%s") + line_pattern = "%s" if query_id is not None: - line_pattern += u(" qid:%d") - line_pattern += u(" %s\n") + line_pattern += " qid:%d" + line_pattern += " %s\n" if comment: - f.write(b("# Generated by dump_svmlight_file from scikit-learn %s\n" - % __version__)) - f.write(b("# Column indices are %s-based\n" - % ["zero", "one"][one_based])) + f.write("# Generated by dump_svmlight_file from scikit-learn %s\n" + % __version__) + f.write("# Column indices are %s-based\n" + % ["zero", "one"][one_based]) - f.write(b("#\n")) - f.writelines(b("# %s\n" % line) for line in comment.splitlines()) + f.write("#\n") + f.writelines("# %s\n" % line for line in comment.splitlines()) for i in range(X.shape[0]): if X_is_sp: @@ -437,7 +437,7 @@ def dump_svmlight_file(X, y, f, zero_based=True, comment=None, query_id=None, comment.decode("ascii") # just for the exception else: comment = comment.encode("utf-8") - if six.b("\0") in comment: + if "\0" in comment: raise ValueError("comment string contains NUL byte") yval = check_array(y, accept_sparse='csr', ensure_2d=False) diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py index e01ec39eb4943..83be6d05b561a 100644 --- a/sklearn/datasets/tests/test_base.py +++ b/sklearn/datasets/tests/test_base.py @@ -24,7 +24,6 @@ from sklearn.datasets.base import Bunch from sklearn.datasets.tests.test_common import 
check_return_X_y -from sklearn.externals.six import b, u from sklearn.externals._pilutil import pillow_installed from sklearn.utils.testing import assert_false @@ -56,7 +55,7 @@ def test_category_dir_1(load_files_root): test_category_dir1 = tempfile.mkdtemp(dir=load_files_root) sample_file = tempfile.NamedTemporaryFile(dir=test_category_dir1, delete=False) - sample_file.write(b("Hello World!\n")) + sample_file.write("Hello World!\n") sample_file.close() yield str(test_category_dir1) _remove_dir(test_category_dir1) @@ -97,7 +96,7 @@ def test_default_load_files(test_category_dir_1, test_category_dir_2, assert_equal(len(res.filenames), 1) assert_equal(len(res.target_names), 2) assert_equal(res.DESCR, None) - assert_equal(res.data, [b("Hello World!\n")]) + assert_equal(res.data, ["Hello World!\n"]) def test_load_files_w_categories_desc_and_encoding( @@ -108,7 +107,7 @@ def test_load_files_w_categories_desc_and_encoding( assert_equal(len(res.filenames), 1) assert_equal(len(res.target_names), 1) assert_equal(res.DESCR, "test") - assert_equal(res.data, [u("Hello World!\n")]) + assert_equal(res.data, ["Hello World!\n"]) def test_load_files_wo_load_content( diff --git a/sklearn/datasets/tests/test_svmlight_format.py b/sklearn/datasets/tests/test_svmlight_format.py index ca1f7ddae8ecd..584b226cb3a0e 100644 --- a/sklearn/datasets/tests/test_svmlight_format.py +++ b/sklearn/datasets/tests/test_svmlight_format.py @@ -10,8 +10,6 @@ import pytest -from sklearn.externals.six import b - from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal @@ -153,13 +151,13 @@ def test_load_invalid_order_file(): def test_load_zero_based(): - f = BytesIO(b("-1 4:1.\n1 0:1\n")) + f = BytesIO("-1 4:1.\n1 0:1\n") assert_raises(ValueError, load_svmlight_file, f, zero_based=False) def test_load_zero_based_auto(): - data1 = b("-1 1:1 2:2 3:3\n") - data2 = b("-1 0:0 1:1\n") + data1 = "-1 1:1 2:2 3:3\n" + data2 = "-1 0:0 1:1\n" f1 = BytesIO(data1) X, y = load_svmlight_file(f1, zero_based="auto") @@ -174,10 +172,10 @@ def test_load_zero_based_auto(): def test_load_with_qid(): # load svmfile with qid attribute - data = b(""" + data = """ 3 qid:1 1:0.53 2:0.12 2 qid:1 1:0.13 2:0.1 - 7 qid:2 1:0.87 2:0.12""") + 7 qid:2 1:0.87 2:0.12""" X, y = load_svmlight_file(BytesIO(data), query_id=False) assert_array_equal(y, [3, 2, 7]) assert_array_equal(X.toarray(), [[.53, .12], [.13, .1], [.87, .12]]) @@ -280,9 +278,9 @@ def test_dump_multilabel(): dump_svmlight_file(X, y, f, multilabel=True) f.seek(0) # make sure it dumps multilabel correctly - assert_equal(f.readline(), b("1 0:1 2:3 4:5\n")) - assert_equal(f.readline(), b("0,2 \n")) - assert_equal(f.readline(), b("0,1 1:5 3:1\n")) + assert_equal(f.readline(), "1 0:1 2:3 4:5\n") + assert_equal(f.readline(), "0,2 \n") + assert_equal(f.readline(), "0,1 1:5 3:1\n") def test_dump_concise(): @@ -303,11 +301,11 @@ def test_dump_concise(): f.seek(0) # make sure it's using the most concise format possible assert_equal(f.readline(), - b("1 0:1 1:2.1 2:3.01 3:1.000000000000001 4:1\n")) - assert_equal(f.readline(), b("2.1 0:1000000000 1:2e+18 2:3e+27\n")) - assert_equal(f.readline(), b("3.01 \n")) - assert_equal(f.readline(), b("1.000000000000001 \n")) - assert_equal(f.readline(), b("1 \n")) + "1 0:1 1:2.1 2:3.01 3:1.000000000000001 4:1\n") + assert_equal(f.readline(), "2.1 0:1000000000 1:2e+18 2:3e+27\n") + assert_equal(f.readline(), "3.01 \n") + assert_equal(f.readline(), "1.000000000000001 \n") + 
assert_equal(f.readline(), "1 \n") f.seek(0) # make sure it's correct too :) X2, y2 = load_svmlight_file(f) @@ -329,7 +327,7 @@ def test_dump_comment(): assert_array_almost_equal(y, y2) # XXX we have to update this to support Python 3.x - utf8_comment = b("It is true that\n\xc2\xbd\xc2\xb2 = \xc2\xbc") + utf8_comment = "It is true that\n\xc2\xbd\xc2\xb2 = \xc2\xbc" f = BytesIO() assert_raises(UnicodeDecodeError, dump_svmlight_file, X, y, f, comment=utf8_comment) @@ -376,11 +374,11 @@ def test_dump_query_id(): def test_load_with_long_qid(): # load svmfile with longint qid attribute - data = b(""" + data = """ 1 qid:0 0:1 1:2 2:3 0 qid:72048431380967004 0:1440446648 1:72048431380967004 2:236784985 0 qid:-9223372036854775807 0:1440446648 1:72048431380967004 2:236784985 - 3 qid:9223372036854775807 0:1440446648 1:72048431380967004 2:236784985""") + 3 qid:9223372036854775807 0:1440446648 1:72048431380967004 2:236784985""" X, y, qid = load_svmlight_file(BytesIO(data), query_id=True) true_X = [[1, 2, 3], diff --git a/sklearn/ensemble/bagging.py b/sklearn/ensemble/bagging.py index 719f198a958c8..ef2399e0d6041 100644 --- a/sklearn/ensemble/bagging.py +++ b/sklearn/ensemble/bagging.py @@ -183,7 +183,7 @@ def _parallel_predict_regression(estimators, estimators_features, X): estimators_features)) -class BaseBagging(with_metaclass(ABCMeta, BaseEnsemble)): +class BaseBagging(BaseEnsemble, metaclass=ABCMeta): """Base class for Bagging meta-estimator. Warning: This class should not be used directly. Use derived classes diff --git a/sklearn/ensemble/base.py b/sklearn/ensemble/base.py index 0f1d7087ed501..1ca2ef8b20492 100644 --- a/sklearn/ensemble/base.py +++ b/sklearn/ensemble/base.py @@ -13,7 +13,6 @@ from ..base import MetaEstimatorMixin from ..utils import check_random_state from ..utils._joblib import effective_n_jobs -from ..externals import six from abc import ABCMeta, abstractmethod MAX_RAND_SEED = np.iinfo(np.int32).max @@ -58,8 +57,7 @@ def _set_random_states(estimator, random_state=None): estimator.set_params(**to_set) -class BaseEnsemble(six.with_metaclass(ABCMeta, BaseEstimator, - MetaEstimatorMixin)): +class BaseEnsemble(BaseEstimator, MetaEstimatorMixin, metaclass=ABCMeta): """Base class for all ensemble classes. Warning: This class should not be used directly. Use derived classes diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index f19bd8bbe3b44..789274278f7e1 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -242,7 +242,7 @@ def fit(self, X, y, sample_weight=None): if self.n_estimators == 'warn': warn("The default value of n_estimators will change from " - "10 in version 0.20 to 100 in 0.22.", FutureWarning) + "10 in version 0.20 to 100 in 0.22.", FutureWarning) self.n_estimators = 10 # Validate or convert input data @@ -394,8 +394,7 @@ def _accumulate_prediction(predict, X, out, lock): out[i] += prediction[i] -class ForestClassifier(six.with_metaclass(ABCMeta, BaseForest, - ClassifierMixin)): +class ForestClassifier(BaseForest, ClassifierMixin, metaclass=ABCMeta): """Base class for forest of trees-based classifiers. Warning: This class should not be used directly. 
Use derived classes diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index fbbfaed5ef9db..824ec3beccc4e 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -343,7 +343,7 @@ def _validate_vocabulary(self): raise ValueError(msg) vocabulary = vocab else: - indices = set(six.itervalues(vocabulary)) + indices = set(vocabulary.values()) if len(indices) != len(vocabulary): raise ValueError("Vocabulary contains repeated indices.") for i in range(len(vocabulary)): @@ -1124,7 +1124,7 @@ def get_feature_names(self): self._check_vocabulary() - return [t for t, i in sorted(six.iteritems(self.vocabulary_), + return [t for t, i in sorted(self.vocabulary_.items(), key=itemgetter(1))] diff --git a/sklearn/gaussian_process/kernels.py b/sklearn/gaussian_process/kernels.py index 0df0d1197dde9..7d83b5db0fd7e 100644 --- a/sklearn/gaussian_process/kernels.py +++ b/sklearn/gaussian_process/kernels.py @@ -115,7 +115,7 @@ def __eq__(self, other): self.fixed == other.fixed) -class Kernel(six.with_metaclass(ABCMeta)): +class Kernel(metaclass=ABCMeta): """Base class for all kernels. .. versionadded:: 0.18 diff --git a/sklearn/metrics/tests/test_pairwise.py b/sklearn/metrics/tests/test_pairwise.py index 5dd2d86c94545..d2f4578856a31 100644 --- a/sklearn/metrics/tests/test_pairwise.py +++ b/sklearn/metrics/tests/test_pairwise.py @@ -19,8 +19,6 @@ from sklearn.utils.testing import ignore_warnings from sklearn.utils.testing import assert_warns_message -from sklearn.externals.six import iteritems - from sklearn.metrics.pairwise import euclidean_distances from sklearn.metrics.pairwise import manhattan_distances from sklearn.metrics.pairwise import linear_kernel diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index d3f5ab0f5ba72..582f3d69bf6f9 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -25,7 +25,6 @@ from ..utils.validation import _num_samples, column_or_1d from ..utils.validation import check_array from ..utils.multiclass import type_of_target -from ..externals.six import with_metaclass from ..utils.fixes import signature, comb from ..utils.fixes import _Iterable as Iterable from ..base import _pprint @@ -59,7 +58,7 @@ "in version 0.22.") -class BaseCrossValidator(with_metaclass(ABCMeta)): +class BaseCrossValidator(metaclass=ABCMeta): """Base class for all cross-validators Implementations must define `_iter_test_masks` or `_iter_test_indices`. @@ -265,7 +264,7 @@ def get_n_splits(self, X, y=None, groups=None): return int(comb(_num_samples(X), self.p, exact=True)) -class _BaseKFold(with_metaclass(ABCMeta, BaseCrossValidator)): +class _BaseKFold(BaseCrossValidator, metaclass=ABCMeta): """Base class for KFold, GroupKFold, and StratifiedKFold""" @abstractmethod @@ -1059,7 +1058,7 @@ def split(self, X, y=None, groups=None): return super(LeavePGroupsOut, self).split(X, y, groups) -class _RepeatedSplits(with_metaclass(ABCMeta)): +class _RepeatedSplits(metaclass=ABCMeta): """Repeated splits for an arbitrary randomized CV splitter. 
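For the splitter base classes touched here (BaseCrossValidator, _BaseKFold, _RepeatedSplits) the old spelling was with_metaclass(ABCMeta) with no extra bases, so the keyword form is simply class Foo(metaclass=ABCMeta). Either spelling leaves the ABC machinery intact; a small self-contained check with an invented Splitter class, not part of scikit-learn:

    from abc import ABCMeta, abstractmethod

    class Splitter(metaclass=ABCMeta):
        """Hypothetical stand-in for a cross-validator base class."""

        @abstractmethod
        def split(self, X):
            """Yield (train, test) index pairs."""

    class TwoFoldLike(Splitter):
        def split(self, X):
            half = len(X) // 2
            yield list(range(half, len(X))), list(range(half))
            yield list(range(half)), list(range(half, len(X)))

    try:
        Splitter()                      # still abstract, still refuses to instantiate
    except TypeError as exc:
        print("expected:", exc)

    print(type(Splitter) is ABCMeta)    # True: the metaclass survived the rewrite
    print(list(TwoFoldLike().split([10, 20, 30, 40])))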
Repeats splits for cross-validators n times with different randomization @@ -1264,7 +1263,7 @@ def __init__(self, n_splits=5, n_repeats=10, random_state=None): StratifiedKFold, n_repeats, random_state, n_splits=n_splits) -class BaseShuffleSplit(with_metaclass(ABCMeta)): +class BaseShuffleSplit(metaclass=ABCMeta): """Base class for ShuffleSplit and StratifiedShuffleSplit""" def __init__(self, n_splits=10, test_size="default", train_size=None, diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 69f05183f2fe8..e5fc27f79b76b 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -59,8 +59,7 @@ def _partial_fit_estimator(estimator, X, y, classes=None, sample_weight=None, return estimator -class MultiOutputEstimator(six.with_metaclass(ABCMeta, BaseEstimator, - MetaEstimatorMixin)): +class MultiOutputEstimator(BaseEstimator, MetaEstimatorMixin, metaclass=ABCMeta): @abstractmethod def __init__(self, estimator, n_jobs=None): self.estimator = estimator diff --git a/sklearn/random_projection.py b/sklearn/random_projection.py index 7581847d00c58..bac632fc7df65 100644 --- a/sklearn/random_projection.py +++ b/sklearn/random_projection.py @@ -291,8 +291,7 @@ def sparse_random_matrix(n_components, n_features, density='auto', return np.sqrt(1 / density) / np.sqrt(n_components) * components -class BaseRandomProjection(six.with_metaclass(ABCMeta, BaseEstimator, - TransformerMixin)): +class BaseRandomProjection(BaseEstimator, TransformerMixin, metaclass=ABCMeta): """Base class for random projections. Warning: This class should not be used directly. diff --git a/sklearn/semi_supervised/label_propagation.py b/sklearn/semi_supervised/label_propagation.py index 04aa6714e0711..6b04eb8256daa 100644 --- a/sklearn/semi_supervised/label_propagation.py +++ b/sklearn/semi_supervised/label_propagation.py @@ -72,8 +72,7 @@ from ..exceptions import ConvergenceWarning -class BaseLabelPropagation(six.with_metaclass(ABCMeta, BaseEstimator, - ClassifierMixin)): +class BaseLabelPropagation(BaseEstimator, ClassifierMixin, metaclass=ABCMeta): """Base class for label propagation module. Parameters diff --git a/sklearn/utils/metaestimators.py b/sklearn/utils/metaestimators.py index 25e1fe825717f..e2d6ca58b5e05 100644 --- a/sklearn/utils/metaestimators.py +++ b/sklearn/utils/metaestimators.py @@ -29,8 +29,7 @@ def _get_params(self, attr, deep=True): out.update(estimators) for name, estimator in estimators: if hasattr(estimator, 'get_params'): - for key, value in six.iteritems( - estimator.get_params(deep=True)): + for key, value in estimator.get_params(deep=True).items(): out['%s__%s' % (name, key)] = value return out @@ -44,7 +43,7 @@ def _set_params(self, attr, **params): names = [] if items: names, _ = zip(*items) - for name in list(six.iterkeys(params)): + for name in list(params.keys()): if '__' not in name and name in names: self._replace_estimator(attr, name, params.pop(name)) # 3. 
Step parameters and other initialisation arguments From adfef757d58b0d772a2aa23c6aaa2191f2b508f3 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 21 Nov 2018 13:01:53 -0500 Subject: [PATCH 11/29] getting rid of six (and python2) --- sklearn/cluster/mean_shift_.py | 2 +- sklearn/datasets/_svmlight_format.pyx | 2 -- sklearn/datasets/base.py | 2 +- sklearn/datasets/openml.py | 16 +++------------- sklearn/datasets/tests/test_lfw.py | 15 ++++++++------- sklearn/decomposition/fastica_.py | 4 ++-- sklearn/externals/joblib/_parallel_backends.py | 2 +- sklearn/externals/joblib/_store_backends.py | 2 +- sklearn/metrics/tests/test_pairwise.py | 2 +- sklearn/model_selection/tests/test_split.py | 3 +-- sklearn/tree/export.py | 2 +- sklearn/utils/tests/test_bench.py | 11 ----------- sklearn/utils/tests/test_murmurhash.py | 16 ++++++++-------- 13 files changed, 28 insertions(+), 51 deletions(-) delete mode 100644 sklearn/utils/tests/test_bench.py diff --git a/sklearn/cluster/mean_shift_.py b/sklearn/cluster/mean_shift_.py index 89117164a63f8..ce5dac8b5a318 100644 --- a/sklearn/cluster/mean_shift_.py +++ b/sklearn/cluster/mean_shift_.py @@ -284,7 +284,7 @@ def get_bin_seeds(X, bin_size, min_bin_freq=1): bin_sizes[tuple(binned_point)] += 1 # Select only those bins as seeds which have enough members - bin_seeds = np.array([point for point, freq in six.iteritems(bin_sizes) if + bin_seeds = np.array([point for point, freq in bin_sizes.items() if freq >= min_bin_freq], dtype=np.float32) if len(bin_seeds) == len(X): warnings.warn("Binning data failed with provided bin_size=%f," diff --git a/sklearn/datasets/_svmlight_format.pyx b/sklearn/datasets/_svmlight_format.pyx index bba5db9d3cf50..ea45920ad70f2 100644 --- a/sklearn/datasets/_svmlight_format.pyx +++ b/sklearn/datasets/_svmlight_format.pyx @@ -14,8 +14,6 @@ cimport numpy as np import numpy as np import scipy.sparse as sp -from ..externals.six import b - np.import_array() diff --git a/sklearn/datasets/base.py b/sklearn/datasets/base.py index e4580b56dc181..bb48f48c57e35 100644 --- a/sklearn/datasets/base.py +++ b/sklearn/datasets/base.py @@ -22,7 +22,7 @@ import numpy as np -from ..externals.six.moves.urllib.request import urlretrieve +from urllib.request import urlretrieve RemoteFileMetadata = namedtuple('RemoteFileMetadata', ['filename', 'url', 'checksum']) diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py index 1f1fc158553fd..09c4c515a5745 100644 --- a/sklearn/datasets/openml.py +++ b/sklearn/datasets/openml.py @@ -21,7 +21,6 @@ from sklearn.externals import _arff from .base import get_data_home -from ..externals.six import str, PY2, BytesIO from urllib.error import HTTPError from ..utils import Bunch @@ -92,8 +91,6 @@ def is_gzip(_fsrc): if data_home is None: fsrc = urlopen(req) if is_gzip(fsrc): - if PY2: - fsrc = BytesIO(fsrc.read()) return gzip.GzipFile(fileobj=fsrc, mode='rb') return fsrc @@ -360,16 +357,9 @@ def _arff_load(): else: return_type = _arff.DENSE - if PY2: - arff_file = _arff.load( - response.read(), - encode_nominal=encode_nominal, - return_type=return_type, - ) - else: - arff_file = _arff.loads(response.read().decode('utf-8'), - encode_nominal=encode_nominal, - return_type=return_type) + arff_file = _arff.loads(response.read().decode('utf-8'), + encode_nominal=encode_nominal, + return_type=return_type) return arff_file return _arff_load() diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py index 75aecdfb999f1..56323c4aba266 100644 --- a/sklearn/datasets/tests/test_lfw.py +++
b/sklearn/datasets/tests/test_lfw.py @@ -70,30 +70,31 @@ def setup_module(): # add some random file pollution to test robustness with open(os.path.join(LFW_HOME, 'lfw_funneled', '.test.swp'), 'wb') as f: - f.write(six.b('Text file to be ignored by the dataset loader.')) + f.write('Text file to be ignored by the dataset loader.'.encode()) # generate some pairing metadata files using the same format as LFW with open(os.path.join(LFW_HOME, 'pairsDevTrain.txt'), 'wb') as f: - f.write(six.b("10\n")) + f.write("10\n").encode() more_than_two = [name for name, count in counts.items() if count >= 2] for i in range(5): name = random_state.choice(more_than_two) first, second = random_state.sample(range(counts[name]), 2) - f.write(six.b('%s\t%d\t%d\n' % (name, first, second))) + f.write('%s\t%d\t%d\n' % (name, first, second).encode()) for i in range(5): first_name, second_name = random_state.sample(FAKE_NAMES, 2) first_index = random_state.choice(np.arange(counts[first_name])) second_index = random_state.choice(np.arange(counts[second_name])) - f.write(six.b('%s\t%d\t%s\t%d\n' % (first_name, first_index, - second_name, second_index))) + f.write(('%s\t%d\t%s\t%d\n' % (first_name, first_index, + second_name, second_index)).encode() + ) with open(os.path.join(LFW_HOME, 'pairsDevTest.txt'), 'wb') as f: - f.write(six.b("Fake place holder that won't be tested")) + f.write("Fake place holder that won't be tested".encode()) with open(os.path.join(LFW_HOME, 'pairs.txt'), 'wb') as f: - f.write(six.b("Fake place holder that won't be tested")) + f.write("Fake place holder that won't be tested".encode()) def teardown_module(): diff --git a/sklearn/decomposition/fastica_.py b/sklearn/decomposition/fastica_.py index 5995357c4f4a9..f9ee5e42fbbc8 100644 --- a/sklearn/decomposition/fastica_.py +++ b/sklearn/decomposition/fastica_.py @@ -74,7 +74,7 @@ def _ica_def(X, tol, g, fun_args, max_iter, w_init): w = w_init[j, :].copy() w /= np.sqrt((w ** 2).sum()) - for i in moves.range(max_iter): + for i in range(max_iter): gwtx, g_wtx = g(np.dot(w.T, X), fun_args) w1 = (X * gwtx).mean(axis=1) - g_wtx.mean() * w @@ -103,7 +103,7 @@ def _ica_par(X, tol, g, fun_args, max_iter, w_init): W = _sym_decorrelation(w_init) del w_init p_ = float(X.shape[1]) - for ii in moves.range(max_iter): + for ii in range(max_iter): gwtx, g_wtx = g(np.dot(W, X), fun_args) W1 = _sym_decorrelation(np.dot(gwtx, X.T) / p_ - g_wtx[:, np.newaxis] * W) diff --git a/sklearn/externals/joblib/_parallel_backends.py b/sklearn/externals/joblib/_parallel_backends.py index 0f0bcf0ab4213..dd62d508694a9 100644 --- a/sklearn/externals/joblib/_parallel_backends.py +++ b/sklearn/externals/joblib/_parallel_backends.py @@ -27,7 +27,7 @@ from .externals.loky import process_executor, cpu_count -class ParallelBackendBase(with_metaclass(ABCMeta)): +class ParallelBackendBase(metaclass=ABCMeta): """Helper abc which defines all methods a ParallelBackend must implement""" supports_timeout = False diff --git a/sklearn/externals/joblib/_store_backends.py b/sklearn/externals/joblib/_store_backends.py index 9196f0a7746a1..3e2c02d1d64fb 100644 --- a/sklearn/externals/joblib/_store_backends.py +++ b/sklearn/externals/joblib/_store_backends.py @@ -31,7 +31,7 @@ def concurrency_safe_write(object_to_write, filename, write_func): return temporary_filename -class StoreBackendBase(with_metaclass(ABCMeta)): +class StoreBackendBase(metaclass=ABCMeta): """Helper Abstract Base Class which defines all methods that a StorageBackend must implement.""" diff --git 
a/sklearn/metrics/tests/test_pairwise.py b/sklearn/metrics/tests/test_pairwise.py index d2f4578856a31..f76215d5e1bbf 100644 --- a/sklearn/metrics/tests/test_pairwise.py +++ b/sklearn/metrics/tests/test_pairwise.py @@ -296,7 +296,7 @@ def test_pairwise_kernels_filter_param(): assert_raises(TypeError, pairwise_kernels, X, Y, "rbf", **params) -@pytest.mark.parametrize('metric, func', iteritems(PAIRED_DISTANCES)) +@pytest.mark.parametrize('metric, func', PAIRED_DISTANCES.items()) def test_paired_distances(metric, func): # Test the pairwise_distance helper function. rng = np.random.RandomState(0) diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py index ab05e01f71351..ebdd3ab17225c 100644 --- a/sklearn/model_selection/tests/test_split.py +++ b/sklearn/model_selection/tests/test_split.py @@ -545,8 +545,7 @@ def test_shuffle_split(): ss1 = ShuffleSplit(test_size=0.2, random_state=0).split(X) ss2 = ShuffleSplit(test_size=2, random_state=0).split(X) ss3 = ShuffleSplit(test_size=np.int32(2), random_state=0).split(X) - for typ in six.integer_types: - ss4 = ShuffleSplit(test_size=typ(2), random_state=0).split(X) + ss4 = ShuffleSplit(test_size=int(2), random_state=0).split(X) for t1, t2, t3, t4 in zip(ss1, ss2, ss3, ss4): assert_array_equal(t1[0], t2[0]) assert_array_equal(t2[0], t3[0]) diff --git a/sklearn/tree/export.py b/sklearn/tree/export.py index 18052d6233d09..017275cfb1c19 100644 --- a/sklearn/tree/export.py +++ b/sklearn/tree/export.py @@ -11,7 +11,7 @@ # Li Li # License: BSD 3 clause import warnings -from IO import StringIO +from io import StringIO from numbers import Integral diff --git a/sklearn/utils/tests/test_bench.py b/sklearn/utils/tests/test_bench.py deleted file mode 100644 index c04ba4ad25eba..0000000000000 --- a/sklearn/utils/tests/test_bench.py +++ /dev/null @@ -1,11 +0,0 @@ - -import datetime - -from sklearn.utils.bench import total_seconds -from sklearn.utils.testing import assert_equal - - -def test_total_seconds(): - delta = (datetime.datetime(2012, 1, 1, 5, 5, 1) - - datetime.datetime(2012, 1, 1, 5, 5, 4)) - assert_equal(86397, total_seconds(delta)) diff --git a/sklearn/utils/tests/test_murmurhash.py b/sklearn/utils/tests/test_murmurhash.py index cb7899af88ab3..6f57b06c6bb4e 100644 --- a/sklearn/utils/tests/test_murmurhash.py +++ b/sklearn/utils/tests/test_murmurhash.py @@ -43,19 +43,19 @@ def test_mmhash3_int_array(): def test_mmhash3_bytes(): - assert_equal(murmurhash3_32(b('foo'), 0), -156908512) - assert_equal(murmurhash3_32(b('foo'), 42), -1322301282) + assert_equal(murmurhash3_32('foo'.encode(), 0), -156908512) + assert_equal(murmurhash3_32('foo'.encode(), 42), -1322301282) - assert_equal(murmurhash3_32(b('foo'), 0, positive=True), 4138058784) - assert_equal(murmurhash3_32(b('foo'), 42, positive=True), 2972666014) + assert_equal(murmurhash3_32('foo'.encode(), 0, positive=True), 4138058784) + assert_equal(murmurhash3_32('foo'.encode(), 42, positive=True), 2972666014) def test_mmhash3_unicode(): - assert_equal(murmurhash3_32(u('foo'), 0), -156908512) - assert_equal(murmurhash3_32(u('foo'), 42), -1322301282) + assert_equal(murmurhash3_32('foo', 0), -156908512) + assert_equal(murmurhash3_32('foo', 42), -1322301282) - assert_equal(murmurhash3_32(u('foo'), 0, positive=True), 4138058784) - assert_equal(murmurhash3_32(u('foo'), 42, positive=True), 2972666014) + assert_equal(murmurhash3_32('foo', 0, positive=True), 4138058784) + assert_equal(murmurhash3_32('foo', 42, positive=True), 2972666014) def 
test_no_collision_on_byte_range(): From aa3f485e4efc3d4369def4e807e6950a99697ee8 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 21 Nov 2018 13:03:46 -0500 Subject: [PATCH 12/29] another moves --- sklearn/decomposition/tests/test_fastica.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py index 313a13ad8333b..5efda7d67a178 100644 --- a/sklearn/decomposition/tests/test_fastica.py +++ b/sklearn/decomposition/tests/test_fastica.py @@ -129,7 +129,7 @@ def g_test(x): ica = FastICA(fun=fn, algorithm=algo, random_state=0) assert_raises(ValueError, ica.fit, m.T) - assert_raises(TypeError, FastICA(fun=moves.range(10)).fit, m.T) + assert_raises(TypeError, FastICA(fun=range(10)).fit, m.T) def test_fastica_nowhiten(): From 6a5815b9e006073b784788ed7fe07793d578aa85 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 21 Nov 2018 16:24:25 -0500 Subject: [PATCH 13/29] build on 32bit python3.5 --- appveyor.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/appveyor.yml b/appveyor.yml index 531aaca31aec5..bd59d727ffdc7 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -22,6 +22,11 @@ environment: PYTHON_ARCH: "64" CHECK_WARNINGS: "true" + - PYTHON: "C:\\Python35" + PYTHON_VERSION: "3.5.x" + PYTHON_ARCH: "32" + + # Because we only have a single worker, we don't want to waste precious # appveyor CI time and make other PRs wait for repeated failures in a failing # PR. The following option cancels pending jobs in a given PR after the first From 906576e1ff2fccec2ef12277705907f7d85fcb51 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 21 Nov 2018 16:26:32 -0500 Subject: [PATCH 14/29] remove b in pyx --- sklearn/datasets/_svmlight_format.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/datasets/_svmlight_format.pyx b/sklearn/datasets/_svmlight_format.pyx index ea45920ad70f2..7a1ad9dc93cf7 100644 --- a/sklearn/datasets/_svmlight_format.pyx +++ b/sklearn/datasets/_svmlight_format.pyx @@ -31,7 +31,7 @@ def _load_svmlight_file(f, dtype, bint multilabel, bint zero_based, cdef char *line_cstr cdef int idx, prev_idx cdef Py_ssize_t i - cdef bytes qid_prefix = b('qid') + cdef bytes qid_prefix = 'qid'.encode() cdef Py_ssize_t n_features cdef long long offset_max = offset + length if length > 0 else -1 From 403158072160ab1776643f8f836edefcbf0f68e5 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 21 Nov 2018 16:30:36 -0500 Subject: [PATCH 15/29] minor six fixes --- sklearn/ensemble/partial_dependence.py | 2 +- sklearn/feature_selection/mutual_info_.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/partial_dependence.py b/sklearn/ensemble/partial_dependence.py index 74a6497e42191..9460b29184df7 100644 --- a/sklearn/ensemble/partial_dependence.py +++ b/sklearn/ensemble/partial_dependence.py @@ -283,7 +283,7 @@ def convert_feature(fx): # convert features into a seq of int tuples tmp_features = [] for fxs in features: - if isinstance(fxs, (numbers.Integral,) + str): + if isinstance(fxs, (numbers.Integral, str)): fxs = (fxs,) try: fxs = np.array([convert_feature(fx) for fx in fxs], dtype=np.int32) diff --git a/sklearn/feature_selection/mutual_info_.py b/sklearn/feature_selection/mutual_info_.py index 7c5c247eb36ef..057a696077e8f 100644 --- a/sklearn/feature_selection/mutual_info_.py +++ b/sklearn/feature_selection/mutual_info_.py @@ -285,7 +285,7 @@ def _estimate_mi(X, y, discrete_features='auto', discrete_target=False, 
y += 1e-10 * np.maximum(1, np.mean(np.abs(y))) * rng.randn(n_samples) mi = [_compute_mi(x, y, discrete_feature, discrete_target, n_neighbors) for - x, discrete_feature in moves.zip(_iterate_columns(X), discrete_mask)] + x, discrete_feature zip(_iterate_columns(X), discrete_mask)] return np.array(mi) From 8aa75f13f66a12b41ff88fdf6b9501654ef18032 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 21 Nov 2018 16:31:41 -0500 Subject: [PATCH 16/29] typo --- sklearn/feature_selection/mutual_info_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/feature_selection/mutual_info_.py b/sklearn/feature_selection/mutual_info_.py index 057a696077e8f..ac5492317bc50 100644 --- a/sklearn/feature_selection/mutual_info_.py +++ b/sklearn/feature_selection/mutual_info_.py @@ -285,7 +285,7 @@ def _estimate_mi(X, y, discrete_features='auto', discrete_target=False, y += 1e-10 * np.maximum(1, np.mean(np.abs(y))) * rng.randn(n_samples) mi = [_compute_mi(x, y, discrete_feature, discrete_target, n_neighbors) for - x, discrete_feature zip(_iterate_columns(X), discrete_mask)] + x, discrete_feature in zip(_iterate_columns(X), discrete_mask)] return np.array(mi) From 28c9fd779d30a3d1b458e1c3c1fc51190f181224 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 14 Dec 2018 17:02:12 -0500 Subject: [PATCH 17/29] unused imports, minor cleanups --- sklearn/cluster/tests/test_k_means.py | 1 - sklearn/compose/_column_transformer.py | 2 +- sklearn/datasets/kddcup99.py | 1 - sklearn/datasets/tests/test_svmlight_format.py | 13 ++++++------- sklearn/ensemble/bagging.py | 1 - sklearn/metrics/cluster/tests/test_common.py | 2 +- sklearn/model_selection/_search.py | 1 - sklearn/preprocessing/_discretization.py | 1 - sklearn/tests/test_discriminant_analysis.py | 1 - sklearn/tests/test_metaestimators.py | 2 +- sklearn/utils/extmath.py | 1 - sklearn/utils/tests/test_deprecation.py | 2 -- sklearn/utils/tests/test_extmath.py | 2 -- sklearn/utils/tests/test_fixes.py | 1 - sklearn/utils/tests/test_multiclass.py | 4 ++-- sklearn/utils/tests/test_validation.py | 1 - 16 files changed, 11 insertions(+), 25 deletions(-) diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py index 245ed3d97ea95..37571d427002b 100644 --- a/sklearn/cluster/tests/test_k_means.py +++ b/sklearn/cluster/tests/test_k_means.py @@ -9,7 +9,6 @@ from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import SkipTest from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regex diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index 71944f3721e8c..c2fad0d76870b 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -691,7 +691,7 @@ def _validate_transformers(transformers): return True for t in transformers: - if isinstance(t, six.string_types) and t in ('drop', 'passthrough'): + if isinstance(t, str) and t in ('drop', 'passthrough'): continue if (not (hasattr(t, "fit") or hasattr(t, "fit_transform")) or not hasattr(t, "transform")): diff --git a/sklearn/datasets/kddcup99.py b/sklearn/datasets/kddcup99.py index 713cb19beee36..baa84bab445d5 100644 --- a/sklearn/datasets/kddcup99.py +++ b/sklearn/datasets/kddcup99.py @@ -8,7 +8,6 @@ """ -import sys import errno from gzip import GzipFile import 
logging diff --git a/sklearn/datasets/tests/test_svmlight_format.py b/sklearn/datasets/tests/test_svmlight_format.py index e8242e1195e26..05a958bd0c88b 100644 --- a/sklearn/datasets/tests/test_svmlight_format.py +++ b/sklearn/datasets/tests/test_svmlight_format.py @@ -17,7 +17,6 @@ from sklearn.utils.testing import assert_raises_regex from sklearn.utils.testing import assert_in from sklearn.utils.testing import fails_if_pypy -from sklearn.utils.fixes import sp_version import sklearn from sklearn.datasets import (load_svmlight_file, load_svmlight_files, @@ -43,8 +42,8 @@ def test_load_svmlight_file(): # test X's non-zero values for i, j, val in ((0, 2, 2.5), (0, 10, -5.2), (0, 15, 1.5), - (1, 5, 1.0), (1, 12, -3), - (2, 20, 27)): + (1, 5, 1.0), (1, 12, -3), + (2, 20, 27)): assert_equal(X[i, j], val) @@ -106,7 +105,7 @@ def test_load_svmlight_file_n_features(): # test X's non-zero values for i, j, val in ((0, 2, 2.5), (0, 10, -5.2), - (1, 5, 1.0), (1, 12, -3)): + (1, 5, 1.0), (1, 12, -3)): assert_equal(X[i, j], val) @@ -376,9 +375,9 @@ def test_load_with_long_qid(): X, y, qid = load_svmlight_file(BytesIO(data), query_id=True) true_X = [[1, 2, 3], - [1440446648, 72048431380967004, 236784985], - [1440446648, 72048431380967004, 236784985], - [1440446648, 72048431380967004, 236784985]] + [1440446648, 72048431380967004, 236784985], + [1440446648, 72048431380967004, 236784985], + [1440446648, 72048431380967004, 236784985]] true_y = [1, 0, 0, 3] trueQID = [0, 72048431380967004, -9223372036854775807, 9223372036854775807] diff --git a/sklearn/ensemble/bagging.py b/sklearn/ensemble/bagging.py index ef2399e0d6041..63a7721f905bc 100644 --- a/sklearn/ensemble/bagging.py +++ b/sklearn/ensemble/bagging.py @@ -14,7 +14,6 @@ from .base import BaseEnsemble, _partition_estimators from ..base import ClassifierMixin, RegressorMixin from ..utils._joblib import Parallel, delayed -from ..externals.six import with_metaclass from ..metrics import r2_score, accuracy_score from ..tree import DecisionTreeClassifier, DecisionTreeRegressor from ..utils import check_random_state, check_X_y, check_array, column_or_1d diff --git a/sklearn/metrics/cluster/tests/test_common.py b/sklearn/metrics/cluster/tests/test_common.py index d3fde5c3b5565..810a573b12e63 100644 --- a/sklearn/metrics/cluster/tests/test_common.py +++ b/sklearn/metrics/cluster/tests/test_common.py @@ -15,7 +15,7 @@ from sklearn.metrics.cluster import calinski_harabasz_score from sklearn.metrics.cluster import davies_bouldin_score -from sklearn.utils.testing import assert_allclose, ignore_warnings +from sklearn.utils.testing import assert_allclose # Dictionaries of metrics diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index bafb43efb98ba..d12e41930a001 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -31,7 +31,6 @@ from ..exceptions import NotFittedError from ..utils._joblib import Parallel, delayed from ..utils import check_random_state -from ..utils.fixes import sp_version from ..utils.fixes import MaskedArray from ..utils.fixes import _Mapping as Mapping, _Sequence as Sequence from ..utils.fixes import _Iterable as Iterable diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py index 2ba1b019fc12e..b57e03230f4f1 100644 --- a/sklearn/preprocessing/_discretization.py +++ b/sklearn/preprocessing/_discretization.py @@ -17,7 +17,6 @@ from ..utils.validation import check_array from ..utils.validation import check_is_fitted from 
..utils.validation import FLOAT_DTYPES -from ..utils.fixes import np_version class KBinsDiscretizer(BaseEstimator, TransformerMixin): diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index e7b14e2b1f9f2..3cf4f5c016f79 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -6,7 +6,6 @@ from sklearn.utils import check_random_state from sklearn.utils.testing import (assert_array_equal, assert_no_warnings, assert_warns_message) -from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal diff --git a/sklearn/tests/test_metaestimators.py b/sklearn/tests/test_metaestimators.py index da9ccb4314801..47de7ae374b74 100644 --- a/sklearn/tests/test_metaestimators.py +++ b/sklearn/tests/test_metaestimators.py @@ -104,7 +104,7 @@ def score(self, X, y, *args, **kwargs): self._check_fit() return 1.0 - methods = [k for k in iterkeys(SubEstimator.__dict__) + methods = [k for k in SubEstimator.__dict__.keys() if not k.startswith('_') and not k.startswith('fit')] methods.sort() diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index bb2c2455d6201..fef2c7aff7971 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -18,7 +18,6 @@ from scipy import linalg, sparse from . import check_random_state -from .fixes import np_version from ._logistic_sigmoid import _log_logistic_sigmoid from .sparsefuncs_fast import csr_row_norms from .validation import check_array diff --git a/sklearn/utils/tests/test_deprecation.py b/sklearn/utils/tests/test_deprecation.py index f6049debeb20a..c8d8484d71bc4 100644 --- a/sklearn/utils/tests/test_deprecation.py +++ b/sklearn/utils/tests/test_deprecation.py @@ -2,13 +2,11 @@ # License: BSD 3 clause -import sys import pickle from sklearn.utils.deprecation import _is_deprecated from sklearn.utils.deprecation import deprecated from sklearn.utils.testing import assert_warns_message -from sklearn.utils.testing import SkipTest @deprecated('qwerty') diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index 81b0044c804f0..69cb83804dced 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -20,8 +20,6 @@ from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_warns_message from sklearn.utils.testing import skip_if_32bit -from sklearn.utils.testing import SkipTest -from sklearn.utils.fixes import np_version from sklearn.utils.extmath import density from sklearn.utils.extmath import randomized_svd diff --git a/sklearn/utils/tests/test_fixes.py b/sklearn/utils/tests/test_fixes.py index 0dd97c03cb032..b253fc1f54cec 100644 --- a/sklearn/utils/tests/test_fixes.py +++ b/sklearn/utils/tests/test_fixes.py @@ -8,7 +8,6 @@ import numpy as np import pytest -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_allclose diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py index be5989b14e152..443988ddc3ecb 100644 --- a/sklearn/utils/tests/test_multiclass.py +++ b/sklearn/utils/tests/test_multiclass.py @@ -224,7 +224,7 @@ def test_unique_labels_mixed_types(): def test_is_multilabel(): - for group, group_examples in iteritems(EXAMPLES): + for group, group_examples in EXAMPLES.items(): if 
group in ['multilabel-indicator']: dense_exp = True else: @@ -277,7 +277,7 @@ def test_check_classification_targets(): # @ignore_warnings def test_type_of_target(): - for group, group_examples in iteritems(EXAMPLES): + for group, group_examples in EXAMPLES.items(): for example in group_examples: assert_equal(type_of_target(example), group, msg=('type_of_target(%r) should be %r, got %r' diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index ec8b10ce2b54e..99019e25c0c81 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -10,7 +10,6 @@ from pytest import importorskip import numpy as np import scipy.sparse as sp -from scipy import __version__ as scipy_version from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_raises From 03bf639e497619a4838498d3057a7556147be222 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 14 Dec 2018 17:13:03 -0500 Subject: [PATCH 18/29] remove six import from openml test --- sklearn/datasets/tests/test_openml.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py index 4a858899e2c31..4cda24c7398b2 100644 --- a/sklearn/datasets/tests/test_openml.py +++ b/sklearn/datasets/tests/test_openml.py @@ -17,7 +17,6 @@ _retry_with_clean_cache) from sklearn.utils.testing import (assert_warns_message, assert_raise_message) -from sklearn.externals.six import str from urllib.error import HTTPError from sklearn.datasets.tests.test_common import check_return_X_y from functools import partial From 14c321995a537421e3f2a13ce850ceb9a8cf0a6b Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 14 Dec 2018 17:14:44 -0500 Subject: [PATCH 19/29] remove six from bicluster example --- examples/bicluster/plot_bicluster_newsgroups.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/bicluster/plot_bicluster_newsgroups.py b/examples/bicluster/plot_bicluster_newsgroups.py index 12d42e23a0f19..430c37a651197 100644 --- a/examples/bicluster/plot_bicluster_newsgroups.py +++ b/examples/bicluster/plot_bicluster_newsgroups.py @@ -32,7 +32,6 @@ from sklearn.cluster.bicluster import SpectralCoclustering from sklearn.cluster import MiniBatchKMeans -from sklearn.externals.six import iteritems from sklearn.datasets.twenty_newsgroups import fetch_20newsgroups from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics.cluster import v_measure_score @@ -116,7 +115,7 @@ def most_common(d): Like Counter.most_common in Python >=2.7. 
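On Python 3 the dict methods items(), keys() and values() already return lightweight view objects, which is why the iteritems/iterkeys/itervalues wrappers can simply be dropped throughout these patches. A stand-alone version of the most_common sorting pattern being converted in the bicluster example, with made-up counts:

    import operator

    counts = {"graph": 3, "minors": 1, "trees": 2}

    # six era: sorted(six.iteritems(counts), key=operator.itemgetter(1), reverse=True)
    ranked = sorted(counts.items(), key=operator.itemgetter(1), reverse=True)
    print(ranked)   # [('graph', 3), ('trees', 2), ('minors', 1)]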
""" - return sorted(iteritems(d), key=operator.itemgetter(1), reverse=True) + return sorted(d.items(), key=operator.itemgetter(1), reverse=True) bicluster_ncuts = list(bicluster_ncut(i) From c450e95c27b42f45a248df2e4c0de5d03ff831f4 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 14 Dec 2018 17:18:56 -0500 Subject: [PATCH 20/29] revert externals --- sklearn/externals/_arff.py | 4 ++-- sklearn/externals/joblib/_parallel_backends.py | 2 +- sklearn/externals/joblib/_store_backends.py | 2 +- sklearn/externals/six.py | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sklearn/externals/_arff.py b/sklearn/externals/_arff.py index 2552e77b894a6..82f504542f9a9 100644 --- a/sklearn/externals/_arff.py +++ b/sklearn/externals/_arff.py @@ -431,7 +431,7 @@ def decode_data(self, s, conversors): raise BadDataFormat(s) # XXX: int 0 is used for implicit values, not '0' values = [values[i] if i in values else 0 for i in - range(len(conversors))] + xrange(len(conversors))] else: if len(values) != len(conversors): raise BadDataFormat(s) @@ -524,7 +524,7 @@ def encode_data(self, data, attributes): data = data.data # Check if the rows are sorted - if not all(row[i] <= row[i + 1] for i in range(len(row) - 1)): + if not all(row[i] <= row[i + 1] for i in xrange(len(row) - 1)): raise ValueError("liac-arff can only output COO matrices with " "sorted rows.") diff --git a/sklearn/externals/joblib/_parallel_backends.py b/sklearn/externals/joblib/_parallel_backends.py index dd62d508694a9..0f0bcf0ab4213 100644 --- a/sklearn/externals/joblib/_parallel_backends.py +++ b/sklearn/externals/joblib/_parallel_backends.py @@ -27,7 +27,7 @@ from .externals.loky import process_executor, cpu_count -class ParallelBackendBase(metaclass=ABCMeta): +class ParallelBackendBase(with_metaclass(ABCMeta)): """Helper abc which defines all methods a ParallelBackend must implement""" supports_timeout = False diff --git a/sklearn/externals/joblib/_store_backends.py b/sklearn/externals/joblib/_store_backends.py index 3e2c02d1d64fb..9196f0a7746a1 100644 --- a/sklearn/externals/joblib/_store_backends.py +++ b/sklearn/externals/joblib/_store_backends.py @@ -31,7 +31,7 @@ def concurrency_safe_write(object_to_write, filename, write_func): return temporary_filename -class StoreBackendBase(metaclass=ABCMeta): +class StoreBackendBase(with_metaclass(ABCMeta)): """Helper Abstract Base Class which defines all methods that a StorageBackend must implement.""" diff --git a/sklearn/externals/six.py b/sklearn/externals/six.py index 5b7dc61f98e7e..85898ec71275f 100644 --- a/sklearn/externals/six.py +++ b/sklearn/externals/six.py @@ -33,7 +33,7 @@ PY3 = sys.version_info[0] == 3 if PY3: - str = str, + string_types = str, integer_types = int, class_types = type, text_type = str @@ -41,7 +41,7 @@ MAXSIZE = sys.maxsize else: - str = basestring, + string_types = basestring, integer_types = (int, long) class_types = (type, types.ClassType) text_type = unicode From 559db2b55139b91002a31da642bb3febfba86ed5 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 14 Dec 2018 17:24:50 -0500 Subject: [PATCH 21/29] fix some encoding stuff --- sklearn/datasets/svmlight_format.py | 15 +++++----- .../datasets/tests/test_svmlight_format.py | 28 +++++++++---------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/sklearn/datasets/svmlight_format.py b/sklearn/datasets/svmlight_format.py index 60e3a3961655e..df3c6dba98e29 100644 --- a/sklearn/datasets/svmlight_format.py +++ b/sklearn/datasets/svmlight_format.py @@ -341,13 +341,14 @@ def 
_dump_svmlight(X, y, f, multilabel, one_based, comment, query_id): line_pattern += " %s\n" if comment: - f.write("# Generated by dump_svmlight_file from scikit-learn %s\n" - % __version__) - f.write("# Column indices are %s-based\n" - % ["zero", "one"][one_based]) + f.write(("# Generated by dump_svmlight_file from scikit-learn %s\n" + % __version__).encode()) + f.write(("# Column indices are %s-based\n" + % ["zero", "one"][one_based]).encode()) - f.write("#\n") - f.writelines("# %s\n" % line for line in comment.splitlines()) + f.write("#\n".encode()) + f.writelines((("# %s\n" % line).encode() + for line in comment.splitlines())) for i in range(X.shape[0]): if X_is_sp: @@ -437,7 +438,7 @@ def dump_svmlight_file(X, y, f, zero_based=True, comment=None, query_id=None, comment.decode("ascii") # just for the exception else: comment = comment.encode("utf-8") - if "\0" in comment: + if "\0".encode() in comment: raise ValueError("comment string contains NUL byte") yval = check_array(y, accept_sparse='csr', ensure_2d=False) diff --git a/sklearn/datasets/tests/test_svmlight_format.py b/sklearn/datasets/tests/test_svmlight_format.py index 05a958bd0c88b..67fd4f1321058 100644 --- a/sklearn/datasets/tests/test_svmlight_format.py +++ b/sklearn/datasets/tests/test_svmlight_format.py @@ -150,13 +150,13 @@ def test_load_invalid_order_file(): def test_load_zero_based(): - f = BytesIO("-1 4:1.\n1 0:1\n") + f = BytesIO("-1 4:1.\n1 0:1\n".encode()) assert_raises(ValueError, load_svmlight_file, f, zero_based=False) def test_load_zero_based_auto(): - data1 = "-1 1:1 2:2 3:3\n" - data2 = "-1 0:0 1:1\n" + data1 = "-1 1:1 2:2 3:3\n".encode() + data2 = "-1 0:0 1:1\n".encode() f1 = BytesIO(data1) X, y = load_svmlight_file(f1, zero_based="auto") @@ -174,7 +174,7 @@ def test_load_with_qid(): data = """ 3 qid:1 1:0.53 2:0.12 2 qid:1 1:0.13 2:0.1 - 7 qid:2 1:0.87 2:0.12""" + 7 qid:2 1:0.87 2:0.12""".encode() X, y = load_svmlight_file(BytesIO(data), query_id=False) assert_array_equal(y, [3, 2, 7]) assert_array_equal(X.toarray(), [[.53, .12], [.13, .1], [.87, .12]]) @@ -271,9 +271,9 @@ def test_dump_multilabel(): dump_svmlight_file(X, y, f, multilabel=True) f.seek(0) # make sure it dumps multilabel correctly - assert_equal(f.readline(), "1 0:1 2:3 4:5\n") - assert_equal(f.readline(), "0,2 \n") - assert_equal(f.readline(), "0,1 1:5 3:1\n") + assert_equal(f.readline(), "1 0:1 2:3 4:5\n".encode()) + assert_equal(f.readline(), "0,2 \n".encode()) + assert_equal(f.readline(), "0,1 1:5 3:1\n".encode()) def test_dump_concise(): @@ -294,11 +294,11 @@ def test_dump_concise(): f.seek(0) # make sure it's using the most concise format possible assert_equal(f.readline(), - "1 0:1 1:2.1 2:3.01 3:1.000000000000001 4:1\n") - assert_equal(f.readline(), "2.1 0:1000000000 1:2e+18 2:3e+27\n") - assert_equal(f.readline(), "3.01 \n") - assert_equal(f.readline(), "1.000000000000001 \n") - assert_equal(f.readline(), "1 \n") + "1 0:1 1:2.1 2:3.01 3:1.000000000000001 4:1\n".encode()) + assert_equal(f.readline(), "2.1 0:1000000000 1:2e+18 2:3e+27\n".encode()) + assert_equal(f.readline(), "3.01 \n".encode()) + assert_equal(f.readline(), "1.000000000000001 \n".encode()) + assert_equal(f.readline(), "1 \n".encode()) f.seek(0) # make sure it's correct too :) X2, y2 = load_svmlight_file(f) @@ -320,7 +320,7 @@ def test_dump_comment(): assert_array_almost_equal(y, y2) # XXX we have to update this to support Python 3.x - utf8_comment = "It is true that\n\xc2\xbd\xc2\xb2 = \xc2\xbc" + utf8_comment = "It is true that\n\xc2\xbd\xc2\xb2 = \xc2\xbc".encode() f = 
BytesIO() assert_raises(UnicodeDecodeError, dump_svmlight_file, X, y, f, comment=utf8_comment) @@ -371,7 +371,7 @@ def test_load_with_long_qid(): 1 qid:0 0:1 1:2 2:3 0 qid:72048431380967004 0:1440446648 1:72048431380967004 2:236784985 0 qid:-9223372036854775807 0:1440446648 1:72048431380967004 2:236784985 - 3 qid:9223372036854775807 0:1440446648 1:72048431380967004 2:236784985""" + 3 qid:9223372036854775807 0:1440446648 1:72048431380967004 2:236784985""".encode() X, y, qid = load_svmlight_file(BytesIO(data), query_id=True) true_X = [[1, 2, 3], From fb6a96a9100c39e0ca9e5372536180f090fcb4a6 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 14 Dec 2018 17:33:05 -0500 Subject: [PATCH 22/29] fix more bytes issues/ typos --- sklearn/datasets/lfw.py | 2 +- sklearn/datasets/tests/test_base.py | 4 ++-- sklearn/datasets/tests/test_lfw.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/datasets/lfw.py b/sklearn/datasets/lfw.py index 756a8045773bb..52f69bdb23498 100644 --- a/sklearn/datasets/lfw.py +++ b/sklearn/datasets/lfw.py @@ -368,7 +368,7 @@ def _fetch_lfw_pairs(index_file_path, data_folder_path, slice_=None, # parse the index file to find the number of pairs to be able to allocate # the right amount of memory before starting to decode the jpeg files with open(index_file_path, 'rb') as index_file: - split_lines = [ln.strip().split('\t') for ln in index_file] + split_lines = [ln.decode().strip().split('\t') for ln in index_file] pair_specs = [sl for sl in split_lines if len(sl) > 2] n_pairs = len(pair_specs) diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py index 2ae238b1ef5cf..78c0c11663782 100644 --- a/sklearn/datasets/tests/test_base.py +++ b/sklearn/datasets/tests/test_base.py @@ -54,7 +54,7 @@ def test_category_dir_1(load_files_root): test_category_dir1 = tempfile.mkdtemp(dir=load_files_root) sample_file = tempfile.NamedTemporaryFile(dir=test_category_dir1, delete=False) - sample_file.write("Hello World!\n") + sample_file.write("Hello World!\n".encode()) sample_file.close() yield str(test_category_dir1) _remove_dir(test_category_dir1) @@ -95,7 +95,7 @@ def test_default_load_files(test_category_dir_1, test_category_dir_2, assert_equal(len(res.filenames), 1) assert_equal(len(res.target_names), 2) assert_equal(res.DESCR, None) - assert_equal(res.data, ["Hello World!\n"]) + assert_equal(res.data, ["Hello World!\n".encode()]) def test_load_files_w_categories_desc_and_encoding( diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py index 56323c4aba266..b5c6f8d7409ea 100644 --- a/sklearn/datasets/tests/test_lfw.py +++ b/sklearn/datasets/tests/test_lfw.py @@ -74,13 +74,13 @@ def setup_module(): # generate some pairing metadata files using the same format as LFW with open(os.path.join(LFW_HOME, 'pairsDevTrain.txt'), 'wb') as f: - f.write("10\n").encode() + f.write("10\n".encode()) more_than_two = [name for name, count in counts.items() if count >= 2] for i in range(5): name = random_state.choice(more_than_two) first, second = random_state.sample(range(counts[name]), 2) - f.write('%s\t%d\t%d\n' % (name, first, second).encode()) + f.write(('%s\t%d\t%d\n' % (name, first, second)).encode()) for i in range(5): first_name, second_name = random_state.sample(FAKE_NAMES, 2) From e414e6994880a144cafbf3a3e376b08ef6d3b069 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 28 Dec 2018 13:41:33 -0500 Subject: [PATCH 23/29] undo encode() changes --- sklearn/datasets/_svmlight_format.pyx | 2 +- 
 sklearn/datasets/svmlight_format.py | 15 ++++-----
 sklearn/datasets/tests/test_lfw.py | 5 ++-
 .../datasets/tests/test_svmlight_format.py | 32 +++++++++----------
 sklearn/utils/tests/test_murmurhash.py | 8 ++---
 5 files changed, 30 insertions(+), 32 deletions(-)

diff --git a/sklearn/datasets/_svmlight_format.pyx b/sklearn/datasets/_svmlight_format.pyx
index 7a1ad9dc93cf7..ea45920ad70f2 100644
--- a/sklearn/datasets/_svmlight_format.pyx
+++ b/sklearn/datasets/_svmlight_format.pyx
@@ -31,7 +31,7 @@ def _load_svmlight_file(f, dtype, bint multilabel, bint zero_based,
     cdef char *line_cstr
     cdef int idx, prev_idx
     cdef Py_ssize_t i
-    cdef bytes qid_prefix = 'qid'.encode()
+    cdef bytes qid_prefix = b('qid')
     cdef Py_ssize_t n_features
     cdef long long offset_max = offset + length if length > 0 else -1

diff --git a/sklearn/datasets/svmlight_format.py b/sklearn/datasets/svmlight_format.py
index df3c6dba98e29..2c2028b53ae4b 100644
--- a/sklearn/datasets/svmlight_format.py
+++ b/sklearn/datasets/svmlight_format.py
@@ -341,14 +341,13 @@ def _dump_svmlight(X, y, f, multilabel, one_based, comment, query_id):
     line_pattern += " %s\n"

     if comment:
-        f.write(("# Generated by dump_svmlight_file from scikit-learn %s\n"
-                 % __version__).encode())
-        f.write(("# Column indices are %s-based\n"
-                 % ["zero", "one"][one_based]).encode())
+        f.write(b"# Generated by dump_svmlight_file from scikit-learn %s\n"
+                % __version__)
+        f.write(b"# Column indices are %s-based\n"
+                 % ["zero", "one"][one_based])

-        f.write("#\n".encode())
-        f.writelines((("# %s\n" % line).encode()
-                      for line in comment.splitlines()))
+        f.write(b"#\n")
+        f.writelines(b"# %s\n" % line for line in comment.splitlines())

     for i in range(X.shape[0]):
         if X_is_sp:
@@ -438,7 +437,7 @@ def dump_svmlight_file(X, y, f, zero_based=True, comment=None, query_id=None,
             comment.decode("ascii")  # just for the exception
         else:
             comment = comment.encode("utf-8")
-        if "\0".encode() in comment:
+        if b"\0" in comment:
             raise ValueError("comment string contains NUL byte")

     yval = check_array(y, accept_sparse='csr', ensure_2d=False)
diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py
index b5c6f8d7409ea..68c7f0a6d6b37 100644
--- a/sklearn/datasets/tests/test_lfw.py
+++ b/sklearn/datasets/tests/test_lfw.py
@@ -86,9 +86,8 @@ def setup_module():
             first_name, second_name = random_state.sample(FAKE_NAMES, 2)
             first_index = random_state.choice(np.arange(counts[first_name]))
             second_index = random_state.choice(np.arange(counts[second_name]))
-            f.write(('%s\t%d\t%s\t%d\n' % (first_name, first_index,
-                                           second_name, second_index)).encode()
-                    )
+            f.write(b'%s\t%d\t%s\t%d\n' % (first_name, first_index,
+                                           second_name, second_index))

     with open(os.path.join(LFW_HOME, 'pairsDevTest.txt'), 'wb') as f:
         f.write("Fake place holder that won't be tested".encode())
diff --git a/sklearn/datasets/tests/test_svmlight_format.py b/sklearn/datasets/tests/test_svmlight_format.py
index 67fd4f1321058..eac9bc01fac73 100644
--- a/sklearn/datasets/tests/test_svmlight_format.py
+++ b/sklearn/datasets/tests/test_svmlight_format.py
@@ -150,13 +150,13 @@ def test_load_invalid_order_file():


 def test_load_zero_based():
-    f = BytesIO("-1 4:1.\n1 0:1\n".encode())
+    f = BytesIO(b"-1 4:1.\n1 0:1\n")
     assert_raises(ValueError, load_svmlight_file, f, zero_based=False)


 def test_load_zero_based_auto():
-    data1 = "-1 1:1 2:2 3:3\n".encode()
-    data2 = "-1 0:0 1:1\n".encode()
+    data1 = b"-1 1:1 2:2 3:3\n"
+    data2 = b"-1 0:0 1:1\n"

     f1 = BytesIO(data1)
     X, y = load_svmlight_file(f1, zero_based="auto")
@@ -171,10 +171,10 @@ def test_load_zero_based_auto():

 def test_load_with_qid():
     # load svmfile with qid attribute
-    data = """
+    data = b"""
     3 qid:1 1:0.53 2:0.12
     2 qid:1 1:0.13 2:0.1
-    7 qid:2 1:0.87 2:0.12""".encode()
+    7 qid:2 1:0.87 2:0.12"""
     X, y = load_svmlight_file(BytesIO(data), query_id=False)
     assert_array_equal(y, [3, 2, 7])
     assert_array_equal(X.toarray(), [[.53, .12], [.13, .1], [.87, .12]])
@@ -271,9 +271,9 @@ def test_dump_multilabel():
     dump_svmlight_file(X, y, f, multilabel=True)
     f.seek(0)
     # make sure it dumps multilabel correctly
-    assert_equal(f.readline(), "1 0:1 2:3 4:5\n".encode())
-    assert_equal(f.readline(), "0,2 \n".encode())
-    assert_equal(f.readline(), "0,1 1:5 3:1\n".encode())
+    assert_equal(f.readline(), b"1 0:1 2:3 4:5\n")
+    assert_equal(f.readline(), b"0,2 \n")
+    assert_equal(f.readline(), b"0,1 1:5 3:1\n")


 def test_dump_concise():
@@ -294,11 +294,11 @@ def test_dump_concise():
     f.seek(0)
     # make sure it's using the most concise format possible
     assert_equal(f.readline(),
-                 "1 0:1 1:2.1 2:3.01 3:1.000000000000001 4:1\n".encode())
-    assert_equal(f.readline(), "2.1 0:1000000000 1:2e+18 2:3e+27\n".encode())
-    assert_equal(f.readline(), "3.01 \n".encode())
-    assert_equal(f.readline(), "1.000000000000001 \n".encode())
-    assert_equal(f.readline(), "1 \n".encode())
+                 b"1 0:1 1:2.1 2:3.01 3:1.000000000000001 4:1\n")
+    assert_equal(f.readline(), b"2.1 0:1000000000 1:2e+18 2:3e+27\n")
+    assert_equal(f.readline(), b"3.01 \n")
+    assert_equal(f.readline(), b"1.000000000000001 \n")
+    assert_equal(f.readline(), b"1 \n")
     f.seek(0)
     # make sure it's correct too :)
     X2, y2 = load_svmlight_file(f)
@@ -320,7 +320,7 @@ def test_dump_comment():
     assert_array_almost_equal(y, y2)

     # XXX we have to update this to support Python 3.x
-    utf8_comment = "It is true that\n\xc2\xbd\xc2\xb2 = \xc2\xbc".encode()
+    utf8_comment = b"It is true that\n\xc2\xbd\xc2\xb2 = \xc2\xbc"
     f = BytesIO()
     assert_raises(UnicodeDecodeError, dump_svmlight_file, X, y, f,
                   comment=utf8_comment)
@@ -367,11 +367,11 @@ def test_dump_query_id():

 def test_load_with_long_qid():
     # load svmfile with longint qid attribute
-    data = """
+    data = b"""
     1 qid:0 0:1 1:2 2:3
     0 qid:72048431380967004 0:1440446648 1:72048431380967004 2:236784985
     0 qid:-9223372036854775807 0:1440446648 1:72048431380967004 2:236784985
-    3 qid:9223372036854775807 0:1440446648 1:72048431380967004 2:236784985""".encode()
+    3 qid:9223372036854775807 0:1440446648 1:72048431380967004 2:236784985"""
     X, y, qid = load_svmlight_file(BytesIO(data), query_id=True)

     true_X = [[1, 2, 3],
diff --git a/sklearn/utils/tests/test_murmurhash.py b/sklearn/utils/tests/test_murmurhash.py
index a3d6df221bf1a..6066012fa0162 100644
--- a/sklearn/utils/tests/test_murmurhash.py
+++ b/sklearn/utils/tests/test_murmurhash.py
@@ -43,11 +43,11 @@ def test_mmhash3_int_array():


 def test_mmhash3_bytes():
-    assert_equal(murmurhash3_32('foo'.encode(), 0), -156908512)
-    assert_equal(murmurhash3_32('foo'.encode(), 42), -1322301282)
+    assert_equal(murmurhash3_32(b'foo', 0), -156908512)
+    assert_equal(murmurhash3_32(b'foo', 42), -1322301282)

-    assert_equal(murmurhash3_32('foo'.encode(), 0, positive=True), 4138058784)
-    assert_equal(murmurhash3_32('foo'.encode(), 42, positive=True), 2972666014)
+    assert_equal(murmurhash3_32(b'foo', 0, positive=True), 4138058784)
+    assert_equal(murmurhash3_32(b'foo', 42, positive=True), 2972666014)


 def test_mmhash3_unicode():

From 8c849beedfadb401cad81924ddeb6a3b300d4f50 Mon Sep 17 00:00:00 2001
From: Andreas Mueller
Date: Fri, 28 Dec 2018 13:43:21 -0500
Subject: [PATCH 24/29] fix cython b
---
 sklearn/datasets/_svmlight_format.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/datasets/_svmlight_format.pyx b/sklearn/datasets/_svmlight_format.pyx
index ea45920ad70f2..99f443ccae53f 100644
--- a/sklearn/datasets/_svmlight_format.pyx
+++ b/sklearn/datasets/_svmlight_format.pyx
@@ -31,7 +31,7 @@ def _load_svmlight_file(f, dtype, bint multilabel, bint zero_based,
     cdef char *line_cstr
     cdef int idx, prev_idx
     cdef Py_ssize_t i
-    cdef bytes qid_prefix = b('qid')
+    cdef bytes qid_prefix = b'qid'
     cdef Py_ssize_t n_features
     cdef long long offset_max = offset + length if length > 0 else -1

From eb8a3ce87eca8a21e0718f2a61fa8343903d0150 Mon Sep 17 00:00:00 2001
From: Andreas Mueller
Date: Fri, 28 Dec 2018 13:45:38 -0500
Subject: [PATCH 25/29] fix remark by roman
---
 sklearn/datasets/openml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index 13a6fcf83373e..fa195ce030298 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -555,7 +555,7 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
         target_column = []
     elif not isinstance(target_column, list):
         raise TypeError("Did not recognize type of target_column"
-                        "Should be string_type, list or None. Got: "
+                        "Should be str, list or None. Got: "
                         "{}".format(type(target_column)))

     data_columns = _valid_data_column_names(features_list, target_column)

From ac02f515af7d200a8e276ef141110b769bc9b83c Mon Sep 17 00:00:00 2001
From: Roman Yurchak
Date: Fri, 28 Dec 2018 13:49:03 -0500
Subject: [PATCH 26/29] Apply suggestions from code review

s/encode/u

Co-Authored-By: amueller
---
 sklearn/datasets/tests/test_base.py | 4 ++--
 sklearn/datasets/tests/test_lfw.py | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py
index 78c0c11663782..08a6ba29413cf 100644
--- a/sklearn/datasets/tests/test_base.py
+++ b/sklearn/datasets/tests/test_base.py
@@ -54,7 +54,7 @@ def test_category_dir_1(load_files_root):
     test_category_dir1 = tempfile.mkdtemp(dir=load_files_root)
     sample_file = tempfile.NamedTemporaryFile(dir=test_category_dir1,
                                               delete=False)
-    sample_file.write("Hello World!\n".encode())
+    sample_file.write(b"Hello World!\n")
     sample_file.close()
     yield str(test_category_dir1)
     _remove_dir(test_category_dir1)
@@ -95,7 +95,7 @@ def test_default_load_files(test_category_dir_1, test_category_dir_2,
     assert_equal(len(res.filenames), 1)
     assert_equal(len(res.target_names), 2)
     assert_equal(res.DESCR, None)
-    assert_equal(res.data, ["Hello World!\n".encode()])
+    assert_equal(res.data, [b"Hello World!\n"])


 def test_load_files_w_categories_desc_and_encoding(
diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py
index 68c7f0a6d6b37..2a0559598a7d6 100644
--- a/sklearn/datasets/tests/test_lfw.py
+++ b/sklearn/datasets/tests/test_lfw.py
@@ -70,11 +70,11 @@ def setup_module():

     # add some random file pollution to test robustness
     with open(os.path.join(LFW_HOME, 'lfw_funneled', '.test.swp'), 'wb') as f:
-        f.write('Text file to be ignored by the dataset loader.'.encode())
+        f.write(b'Text file to be ignored by the dataset loader.')

     # generate some pairing metadata files using the same format as LFW
     with open(os.path.join(LFW_HOME, 'pairsDevTrain.txt'), 'wb') as f:
-        f.write("10\n".encode())
+        f.write(b"10\n")
         more_than_two = [name for name, count in counts.items()
                          if count >= 2]
         for i in range(5):
@@ -90,10 +90,10 @@ def setup_module():
                                            second_name, second_index))

     with open(os.path.join(LFW_HOME, 'pairsDevTest.txt'), 'wb') as f:
-        f.write("Fake place holder that won't be tested".encode())
+        f.write(b"Fake place holder that won't be tested")

     with open(os.path.join(LFW_HOME, 'pairs.txt'), 'wb') as f:
-        f.write("Fake place holder that won't be tested".encode())
+        f.write(b"Fake place holder that won't be tested")


 def teardown_module():

From 3cce1c6a3de1034d31022fcdae2103e053f84f7e Mon Sep 17 00:00:00 2001
From: Andreas Mueller
Date: Fri, 28 Dec 2018 13:55:16 -0500
Subject: [PATCH 27/29] pep8
---
 .../bench_sample_without_replacement.py | 34 +++++++++----------
 sklearn/datasets/svmlight_format.py | 2 +-
 sklearn/multioutput.py | 3 +-
 3 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/benchmarks/bench_sample_without_replacement.py b/benchmarks/bench_sample_without_replacement.py
index 4fb23efb7f24f..c993f719ac245 100644
--- a/benchmarks/bench_sample_without_replacement.py
+++ b/benchmarks/bench_sample_without_replacement.py
@@ -89,49 +89,47 @@ def bench_sample(sampling, n_population, n_samples):
     # Set Python core input
     sampling_algorithm["python-core-sample"] = \
         lambda n_population, n_sample: \
-            random.sample(range(n_population), n_sample)
+        random.sample(range(n_population), n_sample)

     ###########################################################################
     # Set custom automatic method selection
     sampling_algorithm["custom-auto"] = \
         lambda n_population, n_samples, random_state=None: \
-            sample_without_replacement(n_population,
-                                       n_samples,
-                                       method="auto",
-                                       random_state=random_state)
+        sample_without_replacement(n_population, n_samples, method="auto",
+                                   random_state=random_state)

     ###########################################################################
     # Set custom tracking based method
     sampling_algorithm["custom-tracking-selection"] = \
         lambda n_population, n_samples, random_state=None: \
-            sample_without_replacement(n_population,
-                                       n_samples,
-                                       method="tracking_selection",
-                                       random_state=random_state)
+        sample_without_replacement(n_population,
+                                   n_samples,
+                                   method="tracking_selection",
+                                   random_state=random_state)

     ###########################################################################
     # Set custom reservoir based method
     sampling_algorithm["custom-reservoir-sampling"] = \
         lambda n_population, n_samples, random_state=None: \
-            sample_without_replacement(n_population,
-                                       n_samples,
-                                       method="reservoir_sampling",
-                                       random_state=random_state)
+        sample_without_replacement(n_population,
+                                   n_samples,
+                                   method="reservoir_sampling",
+                                   random_state=random_state)

     ###########################################################################
     # Set custom reservoir based method
     sampling_algorithm["custom-pool"] = \
         lambda n_population, n_samples, random_state=None: \
-            sample_without_replacement(n_population,
-                                       n_samples,
-                                       method="pool",
-                                       random_state=random_state)
+        sample_without_replacement(n_population,
+                                   n_samples,
+                                   method="pool",
+                                   random_state=random_state)

     ###########################################################################
     # Numpy permutation based
     sampling_algorithm["numpy-permutation"] = \
         lambda n_population, n_sample: \
-            np.random.permutation(n_population)[:n_sample]
+        np.random.permutation(n_population)[:n_sample]

     ###########################################################################
     # Remove unspecified algorithm
diff --git a/sklearn/datasets/svmlight_format.py b/sklearn/datasets/svmlight_format.py
index 2c2028b53ae4b..d5ad1cabb7180 100644
--- a/sklearn/datasets/svmlight_format.py
+++ b/sklearn/datasets/svmlight_format.py
@@ -344,7 +344,7 @@ def _dump_svmlight(X, y, f, multilabel, one_based, comment, query_id):
         f.write(b"# Generated by dump_svmlight_file from scikit-learn %s\n"
                 % __version__)
         f.write(b"# Column indices are %s-based\n"
-                 % ["zero", "one"][one_based])
+                % ["zero", "one"][one_based])

         f.write(b"#\n")
         f.writelines(b"# %s\n" % line for line in comment.splitlines())
diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py
index e5fc27f79b76b..a3ec122140d68 100644
--- a/sklearn/multioutput.py
+++ b/sklearn/multioutput.py
@@ -59,7 +59,8 @@ def _partial_fit_estimator(estimator, X, y, classes=None, sample_weight=None,
     return estimator


-class MultiOutputEstimator(BaseEstimator, MetaEstimatorMixin, metaclass=ABCMeta):
+class MultiOutputEstimator(BaseEstimator, MetaEstimatorMixin,
+                           metaclass=ABCMeta):
     @abstractmethod
     def __init__(self, estimator, n_jobs=None):
         self.estimator = estimator

From b849e4e3a7a528cf5fec30f449d187222ad2720f Mon Sep 17 00:00:00 2001
From: Andreas Mueller
Date: Fri, 28 Dec 2018 15:35:41 -0500
Subject: [PATCH 28/29] string formatting fun
---
 sklearn/datasets/tests/test_lfw.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py
index 2a0559598a7d6..1afd09084371c 100644
--- a/sklearn/datasets/tests/test_lfw.py
+++ b/sklearn/datasets/tests/test_lfw.py
@@ -86,8 +86,9 @@ def setup_module():
             first_name, second_name = random_state.sample(FAKE_NAMES, 2)
             first_index = random_state.choice(np.arange(counts[first_name]))
             second_index = random_state.choice(np.arange(counts[second_name]))
-            f.write(b'%s\t%d\t%s\t%d\n' % (first_name, first_index,
-                                           second_name, second_index))
+            f.write(('%s\t%d\t%s\t%d\n' % (first_name, first_index,
+                                           second_name, second_index)
+                     ).encode())

     with open(os.path.join(LFW_HOME, 'pairsDevTest.txt'), 'wb') as f:
         f.write(b"Fake place holder that won't be tested")

From 3df76787fb595b0dfbd5b87d33383bf14e1e82e9 Mon Sep 17 00:00:00 2001
From: Andreas Mueller
Date: Fri, 28 Dec 2018 15:37:54 -0500
Subject: [PATCH 29/29] more string formatting fun
---
 sklearn/datasets/svmlight_format.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/datasets/svmlight_format.py b/sklearn/datasets/svmlight_format.py
index d5ad1cabb7180..bef7c7b471a3a 100644
--- a/sklearn/datasets/svmlight_format.py
+++ b/sklearn/datasets/svmlight_format.py
@@ -341,10 +341,10 @@ def _dump_svmlight(X, y, f, multilabel, one_based, comment, query_id):
     line_pattern += " %s\n"

     if comment:
-        f.write(b"# Generated by dump_svmlight_file from scikit-learn %s\n"
-                % __version__)
-        f.write(b"# Column indices are %s-based\n"
-                % ["zero", "one"][one_based])
+        f.write(("# Generated by dump_svmlight_file from scikit-learn %s\n"
+                 % __version__).encode())
+        f.write(("# Column indices are %s-based\n"
+                 % ["zero", "one"][one_based]).encode())

         f.write(b"#\n")
         f.writelines(b"# %s\n" % line for line in comment.splitlines())
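
A note on the last two patches: they revert the comment-header writes from bytes
%-formatting back to str formatting followed by .encode(), because on Python 3 a
bytes format string only accepts bytes-like or numeric arguments (PEP 461), while
__version__ and ["zero", "one"][one_based] are str. The snippet below is purely
illustrative and not part of the patch series; the version string is a placeholder:

    # %-formatting a bytes literal with a str argument raises TypeError on
    # Python 3 (PEP 461 allows only bytes-like or numeric arguments).
    try:
        b"# Generated by dump_svmlight_file from scikit-learn %s\n" % "0.21.dev0"
    except TypeError as exc:
        print(exc)  # e.g. "%b requires a bytes-like object ... not 'str'"

    # Formatting as str first and encoding afterwards works for any argument,
    # which is why PATCH 29 restores the ("..." % value).encode() form.
    header = ("# Generated by dump_svmlight_file from scikit-learn %s\n"
              % "0.21.dev0").encode()
    print(header)  # b'# Generated by dump_svmlight_file from scikit-learn 0.21.dev0\n'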