MRG Drop legacy python / remove six dependencies #12639

Merged: 32 commits, Jan 3, 2019
Commits
af354b4  Start removing python 2.7 (amueller, Nov 20, 2018)
b254876  remove xrange (amueller, Nov 20, 2018)
f01b60f  removing xrange and more six stuff (amueller, Nov 20, 2018)
d0dd1d1  remove string_types and six imports (amueller, Nov 20, 2018)
4d1e71e  six no more (amueller, Nov 20, 2018)
cb844cd  six iteritems (amueller, Nov 21, 2018)
067a1e3  six metaclass (amueller, Nov 21, 2018)
44ec241  metaclass with two base classes (amueller, Nov 21, 2018)
185e0bf  multi-line metaclasses (amueller, Nov 21, 2018)
6791cbf  more metaclass and b fun (amueller, Nov 21, 2018)
adfef75  getting rid of six (and python2) (amueller, Nov 21, 2018)
aa3f485  another moves (amueller, Nov 21, 2018)
6a5815b  build on 32bit python3.5 (amueller, Nov 21, 2018)
906576e  remove b in pyx (amueller, Nov 21, 2018)
4031580  minor six fixes (amueller, Nov 21, 2018)
8aa75f1  typo (amueller, Nov 21, 2018)
3d0e648  Merge branch 'master' into drop_legacy_python (amueller, Dec 14, 2018)
28c9fd7  unused imports, minor cleanups (amueller, Dec 14, 2018)
03bf639  remove six import from openml test (amueller, Dec 14, 2018)
14c3219  remove six from bicluster example (amueller, Dec 14, 2018)
c450e95  revert externals (amueller, Dec 14, 2018)
559db2b  fix some encoding stuff (amueller, Dec 14, 2018)
fb6a96a  fix more bytes issues/typos (amueller, Dec 14, 2018)
823e188  Merge branch 'master' into drop_legacy_python (amueller, Dec 17, 2018)
9d146f2  Merge branch 'master' into drop_legacy_python (amueller, Dec 28, 2018)
e414e69  undo encode() changes (amueller, Dec 28, 2018)
8c849be  fix cython b (amueller, Dec 28, 2018)
eb8a3ce  fix remark by roman (amueller, Dec 28, 2018)
ac02f51  Apply suggestions from code review (rth, Dec 28, 2018)
3cce1c6  pep8 (amueller, Dec 28, 2018)
b849e4e  string formatting fun (amueller, Dec 28, 2018)
3df7678  more string formatting fun (amueller, Dec 28, 2018)
8 changes: 4 additions & 4 deletions benchmarks/bench_plot_fastkmeans.py
@@ -104,15 +104,15 @@ def compute_bench_2(chunks):
    results = compute_bench(samples_range, features_range)
    results_2 = compute_bench_2(chunks)

-   max_time = max([max(i) for i in [t for (label, t) in six.iteritems(results)
+   max_time = max([max(i) for i in [t for (label, t) in results.items()
                    if "speed" in label]])
    max_inertia = max([max(i) for i in [
-       t for (label, t) in six.iteritems(results)
+       t for (label, t) in results.items()
        if "speed" not in label]])

    fig = plt.figure('scikit-learn K-Means benchmark results')
    for c, (label, timings) in zip('brcy',
-                                  sorted(six.iteritems(results))):
+                                  sorted(results.items())):
        if 'speed' in label:
            ax = fig.add_subplot(2, 2, 1, projection='3d')
            ax.set_zlim3d(0.0, max_time * 1.1)
@@ -129,7 +129,7 @@ def compute_bench_2(chunks):

    i = 0
    for c, (label, timings) in zip('br',
-                                  sorted(six.iteritems(results_2))):
+                                  sorted(results_2.items())):
        i += 1
        ax = fig.add_subplot(2, 2, i + 2)
        y = np.asarray(timings)
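This is the core `six.iteritems` migration seen throughout the PR: on Python 2, `iteritems()` avoided copying the pairs into a list; on Python 3, `dict.items()` already returns a lazy view, so the wrapper can simply go. A minimal sketch of the pattern, using a made-up `results` dict::

    # Hypothetical data, for illustration only.
    results = {'kmeans speed': [0.5, 1.2], 'kmeans quality': [0.9, 0.8]}

    # Python 2 era (via six): for label, t in six.iteritems(results): ...
    # Python 3: items() is a lazy view; sorted() gives a deterministic order.
    for label, timings in sorted(results.items()):
        print(label, max(timings))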
2 changes: 1 addition & 1 deletion benchmarks/bench_plot_omp_lars.py
@@ -109,7 +109,7 @@ def compute_bench(samples_range, features_range):

    import matplotlib.pyplot as plt
    fig = plt.figure('scikit-learn OMP vs. LARS benchmark results')
-   for i, (label, timings) in enumerate(sorted(six.iteritems(results))):
+   for i, (label, timings) in enumerate(sorted(results.items())):
        ax = fig.add_subplot(1, 2, i+1)
        vmax = max(1 - timings.min(), -1 + timings.max())
        plt.matshow(timings, fignum=False, vmin=1 - vmax, vmax=1 + vmax)
2 changes: 1 addition & 1 deletion benchmarks/bench_plot_svd.py
@@ -66,7 +66,7 @@ def compute_bench(samples_range, features_range, n_iter=3, rank=50):
    label = 'scikit-learn singular value decomposition benchmark results'
    fig = plt.figure(label)
    ax = fig.gca(projection='3d')
-   for c, (label, timings) in zip('rbg', sorted(six.iteritems(results))):
+   for c, (label, timings) in zip('rbg', sorted(results.items())):
        X, Y = np.meshgrid(samples_range, features_range)
        Z = np.asarray(timings).reshape(samples_range.shape[0],
                                        features_range.shape[0])
3 changes: 1 addition & 2 deletions benchmarks/bench_random_projections.py
@@ -19,7 +19,6 @@
import scipy.sparse as sp

from sklearn import clone
-from sklearn.externals.six.moves import xrange
from sklearn.random_projection import (SparseRandomProjection,
                                       GaussianRandomProjection,
                                       johnson_lindenstrauss_min_dim)
@@ -212,7 +211,7 @@ def print_row(clf_type, time_fit, time_transform):
for name in selected_transformers:
    print("Perform benchmarks for %s..." % name)

-   for iteration in xrange(opts.n_times):
+   for iteration in range(opts.n_times):
        print("\titer %s..." % iteration, end="")
        time_to_fit, time_to_transform = bench_scikit_transformer(X_dense,
                                                                  transformers[name])
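The `xrange` renames are equally mechanical: Python 3's builtin `range` is a lazy sequence with the semantics of Python 2's `xrange`. A quick sketch::

    import sys

    r = range(10 ** 8)        # O(1) memory: no list of 10**8 ints is built
    print(sys.getsizeof(r))   # a few dozen bytes
    print(len(r), r[-1])      # ranges support len() and indexing
    print(99999999 in r)      # and O(1) membership tests for ints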
45 changes: 21 additions & 24 deletions benchmarks/bench_sample_without_replacement.py
@@ -15,7 +15,6 @@
import numpy as np
import random

-from sklearn.externals.six.moves import xrange
from sklearn.utils.random import sample_without_replacement


@@ -90,49 +89,47 @@ def bench_sample(sampling, n_population, n_samples):
    # Set Python core input
    sampling_algorithm["python-core-sample"] = \
        lambda n_population, n_sample: \
-       random.sample(xrange(n_population), n_sample)
+       random.sample(range(n_population), n_sample)

    ###########################################################################
    # Set custom automatic method selection
    sampling_algorithm["custom-auto"] = \
        lambda n_population, n_samples, random_state=None: \
-       sample_without_replacement(n_population,
-                                  n_samples,
-                                  method="auto",
-                                  random_state=random_state)
+       sample_without_replacement(n_population, n_samples, method="auto",
+                                  random_state=random_state)

    ###########################################################################
    # Set custom tracking based method
    sampling_algorithm["custom-tracking-selection"] = \
        lambda n_population, n_samples, random_state=None: \
        sample_without_replacement(n_population,
                                   n_samples,
                                   method="tracking_selection",
                                   random_state=random_state)

    ###########################################################################
    # Set custom reservoir based method
    sampling_algorithm["custom-reservoir-sampling"] = \
        lambda n_population, n_samples, random_state=None: \
        sample_without_replacement(n_population,
                                   n_samples,
                                   method="reservoir_sampling",
                                   random_state=random_state)

    ###########################################################################
    # Set custom reservoir based method
    sampling_algorithm["custom-pool"] = \
        lambda n_population, n_samples, random_state=None: \
        sample_without_replacement(n_population,
                                   n_samples,
                                   method="pool",
                                   random_state=random_state)

    ###########################################################################
    # Numpy permutation based
    sampling_algorithm["numpy-permutation"] = \
        lambda n_population, n_sample: \
        np.random.permutation(n_population)[:n_sample]

    ###########################################################################
    # Remove unspecified algorithm
@@ -156,11 +153,11 @@ def bench_sample(sampling, n_population, n_samples):
print("Perform benchmarks for %s..." % name, end="")
time[name] = np.zeros(shape=(opts.n_steps, opts.n_times))

for step in xrange(opts.n_steps):
for it in xrange(opts.n_times):
for step in range(opts.n_steps):
for it in range(opts.n_times):
time[name][step, it] = bench_sample(sampling_algorithm[name],
opts.n_population,
n_samples[step])
opts.n_population,
n_samples[step])

print("done")

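Note that `random.sample(range(n_population), n_sample)` stays efficient: `random.sample` only needs a sequence supporting `len()` and indexing, which a Python 3 `range` provides without materializing the population. A sketch::

    import random

    random.seed(0)
    picks = random.sample(range(1000000), 5)  # no million-element list built
    print(picks)  # five distinct ints from [0, 1000000)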
9 changes: 4 additions & 5 deletions doc/conf.py
@@ -15,7 +15,6 @@
from __future__ import print_function
import sys
import os
-from sklearn.externals.six import u

# If extensions (or modules to document with autodoc) are in another
# directory, add these directories to sys.path here. If the directory
@@ -74,8 +73,8 @@
master_doc = 'index'

# General information about the project.
-project = u('scikit-learn')
-copyright = u('2007 - 2018, scikit-learn developers (BSD License)')
+project = 'scikit-learn'
+copyright = '2007 - 2018, scikit-learn developers (BSD License)'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
@@ -214,8 +213,8 @@
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass
# [howto/manual]).
-latex_documents = [('index', 'user_guide.tex', u('scikit-learn user guide'),
-                    u('scikit-learn developers'), 'manual'), ]
+latex_documents = [('index', 'user_guide.tex', 'scikit-learn user guide',
+                    'scikit-learn developers', 'manual'), ]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
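Dropping `six.u` is safe because every `str` literal in Python 3 is already Unicode; the wrapper existed only so the same literal produced `unicode` on Python 2. A sketch::

    # Python 2 era: project = u('scikit-learn')  ->  u'scikit-learn'
    project = 'scikit-learn'  # a Python 3 literal is Unicode text already
    assert isinstance(project, str)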
4 changes: 1 addition & 3 deletions doc/developers/contributing.rst
@@ -908,9 +908,7 @@ in the examples.
Python versions supported
-------------------------

-All scikit-learn code should work unchanged in Python 3.5 or
-newer.
-
+Since scikit-learn 0.21, only Python 3.5 and newer is supported.

.. _code_review:

4 changes: 2 additions & 2 deletions doc/developers/performance.rst
@@ -227,13 +227,13 @@ Now restart IPython and let us use this new toy::
    178                                           # values justified in the paper
    179       48          144      3.0      0.0   alpha = 1
    180       48          113      2.4      0.0   beta = 0.1
-   181      638         1880      2.9      0.1   for n_iter in xrange(1, max_iter + 1):
+   181      638         1880      2.9      0.1   for n_iter in range(1, max_iter + 1):
    182      638       195133    305.9     10.2       grad = np.dot(WtW, H) - WtV
    183      638       495761    777.1     25.9       proj_gradient = norm(grad[np.logical_or(grad < 0, H > 0)])
    184      638         2449      3.8      0.1       if proj_gradient < tol:
    185       48          130      2.7      0.0           break
    186
-   187     1474         4474      3.0      0.2   for inner_iter in xrange(1, 20):
+   187     1474         4474      3.0      0.2   for inner_iter in range(1, 20):
    188     1474        83833     56.9      4.4       Hn = H - alpha * grad
    189                                               # Hn = np.where(Hn > 0, Hn, 0)
    190     1474       194239    131.8     10.1       Hn = _pos(Hn)
8 changes: 4 additions & 4 deletions examples/applications/plot_out_of_core_classification.py
@@ -40,8 +40,8 @@
import matplotlib.pyplot as plt
from matplotlib import rcParams

-from sklearn.externals.six.moves import html_parser
-from sklearn.externals.six.moves.urllib.request import urlretrieve
+from html.parser import HTMLParser
+from urllib.request import urlretrieve
from sklearn.datasets import get_data_home
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.linear_model import SGDClassifier
@@ -60,11 +60,11 @@ def _not_in_sphinx():
#


-class ReutersParser(html_parser.HTMLParser):
+class ReutersParser(HTMLParser):
    """Utility class to parse a SGML file and yield documents one at a time."""

    def __init__(self, encoding='latin-1'):
-       html_parser.HTMLParser.__init__(self)
+       HTMLParser.__init__(self)
        self._reset()
        self.encoding = encoding
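`six.moves` was a table of aliases papering over the Python 3 stdlib reorganization; with Python 2 gone, the real locations are imported directly (`six.moves.html_parser` becomes `html.parser`, `six.moves.urllib.request` becomes `urllib.request`). A self-contained sketch of the parser pattern, with a hypothetical `DemoParser`::

    from html.parser import HTMLParser


    class DemoParser(HTMLParser):
        """Tiny HTMLParser subclass mirroring the ReutersParser setup."""

        def __init__(self):
            HTMLParser.__init__(self)  # as in the example; super() works too
            self.tags = []

        def handle_starttag(self, tag, attrs):
            self.tags.append(tag)


    parser = DemoParser()
    parser.feed('<html><body><p>hi</p></body></html>')
    print(parser.tags)  # ['html', 'body', 'p']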
3 changes: 1 addition & 2 deletions examples/applications/svm_gui.py
@@ -39,7 +39,6 @@

from sklearn import svm
from sklearn.datasets import dump_svmlight_file
-from sklearn.externals.six.moves import xrange

y_min, y_max = -50, 50
x_min, x_max = -50, 50
@@ -187,7 +186,7 @@ def update_example(self, model, idx):

    def update(self, event, model):
        if event == "examples_loaded":
-           for i in xrange(len(model.data)):
+           for i in range(len(model.data)):
                self.update_example(model, i)

        if event == "example_added":
5 changes: 2 additions & 3 deletions examples/applications/wikipedia_principal_eigenvector.py
@@ -47,8 +47,7 @@
from joblib import Memory

from sklearn.decomposition import randomized_svd
-from sklearn.externals.six.moves.urllib.request import urlopen
-from sklearn.externals.six import iteritems
+from urllib.request import urlopen


print(__doc__)
@@ -173,7 +172,7 @@ def get_adjacency_matrix(redirects_filename, page_links_filename, limit=None):
# stop after 5M links to make it possible to work in RAM
X, redirects, index_map = get_adjacency_matrix(
    redirects_filename, page_links_filename, limit=5000000)
-names = dict((i, name) for name, i in iteritems(index_map))
+names = dict((i, name) for name, i in index_map.items())

print("Computing the principal singular vectors using randomized_svd")
t0 = time()
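The generator expression survives the migration verbatim; under Python 3 a dict comprehension is the more idiomatic spelling for inverting the mapping. A sketch with a toy `index_map` (hypothetical values)::

    index_map = {'Alan_Turing': 0, 'Ada_Lovelace': 1}

    names = dict((i, name) for name, i in index_map.items())  # as in the diff
    names2 = {i: name for name, i in index_map.items()}       # comprehension
    assert names == names2 == {0: 'Alan_Turing', 1: 'Ada_Lovelace'}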
3 changes: 1 addition & 2 deletions examples/bicluster/plot_bicluster_newsgroups.py
@@ -32,7 +32,6 @@

from sklearn.cluster.bicluster import SpectralCoclustering
from sklearn.cluster import MiniBatchKMeans
-from sklearn.externals.six import iteritems
from sklearn.datasets.twenty_newsgroups import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.cluster import v_measure_score
@@ -116,7 +115,7 @@ def most_common(d):

    Like Counter.most_common in Python >=2.7.
    """
-   return sorted(iteritems(d), key=operator.itemgetter(1), reverse=True)
+   return sorted(d.items(), key=operator.itemgetter(1), reverse=True)


bicluster_ncuts = list(bicluster_ncut(i)
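With Python 2.6 long out of the picture, this helper duplicates `collections.Counter.most_common`, which sorts by count descending the same way. A sketch of the equivalence::

    import operator
    from collections import Counter

    d = {'word': 3, 'vector': 5, 'cluster': 1}

    by_count = sorted(d.items(), key=operator.itemgetter(1), reverse=True)
    print(by_count)  # [('vector', 5), ('word', 3), ('cluster', 1)]
    assert Counter(d).most_common() == by_count  # stdlib equivalent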
2 changes: 0 additions & 2 deletions examples/ensemble/plot_adaboost_multiclass.py
@@ -29,8 +29,6 @@
#
# License: BSD 3 clause

-from sklearn.externals.six.moves import zip
-
import matplotlib.pyplot as plt

from sklearn.datasets import make_gaussian_quantiles
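`six.moves.zip` aliased `itertools.izip` on Python 2 so that `zip` was lazy everywhere; the Python 3 builtin is already an iterator, so the import vanishes with no replacement. A sketch::

    pairs = zip(range(3), 'abc')  # lazy iterator, like six.moves.zip
    print(next(pairs))            # (0, 'a')
    print(list(pairs))            # [(1, 'b'), (2, 'c')]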
6 changes: 3 additions & 3 deletions sklearn/base.py
@@ -10,7 +10,7 @@

import numpy as np
from scipy import sparse
-from .externals import six
+
from . import __version__


@@ -58,7 +58,7 @@ def clone(estimator, safe=True):
                        % (repr(estimator), type(estimator)))
    klass = estimator.__class__
    new_object_params = estimator.get_params(deep=False)
-   for name, param in six.iteritems(new_object_params):
+   for name, param in new_object_params.items():
        new_object_params[name] = clone(param, safe=False)
    new_object = klass(**new_object_params)
    params_set = new_object.get_params(deep=False)
@@ -97,7 +97,7 @@ def _pprint(params, offset=0, printer=repr):
    params_list = list()
    this_line_length = offset
    line_sep = ',\n' + (1 + offset // 2) * ' '
-   for i, (k, v) in enumerate(sorted(six.iteritems(params))):
+   for i, (k, v) in enumerate(sorted(params.items())):
        if type(v) is float:
            # use str for representing floating point numbers
            # this way we get consistent representation across
4 changes: 1 addition & 3 deletions sklearn/cluster/bicluster.py
@@ -14,7 +14,6 @@

from . import KMeans, MiniBatchKMeans
from ..base import BaseEstimator, BiclusterMixin
-from ..externals import six
from ..utils import check_random_state

from ..utils.extmath import (make_nonnegative, randomized_svd,
@@ -85,8 +84,7 @@ def _log_normalize(X):
    return L - row_avg - col_avg + avg


-class BaseSpectral(six.with_metaclass(ABCMeta, BaseEstimator,
-                                      BiclusterMixin)):
+class BaseSpectral(BaseEstimator, BiclusterMixin, metaclass=ABCMeta):
    """Base class for spectral biclustering."""

    @abstractmethod
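`six.with_metaclass(ABCMeta, ...)` synthesized a temporary base class so one spelling could set a metaclass on both Python 2 (`__metaclass__`) and Python 3; the Python 3 `class` statement takes `metaclass=` directly, even with multiple bases. A sketch with hypothetical stand-in bases::

    from abc import ABCMeta, abstractmethod


    class EstimatorBase:  # stand-ins for BaseEstimator / BiclusterMixin
        pass


    class MixinBase:
        pass


    class BaseSpectralSketch(EstimatorBase, MixinBase, metaclass=ABCMeta):
        @abstractmethod
        def _fit(self, X):
            """Concrete subclasses must implement _fit."""


    try:
        BaseSpectralSketch()  # abstract: instantiation raises TypeError
    except TypeError as exc:
        print(exc)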
3 changes: 1 addition & 2 deletions sklearn/cluster/birch.py
@@ -11,7 +11,6 @@

from ..metrics.pairwise import euclidean_distances
from ..base import TransformerMixin, ClusterMixin, BaseEstimator
-from ..externals.six.moves import xrange
from ..utils import check_array
from ..utils.extmath import row_norms, safe_sparse_dot
from ..utils.validation import check_is_fitted
@@ -29,7 +28,7 @@ def _iterate_sparse_X(X):
    X_data = X.data
    X_indptr = X.indptr

-   for i in xrange(n_samples):
+   for i in range(n_samples):
        row = np.zeros(X.shape[1])
        startptr, endptr = X_indptr[i], X_indptr[i + 1]
        nonzero_indices = X_indices[startptr:endptr]
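`_iterate_sparse_X` above walks a CSR matrix row by row: `indptr[i]:indptr[i + 1]` delimits row `i`'s entries in `indices` and `data`. A sketch of the same pattern on a toy matrix::

    import numpy as np
    import scipy.sparse as sp

    X = sp.csr_matrix(np.array([[0., 2., 0.],
                                [1., 0., 3.]]))
    for i in range(X.shape[0]):
        row = np.zeros(X.shape[1])
        startptr, endptr = X.indptr[i], X.indptr[i + 1]
        row[X.indices[startptr:endptr]] = X.data[startptr:endptr]
        print(row)  # [0. 2. 0.] then [1. 0. 3.]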