From af354b47e9b8937becbe37b04b643d1abd4a7a8b Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 20 Nov 2018 17:44:06 -0500 Subject: [PATCH 01/29] Start removing python 2.7 --- .travis.yml | 18 -------------- README.rst | 2 +- appveyor.yml | 5 ---- conftest.py | 4 --- doc/developers/contributing.rst | 14 +---------- doc/install.rst | 2 +- setup.py | 3 --- sklearn/datasets/species_distributions.py | 11 ++------- sklearn/feature_extraction/_hashing.pyx | 10 +++----- sklearn/model_selection/_split.py | 5 ---- sklearn/model_selection/tests/test_search.py | 7 ++---- sklearn/utils/bench.py | 26 -------------------- sklearn/utils/fixes.py | 2 -- 13 files changed, 10 insertions(+), 99 deletions(-) delete mode 100644 sklearn/utils/bench.py diff --git a/.travis.yml b/.travis.yml index e976cacd06c96..f1f161ca9d4e8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,24 +20,6 @@ env: matrix: include: - # This environment tests that scikit-learn can be built against - # versions of numpy, scipy with ATLAS that comes with Ubuntu Trusty 14.04 - # i.e. numpy 1.8.2 and scipy 0.13.3 - - env: DISTRIB="ubuntu" PYTHON_VERSION="2.7" CYTHON_VERSION="0.23.5" - COVERAGE=true - if: type != cron - addons: - apt: - packages: - # these only required by the DISTRIB="ubuntu" builds: - - python-scipy - - libatlas3-base - - libatlas-dev - # Python 3.4 build - - env: DISTRIB="conda" PYTHON_VERSION="3.4" INSTALL_MKL="false" - NUMPY_VERSION="1.10.4" SCIPY_VERSION="0.16.1" CYTHON_VERSION="0.25.2" - PILLOW_VERSION="4.0.0" COVERAGE=true - if: type != cron # Python 3.5 build - env: DISTRIB="conda" PYTHON_VERSION="3.5" INSTALL_MKL="false" NUMPY_VERSION="1.10.4" SCIPY_VERSION="0.16.1" CYTHON_VERSION="0.25.2" diff --git a/README.rst b/README.rst index 70537af856f61..b2370fbde06a1 100644 --- a/README.rst +++ b/README.rst @@ -50,7 +50,7 @@ Dependencies scikit-learn requires: -- Python (>= 2.7 or >= 3.4) +- Python (>= 3.5) - NumPy (>= 1.8.2) - SciPy (>= 0.13.3) diff --git a/appveyor.yml b/appveyor.yml index e26a02c90cd39..531aaca31aec5 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -22,11 +22,6 @@ environment: PYTHON_ARCH: "64" CHECK_WARNINGS: "true" - - PYTHON: "C:\\Python27" - PYTHON_VERSION: "2.7.8" - PYTHON_ARCH: "32" - - # Because we only have a single worker, we don't want to waste precious # appveyor CI time and make other PRs wait for repeated failures in a failing # PR. The following option cancels pending jobs in a given PR after the first diff --git a/conftest.py b/conftest.py index 50a3d3470a47a..495d47a6afc9d 100644 --- a/conftest.py +++ b/conftest.py @@ -11,8 +11,6 @@ import pytest from _pytest.doctest import DoctestItem -from sklearn.utils.fixes import PY3_OR_LATER - PYTEST_MIN_VERSION = '3.3.0' if LooseVersion(pytest.__version__) < PYTEST_MIN_VERSION: @@ -50,8 +48,6 @@ def pytest_collection_modifyitems(config, items): # run doctests only for numpy >= 1.14. We want to skip the doctest for # python 2 due to unicode. skip_doctests = False - if not PY3_OR_LATER: - skip_doctests = True try: import numpy as np if LooseVersion(np.__version__) < LooseVersion('1.14'): diff --git a/doc/developers/contributing.rst b/doc/developers/contributing.rst index 0bb8b1fc39f59..53fa23bafac7a 100644 --- a/doc/developers/contributing.rst +++ b/doc/developers/contributing.rst @@ -891,19 +891,7 @@ in the examples. Python versions supported ------------------------- -All scikit-learn code should work unchanged in both Python 2.7 and 3.4 or -newer. 
Since Python 3.x is not backwards compatible, that may require changes -to code and it certainly requires testing on both 2.7 and 3.4 or newer. - -For most numerical algorithms, Python 3.x support is easy: -just remember that ``print`` is a function and -integer division is written ``//``. -String handling has been overhauled, though, as have parts of -the Python standard library. -The `six `_ package helps with -cross-compatibility and is included in scikit-learn as -``sklearn.externals.six``. - +Since scikit-learn 0.21, only Python 3.5 and newer is supported. .. _code_review: diff --git a/doc/install.rst b/doc/install.rst index bb6b67af3e3cb..7ac8eb5f077ec 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -17,7 +17,7 @@ Installing the latest release Scikit-learn requires: -- Python (>= 2.7 or >= 3.4), +- Python (>= 3.5), - NumPy (>= 1.8.2), - SciPy (>= 0.13.3). diff --git a/setup.py b/setup.py index e25c50a114a33..1e421acfabea6 100755 --- a/setup.py +++ b/setup.py @@ -183,10 +183,7 @@ def setup_package(): 'Operating System :: POSIX', 'Operating System :: Unix', 'Operating System :: MacOS', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', diff --git a/sklearn/datasets/species_distributions.py b/sklearn/datasets/species_distributions.py index da158eb24aa33..062dceeabbbf2 100644 --- a/sklearn/datasets/species_distributions.py +++ b/sklearn/datasets/species_distributions.py @@ -41,7 +41,6 @@ from os import makedirs, remove from os.path import exists -import sys import logging import numpy as np @@ -53,8 +52,6 @@ from sklearn.datasets.base import _pkl_filepath from sklearn.utils import _joblib -PY3_OR_LATER = sys.version_info[0] >= 3 - # The original data can be found at: # https://biodiversityinformatics.amnh.org/open_source/maxent/samples.zip SAMPLES = RemoteFileMetadata( @@ -106,12 +103,8 @@ def _load_csv(F): rec : np.ndarray record array representing the data """ - if PY3_OR_LATER: - # Numpy recarray wants Python 3 str but not bytes... - names = F.readline().decode('ascii').strip().split(',') - else: - # Numpy recarray wants Python 2 str but not unicode - names = F.readline().strip().split(',') + # Numpy recarray wants Python 3 str but not bytes... + names = F.readline().decode('ascii').strip().split(',') rec = np.loadtxt(F, skiprows=0, delimiter=',', dtype='a22,f4,f4') rec.dtype.names = names diff --git a/sklearn/feature_extraction/_hashing.pyx b/sklearn/feature_extraction/_hashing.pyx index c462dd8a24719..24b38a081faf5 100644 --- a/sklearn/feature_extraction/_hashing.pyx +++ b/sklearn/feature_extraction/_hashing.pyx @@ -35,13 +35,9 @@ def transform(raw_X, Py_ssize_t n_features, dtype, bint alternate_sign=1): cdef array.array indices cdef array.array indptr indices = array.array("i") - if sys.version_info >= (3, 3): - indices_array_dtype = "q" - indices_np_dtype = np.longlong - else: - # On Windows with PY2.7 long int would still correspond to 32 bit. 
- indices_array_dtype = "l" - indices_np_dtype = np.int_ + indices_array_dtype = "q" + indices_np_dtype = np.longlong + indptr = array.array(indices_array_dtype, [0]) diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index 2eccb50fcc976..356c018c58c7d 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -66,11 +66,6 @@ class BaseCrossValidator(with_metaclass(ABCMeta)): Implementations must define `_iter_test_masks` or `_iter_test_indices`. """ - def __init__(self): - # We need this for the build_repr to work properly in py2.7 - # see #6304 - pass - def split(self, X, y=None, groups=None): """Generate indices to split data into training and test set. diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index dfdcb504912f1..95f8ac1bd9929 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -13,7 +13,6 @@ import pytest from sklearn.utils.fixes import sp_version -from sklearn.utils.fixes import PY3_OR_LATER from sklearn.utils.fixes import _Iterable as Iterable, _Sized as Sized from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_not_equal @@ -429,10 +428,8 @@ def test_grid_search_when_param_grid_includes_range(): # Test that the best estimator contains the right value for foo_param clf = MockClassifier() grid_search = None - if PY3_OR_LATER: - grid_search = GridSearchCV(clf, {'foo_param': range(1, 4)}) - else: - grid_search = GridSearchCV(clf, {'foo_param': xrange(1, 4)}) + grid_search = GridSearchCV(clf, {'foo_param': range(1, 4)}) + grid_search.fit(X, y) assert_equal(grid_search.best_estimator_.foo_param, 2) diff --git a/sklearn/utils/bench.py b/sklearn/utils/bench.py deleted file mode 100644 index 3ea26ec6b395f..0000000000000 --- a/sklearn/utils/bench.py +++ /dev/null @@ -1,26 +0,0 @@ -""" -Helper functions for benchmarking -""" - - -def total_seconds(delta): - """ - helper function to emulate function total_seconds, - introduced in python2.7 - - https://docs.python.org/library/datetime.html\ -#datetime.timedelta.total_seconds - - Parameters - ---------- - delta : datetime object - - Returns - ------- - int - The number of seconds contained in delta - """ - - mu_sec = 1e-6 # number of seconds in one microseconds - - return delta.seconds + delta.microseconds * mu_sec diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index c92a91ad0a0d1..d966b1c7a4875 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -12,7 +12,6 @@ import os import errno -import sys from distutils.version import LooseVersion @@ -42,7 +41,6 @@ def _parse_version(version_string): np_version = _parse_version(np.__version__) sp_version = _parse_version(scipy.__version__) -PY3_OR_LATER = sys.version_info[0] >= 3 # Remove when minimum required NumPy >= 1.10 From b25487637b5c034f65ac2b9412a16db2cb612104 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 20 Nov 2018 17:55:52 -0500 Subject: [PATCH 02/29] remove xrange --- benchmarks/bench_random_projections.py | 3 +- .../bench_sample_without_replacement.py | 13 ++++--- doc/conf.py | 9 +++-- doc/developers/performance.rst | 4 +-- .../plot_out_of_core_classification.py | 8 ++--- examples/applications/svm_gui.py | 3 +- sklearn/cluster/birch.py | 2 +- sklearn/cluster/hierarchical.py | 8 ++--- sklearn/covariance/shrunk_covariance_.py | 6 ++-- sklearn/datasets/_svmlight_format.pyx | 2 +- sklearn/decomposition/factor_analysis.py | 2 +- 
sklearn/decomposition/fastica_.py | 4 +-- sklearn/decomposition/online_lda.py | 8 ++--- sklearn/decomposition/tests/test_fastica.py | 2 +- .../decomposition/tests/test_online_lda.py | 2 +- sklearn/discriminant_analysis.py | 2 +- sklearn/externals/_arff.py | 4 +-- sklearn/feature_extraction/dict_vectorizer.py | 2 +- sklearn/feature_extraction/tests/test_text.py | 6 +--- sklearn/feature_extraction/text.py | 12 +++---- sklearn/linear_model/coordinate_descent.py | 2 +- sklearn/linear_model/least_angle.py | 4 +-- sklearn/model_selection/tests/test_search.py | 2 +- sklearn/neural_network/rbm.py | 2 +- sklearn/random_projection.py | 2 +- sklearn/svm/tests/test_svm.py | 12 ------- sklearn/utils/_random.pyx | 4 +-- sklearn/utils/extmath.py | 4 +-- sklearn/utils/sparsefuncs_fast.pyx | 36 +++++++++---------- sklearn/utils/tests/test_fast_dict.py | 2 +- sklearn/utils/tests/test_multiclass.py | 4 +-- 31 files changed, 78 insertions(+), 98 deletions(-) diff --git a/benchmarks/bench_random_projections.py b/benchmarks/bench_random_projections.py index 4379e50e98ab5..dc313a3e9f762 100644 --- a/benchmarks/bench_random_projections.py +++ b/benchmarks/bench_random_projections.py @@ -19,7 +19,6 @@ import scipy.sparse as sp from sklearn import clone -from sklearn.externals.six.moves import xrange from sklearn.random_projection import (SparseRandomProjection, GaussianRandomProjection, johnson_lindenstrauss_min_dim) @@ -212,7 +211,7 @@ def print_row(clf_type, time_fit, time_transform): for name in selected_transformers: print("Perform benchmarks for %s..." % name) - for iteration in xrange(opts.n_times): + for iteration in range(opts.n_times): print("\titer %s..." % iteration, end="") time_to_fit, time_to_transform = bench_scikit_transformer(X_dense, transformers[name]) diff --git a/benchmarks/bench_sample_without_replacement.py b/benchmarks/bench_sample_without_replacement.py index 90c382e906bcc..4fb23efb7f24f 100644 --- a/benchmarks/bench_sample_without_replacement.py +++ b/benchmarks/bench_sample_without_replacement.py @@ -15,7 +15,6 @@ import numpy as np import random -from sklearn.externals.six.moves import xrange from sklearn.utils.random import sample_without_replacement @@ -90,9 +89,9 @@ def bench_sample(sampling, n_population, n_samples): # Set Python core input sampling_algorithm["python-core-sample"] = \ lambda n_population, n_sample: \ - random.sample(xrange(n_population), n_sample) + random.sample(range(n_population), n_sample) - ########################################################################### + ########################################################################### # Set custom automatic method selection sampling_algorithm["custom-auto"] = \ lambda n_population, n_samples, random_state=None: \ @@ -156,11 +155,11 @@ def bench_sample(sampling, n_population, n_samples): print("Perform benchmarks for %s..." 
% name, end="") time[name] = np.zeros(shape=(opts.n_steps, opts.n_times)) - for step in xrange(opts.n_steps): - for it in xrange(opts.n_times): + for step in range(opts.n_steps): + for it in range(opts.n_times): time[name][step, it] = bench_sample(sampling_algorithm[name], - opts.n_population, - n_samples[step]) + opts.n_population, + n_samples[step]) print("done") diff --git a/doc/conf.py b/doc/conf.py index e829a429a4b7b..d19b43ddcb955 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -15,7 +15,6 @@ from __future__ import print_function import sys import os -from sklearn.externals.six import u # If extensions (or modules to document with autodoc) are in another # directory, add these directories to sys.path here. If the directory @@ -74,8 +73,8 @@ master_doc = 'index' # General information about the project. -project = u('scikit-learn') -copyright = u('2007 - 2018, scikit-learn developers (BSD License)') +project = 'scikit-learn' +copyright = '2007 - 2018, scikit-learn developers (BSD License)' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -214,8 +213,8 @@ # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass # [howto/manual]). -latex_documents = [('index', 'user_guide.tex', u('scikit-learn user guide'), - u('scikit-learn developers'), 'manual'), ] +latex_documents = [('index', 'user_guide.tex', 'scikit-learn user guide', + 'scikit-learn developers', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. diff --git a/doc/developers/performance.rst b/doc/developers/performance.rst index 325199a464fab..80ef0b210f7f1 100644 --- a/doc/developers/performance.rst +++ b/doc/developers/performance.rst @@ -227,13 +227,13 @@ Now restart IPython and let us use this new toy:: 178 # values justified in the paper 179 48 144 3.0 0.0 alpha = 1 180 48 113 2.4 0.0 beta = 0.1 - 181 638 1880 2.9 0.1 for n_iter in xrange(1, max_iter + 1): + 181 638 1880 2.9 0.1 for n_iter in range(1, max_iter + 1): 182 638 195133 305.9 10.2 grad = np.dot(WtW, H) - WtV 183 638 495761 777.1 25.9 proj_gradient = norm(grad[np.logical_or(grad < 0, H > 0)]) 184 638 2449 3.8 0.1 if proj_gradient < tol: 185 48 130 2.7 0.0 break 186 - 187 1474 4474 3.0 0.2 for inner_iter in xrange(1, 20): + 187 1474 4474 3.0 0.2 for inner_iter in range(1, 20): 188 1474 83833 56.9 4.4 Hn = H - alpha * grad 189 # Hn = np.where(Hn > 0, Hn, 0) 190 1474 194239 131.8 10.1 Hn = _pos(Hn) diff --git a/examples/applications/plot_out_of_core_classification.py b/examples/applications/plot_out_of_core_classification.py index ecff621ce82bd..52495e2d0a423 100644 --- a/examples/applications/plot_out_of_core_classification.py +++ b/examples/applications/plot_out_of_core_classification.py @@ -40,8 +40,8 @@ import matplotlib.pyplot as plt from matplotlib import rcParams -from sklearn.externals.six.moves import html_parser -from sklearn.externals.six.moves.urllib.request import urlretrieve +from html.parser import HTMLParser +from urllib.request import urlretrieve from sklearn.datasets import get_data_home from sklearn.feature_extraction.text import HashingVectorizer from sklearn.linear_model import SGDClassifier @@ -60,11 +60,11 @@ def _not_in_sphinx(): # -class ReutersParser(html_parser.HTMLParser): +class ReutersParser(HTMLParser): """Utility class to parse a SGML file and yield documents one at a time.""" def __init__(self, 
encoding='latin-1'): - html_parser.HTMLParser.__init__(self) + HTMLParser.__init__(self) self._reset() self.encoding = encoding diff --git a/examples/applications/svm_gui.py b/examples/applications/svm_gui.py index 51a8a5bd48b34..83d290146eaf5 100644 --- a/examples/applications/svm_gui.py +++ b/examples/applications/svm_gui.py @@ -40,7 +40,6 @@ from sklearn import svm from sklearn.datasets import dump_svmlight_file -from sklearn.externals.six.moves import xrange y_min, y_max = -50, 50 x_min, x_max = -50, 50 @@ -188,7 +187,7 @@ def update_example(self, model, idx): def update(self, event, model): if event == "examples_loaded": - for i in xrange(len(model.data)): + for i in range(len(model.data)): self.update_example(model, i) if event == "example_added": diff --git a/sklearn/cluster/birch.py b/sklearn/cluster/birch.py index 188eff02b6f02..c08e7862d9b6c 100644 --- a/sklearn/cluster/birch.py +++ b/sklearn/cluster/birch.py @@ -29,7 +29,7 @@ def _iterate_sparse_X(X): X_data = X.data X_indptr = X.indptr - for i in xrange(n_samples): + for i in range(n_samples): row = np.zeros(X.shape[1]) startptr, endptr = X_indptr[i], X_indptr[i + 1] nonzero_indices = X_indices[startptr:endptr] diff --git a/sklearn/cluster/hierarchical.py b/sklearn/cluster/hierarchical.py index 3e5d2e8203ba8..2a44aa81912a1 100644 --- a/sklearn/cluster/hierarchical.py +++ b/sklearn/cluster/hierarchical.py @@ -64,10 +64,10 @@ def _fix_connectivity(X, connectivity, affinity): "stopping the tree early." % n_components, stacklevel=2) # XXX: Can we do without completing the matrix? - for i in xrange(n_components): + for i in range(n_components): idx_i = np.where(labels == i)[0] Xi = X[idx_i] - for j in xrange(i): + for j in range(i): idx_j = np.where(labels == j)[0] Xj = X[idx_j] D = pairwise_distances(Xi, Xj, metric=affinity) @@ -527,7 +527,7 @@ def linkage_tree(X, connectivity=None, n_clusters=None, linkage='complete', children = [] # recursive merge loop - for k in xrange(n_samples, n_nodes): + for k in range(n_samples, n_nodes): # identify the merge while True: edge = heappop(inertia) @@ -632,7 +632,7 @@ def _hc_cut(n_clusters, children, n_leaves): # are interested in largest elements # children[-1] is the root of the tree nodes = [-(max(children[-1]) + 1)] - for _ in xrange(n_clusters - 1): + for _ in range(n_clusters - 1): # As we have a heap, nodes[0] is the smallest element these_children = children[-nodes[0] - n_leaves] # Insert the 2 children and remove the largest node diff --git a/sklearn/covariance/shrunk_covariance_.py b/sklearn/covariance/shrunk_covariance_.py index 892d04c94d082..eed39a45bddc4 100644 --- a/sklearn/covariance/shrunk_covariance_.py +++ b/sklearn/covariance/shrunk_covariance_.py @@ -227,8 +227,8 @@ def ledoit_wolf_shrinkage(X, assume_centered=False, block_size=1000): beta_ = 0. # sum of the coefficients of delta_ = 0. 
# sum of the *squared* coefficients of # starting block computation - for i in xrange(n_splits): - for j in xrange(n_splits): + for i in range(n_splits): + for j in range(n_splits): rows = slice(block_size * i, block_size * (i + 1)) cols = slice(block_size * j, block_size * (j + 1)) beta_ += np.sum(np.dot(X2.T[rows], X2[:, cols])) @@ -237,7 +237,7 @@ def ledoit_wolf_shrinkage(X, assume_centered=False, block_size=1000): beta_ += np.sum(np.dot(X2.T[rows], X2[:, block_size * n_splits:])) delta_ += np.sum( np.dot(X.T[rows], X[:, block_size * n_splits:]) ** 2) - for j in xrange(n_splits): + for j in range(n_splits): cols = slice(block_size * j, block_size * (j + 1)) beta_ += np.sum(np.dot(X2.T[block_size * n_splits:], X2[:, cols])) delta_ += np.sum( diff --git a/sklearn/datasets/_svmlight_format.pyx b/sklearn/datasets/_svmlight_format.pyx index 152bd4325dbfb..bba5db9d3cf50 100644 --- a/sklearn/datasets/_svmlight_format.pyx +++ b/sklearn/datasets/_svmlight_format.pyx @@ -92,7 +92,7 @@ def _load_svmlight_file(f, dtype, bint multilabel, bint zero_based, features.pop(0) n_features -= 1 - for i in xrange(0, n_features): + for i in range(0, n_features): idx_s, value = features[i].split(COLON, 1) idx = int(idx_s) if idx < 0 or not zero_based and idx == 0: diff --git a/sklearn/decomposition/factor_analysis.py b/sklearn/decomposition/factor_analysis.py index eea477937e149..5ede30bca42bc 100644 --- a/sklearn/decomposition/factor_analysis.py +++ b/sklearn/decomposition/factor_analysis.py @@ -211,7 +211,7 @@ def my_svd(X): raise ValueError('SVD method %s is not supported. Please consider' ' the documentation' % self.svd_method) - for i in xrange(self.max_iter): + for i in range(self.max_iter): # SMALL helps numerics sqrt_psi = np.sqrt(psi) + SMALL s, V, unexp_var = my_svd(X / (sqrt_psi * nsqrt)) diff --git a/sklearn/decomposition/fastica_.py b/sklearn/decomposition/fastica_.py index 693d46d31fab5..7de65c9829fd8 100644 --- a/sklearn/decomposition/fastica_.py +++ b/sklearn/decomposition/fastica_.py @@ -75,7 +75,7 @@ def _ica_def(X, tol, g, fun_args, max_iter, w_init): w = w_init[j, :].copy() w /= np.sqrt((w ** 2).sum()) - for i in moves.xrange(max_iter): + for i in moves.range(max_iter): gwtx, g_wtx = g(np.dot(w.T, X), fun_args) w1 = (X * gwtx).mean(axis=1) - g_wtx.mean() * w @@ -104,7 +104,7 @@ def _ica_par(X, tol, g, fun_args, max_iter, w_init): W = _sym_decorrelation(w_init) del w_init p_ = float(X.shape[1]) - for ii in moves.xrange(max_iter): + for ii in moves.range(max_iter): gwtx, g_wtx = g(np.dot(W, X), fun_args) W1 = _sym_decorrelation(np.dot(gwtx, X.T) / p_ - g_wtx[:, np.newaxis] * W) diff --git a/sklearn/decomposition/online_lda.py b/sklearn/decomposition/online_lda.py index e35f40ec05c14..b9bf1025e315e 100644 --- a/sklearn/decomposition/online_lda.py +++ b/sklearn/decomposition/online_lda.py @@ -93,7 +93,7 @@ def _update_doc_distribution(X, exp_topic_word_distr, doc_topic_prior, X_indices = X.indices X_indptr = X.indptr - for idx_d in xrange(n_samples): + for idx_d in range(n_samples): if is_sparse_x: ids = X_indices[X_indptr[idx_d]:X_indptr[idx_d + 1]] cnts = X_data[X_indptr[idx_d]:X_indptr[idx_d + 1]] @@ -107,7 +107,7 @@ def _update_doc_distribution(X, exp_topic_word_distr, doc_topic_prior, exp_topic_word_d = exp_topic_word_distr[:, ids] # Iterate between `doc_topic_d` and `norm_phi` until convergence - for _ in xrange(0, max_iters): + for _ in range(0, max_iters): last_d = doc_topic_d # The optimal phi_{dwk} is proportional to @@ -544,7 +544,7 @@ def fit(self, X, y=None): n_jobs = 
effective_n_jobs(self.n_jobs) with Parallel(n_jobs=n_jobs, verbose=max(0, self.verbose - 1)) as parallel: - for i in xrange(max_iter): + for i in range(max_iter): if learning_method == 'online': for idx_slice in gen_batches(n_samples, batch_size): self._em_step(X[idx_slice, :], total_samples=n_samples, @@ -682,7 +682,7 @@ def _loglikelihood(prior, distr, dirichlet_distr, size): X_indptr = X.indptr # E[log p(docs | theta, beta)] - for idx_d in xrange(0, n_samples): + for idx_d in range(0, n_samples): if is_sparse_x: ids = X_indices[X_indptr[idx_d]:X_indptr[idx_d + 1]] cnts = X_data[X_indptr[idx_d]:X_indptr[idx_d + 1]] diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py index e834a00b03118..d5a329c7340c8 100644 --- a/sklearn/decomposition/tests/test_fastica.py +++ b/sklearn/decomposition/tests/test_fastica.py @@ -130,7 +130,7 @@ def g_test(x): ica = FastICA(fun=fn, algorithm=algo, random_state=0) assert_raises(ValueError, ica.fit, m.T) - assert_raises(TypeError, FastICA(fun=moves.xrange(10)).fit, m.T) + assert_raises(TypeError, FastICA(fun=moves.range(10)).fit, m.T) def test_fastica_nowhiten(): diff --git a/sklearn/decomposition/tests/test_online_lda.py b/sklearn/decomposition/tests/test_online_lda.py index 7e06d28faefcf..63346f3dbddbd 100644 --- a/sklearn/decomposition/tests/test_online_lda.py +++ b/sklearn/decomposition/tests/test_online_lda.py @@ -90,7 +90,7 @@ def test_lda_partial_fit(): lda = LatentDirichletAllocation(n_components=n_components, learning_offset=10., total_samples=100, random_state=rng) - for i in xrange(3): + for i in range(3): lda.partial_fit(X) correct_idx_grps = [(0, 1, 2), (3, 4, 5), (6, 7, 8)] diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index a2919c7ff7a69..0418ebb62dc9a 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -660,7 +660,7 @@ def fit(self, X, y): means = [] scalings = [] rotations = [] - for ind in xrange(n_classes): + for ind in range(n_classes): Xg = X[y == ind, :] meang = Xg.mean(0) means.append(meang) diff --git a/sklearn/externals/_arff.py b/sklearn/externals/_arff.py index 82f504542f9a9..2552e77b894a6 100644 --- a/sklearn/externals/_arff.py +++ b/sklearn/externals/_arff.py @@ -431,7 +431,7 @@ def decode_data(self, s, conversors): raise BadDataFormat(s) # XXX: int 0 is used for implicit values, not '0' values = [values[i] if i in values else 0 for i in - xrange(len(conversors))] + range(len(conversors))] else: if len(values) != len(conversors): raise BadDataFormat(s) @@ -524,7 +524,7 @@ def encode_data(self, data, attributes): data = data.data # Check if the rows are sorted - if not all(row[i] <= row[i + 1] for i in xrange(len(row) - 1)): + if not all(row[i] <= row[i + 1] for i in range(len(row) - 1)): raise ValueError("liac-arff can only output COO matrices with " "sorted rows.") diff --git a/sklearn/feature_extraction/dict_vectorizer.py b/sklearn/feature_extraction/dict_vectorizer.py index d078a325b69a6..f37241975645f 100644 --- a/sklearn/feature_extraction/dict_vectorizer.py +++ b/sklearn/feature_extraction/dict_vectorizer.py @@ -258,7 +258,7 @@ def inverse_transform(self, X, dict_type=dict): n_samples = X.shape[0] names = self.feature_names_ - dicts = [dict_type() for _ in xrange(n_samples)] + dicts = [dict_type() for _ in range(n_samples)] if sp.issparse(X): for i, j in zip(*X.nonzero()): diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py index 
9798175e4d5bc..503a167c5fe35 100644 --- a/sklearn/feature_extraction/tests/test_text.py +++ b/sklearn/feature_extraction/tests/test_text.py @@ -5,7 +5,6 @@ import pytest from scipy import sparse -from sklearn.externals.six import PY2 from sklearn.feature_extraction.text import strip_tags from sklearn.feature_extraction.text import strip_accents_unicode from sklearn.feature_extraction.text import strip_accents_ascii @@ -1132,10 +1131,7 @@ def _check_stop_words_consistency(estimator): @fails_if_pypy def test_vectorizer_stop_words_inconsistent(): - if PY2: - lstr = "[u'and', u'll', u've']" - else: - lstr = "['and', 'll', 've']" + lstr = "['and', 'll', 've']" message = ('Your stop_words may be inconsistent with your ' 'preprocessing. Tokenizing the stop words generated ' 'tokens %s not in stop_words.' % lstr) diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index 6120c1d4a8f3a..1162b942e5515 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -170,9 +170,9 @@ def _word_ngrams(self, tokens, stop_words=None): tokens_append = tokens.append space_join = " ".join - for n in xrange(min_n, + for n in range(min_n, min(max_n + 1, n_original_tokens + 1)): - for i in xrange(n_original_tokens - n + 1): + for i in range(n_original_tokens - n + 1): tokens_append(space_join(original_tokens[i: i + n])) return tokens @@ -195,8 +195,8 @@ def _char_ngrams(self, text_document): # bind method outside of loop to reduce overhead ngrams_append = ngrams.append - for n in xrange(min_n, min(max_n + 1, text_len + 1)): - for i in xrange(text_len - n + 1): + for n in range(min_n, min(max_n + 1, text_len + 1)): + for i in range(text_len - n + 1): ngrams_append(text_document[i: i + n]) return ngrams @@ -218,7 +218,7 @@ def _char_wb_ngrams(self, text_document): for w in text_document.split(): w = ' ' + w + ' ' w_len = len(w) - for n in xrange(min_n, max_n + 1): + for n in range(min_n, max_n + 1): offset = 0 ngrams_append(w[offset:offset + n]) while offset + n < w_len: @@ -348,7 +348,7 @@ def _validate_vocabulary(self): indices = set(six.itervalues(vocabulary)) if len(indices) != len(vocabulary): raise ValueError("Vocabulary contains repeated indices.") - for i in xrange(len(vocabulary)): + for i in range(len(vocabulary)): if i not in indices: msg = ("Vocabulary of size %d doesn't contain index " "%d." 
% (len(vocabulary), i)) diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py index c8907574121a0..2d046332889d3 100644 --- a/sklearn/linear_model/coordinate_descent.py +++ b/sklearn/linear_model/coordinate_descent.py @@ -742,7 +742,7 @@ def fit(self, X, y, check_input=True): dual_gaps_ = np.zeros(n_targets, dtype=X.dtype) self.n_iter_ = [] - for k in xrange(n_targets): + for k in range(n_targets): if Xy is not None: this_Xy = Xy[:, k] else: diff --git a/sklearn/linear_model/least_angle.py b/sklearn/linear_model/least_angle.py index 5cc05961abd7e..d0b5f6ea25720 100644 --- a/sklearn/linear_model/least_angle.py +++ b/sklearn/linear_model/least_angle.py @@ -636,7 +636,7 @@ def _fit(self, X, y, max_iter, alpha, fit_path, Xy=None): if fit_path: self.active_ = [] self.coef_path_ = [] - for k in xrange(n_targets): + for k in range(n_targets): this_Xy = None if Xy is None else Xy[:, k] alphas, active, coef_path, n_iter_ = lars_path( X, y[:, k], Gram=Gram, Xy=this_Xy, copy_X=self.copy_X, @@ -656,7 +656,7 @@ def _fit(self, X, y, max_iter, alpha, fit_path, Xy=None): self.coef_)] self.n_iter_ = self.n_iter_[0] else: - for k in xrange(n_targets): + for k in range(n_targets): this_Xy = None if Xy is None else Xy[:, k] alphas, _, self.coef_[k], n_iter_ = lars_path( X, y[:, k], Gram=Gram, Xy=this_Xy, copy_X=self.copy_X, diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index 95f8ac1bd9929..b0dcc11c9003a 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -156,7 +156,7 @@ def test_parameter_grid(): assert_equal(len(grid2), 6) # loop to assert we can iterate over the grid multiple times - for i in xrange(2): + for i in range(2): # tuple + chain transforms {"a": 1, "b": 2} to ("a", 1, "b", 2) points = set(tuple(chain(*(sorted(p.items())))) for p in grid2) assert_equal(points, diff --git a/sklearn/neural_network/rbm.py b/sklearn/neural_network/rbm.py index 1361bffe0d240..cbf4906782d7c 100644 --- a/sklearn/neural_network/rbm.py +++ b/sklearn/neural_network/rbm.py @@ -349,7 +349,7 @@ def fit(self, X, y=None): n_batches, n_samples)) verbose = self.verbose begin = time.time() - for iteration in xrange(1, self.n_iter + 1): + for iteration in range(1, self.n_iter + 1): for batch_slice in batch_slices: self._fit(X[batch_slice], rng) diff --git a/sklearn/random_projection.py b/sklearn/random_projection.py index f8b516eb61957..6f6a8b2ba8341 100644 --- a/sklearn/random_projection.py +++ b/sklearn/random_projection.py @@ -271,7 +271,7 @@ def sparse_random_matrix(n_components, n_features, density='auto', indices = [] offset = 0 indptr = [offset] - for _ in xrange(n_components): + for _ in range(n_components): # find the indices of the non-zero components for row i n_nonzero_i = rng.binomial(n_features, density) indices_i = sample_without_replacement(n_features, n_nonzero_i, diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index 86d3c8d327ce0..90072a307e88e 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -26,7 +26,6 @@ from sklearn.exceptions import ConvergenceWarning from sklearn.exceptions import NotFittedError, UndefinedMetricWarning from sklearn.multiclass import OneVsRestClassifier -from sklearn.externals import six # toy sample X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] @@ -523,17 +522,6 @@ def test_bad_input(): def test_unicode_kernel(): # Test that a unicode kernel name does not 
cause a TypeError - if six.PY2: - # Test unicode (same as str on python3) - clf = svm.SVC(kernel=u'linear', probability=True) - clf.fit(X, Y) - clf.predict_proba(T) - svm.libsvm.cross_validation(iris.data, - iris.target.astype(np.float64), 5, - kernel=u'linear', - random_seed=0) - - # Test default behavior on both versions clf = svm.SVC(gamma='scale', kernel='linear', probability=True) clf.fit(X, Y) clf.predict_proba(T) diff --git a/sklearn/utils/_random.pyx b/sklearn/utils/_random.pyx index 7913684d3107a..9687c583b9a3c 100644 --- a/sklearn/utils/_random.pyx +++ b/sklearn/utils/_random.pyx @@ -149,12 +149,12 @@ cpdef _sample_without_replacement_with_pool(np.int_t n_population, rng_randint = rng.randint # Initialize the pool - for i in xrange(n_population): + for i in range(n_population): pool[i] = i # The following line of code are heavily inspired from python core, # more precisely of random.sample. - for i in xrange(n_samples): + for i in range(n_samples): j = rng_randint(n_population - i) # invariant: non-selected at [0,n-i) out[i] = pool[j] pool[j] = pool[n_population - i - 1] # move non-selected item into diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index 6cdca1bda1d1f..73a719da56551 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -514,13 +514,13 @@ def svd_flip(u, v, u_based_decision=True): if u_based_decision: # columns of u, rows of v max_abs_cols = np.argmax(np.abs(u), axis=0) - signs = np.sign(u[max_abs_cols, xrange(u.shape[1])]) + signs = np.sign(u[max_abs_cols, range(u.shape[1])]) u *= signs v *= signs[:, np.newaxis] else: # rows of v, columns of u max_abs_rows = np.argmax(np.abs(v), axis=1) - signs = np.sign(v[xrange(v.shape[0]), max_abs_rows]) + signs = np.sign(v[range(v.shape[0]), max_abs_rows]) u *= signs v *= signs[:, np.newaxis] return u, v diff --git a/sklearn/utils/sparsefuncs_fast.pyx b/sklearn/utils/sparsefuncs_fast.pyx index b40b843e94322..4e13fce315c57 100644 --- a/sklearn/utils/sparsefuncs_fast.pyx +++ b/sklearn/utils/sparsefuncs_fast.pyx @@ -111,24 +111,24 @@ def _csr_mean_variance_axis0(np.ndarray[floating, ndim=1, mode="c"] X_data, np.ndarray[np.int64_t, ndim=1] counts_nan = np.zeros(n_features, dtype=np.int64) - for i in xrange(non_zero): + for i in range(non_zero): col_ind = X_indices[i] if not isnan(X_data[i]): means[col_ind] += X_data[i] else: counts_nan[col_ind] += 1 - for i in xrange(n_features): + for i in range(n_features): means[i] /= (n_samples - counts_nan[i]) - for i in xrange(non_zero): + for i in range(non_zero): col_ind = X_indices[i] if not isnan(X_data[i]): diff = X_data[i] - means[col_ind] variances[col_ind] += diff * diff counts[col_ind] += 1 - for i in xrange(n_features): + for i in range(n_features): variances[i] += (n_samples - counts_nan[i] - counts[i]) * means[i]**2 variances[i] /= (n_samples - counts_nan[i]) @@ -189,13 +189,13 @@ def _csc_mean_variance_axis0(np.ndarray[floating, ndim=1] X_data, cdef np.ndarray[np.int64_t, ndim=1] counts_nan = np.zeros(n_features, dtype=np.int64) - for i in xrange(n_features): + for i in range(n_features): startptr = X_indptr[i] endptr = X_indptr[i + 1] counts = endptr - startptr - for j in xrange(startptr, endptr): + for j in range(startptr, endptr): if not isnan(X_data[j]): means[i] += X_data[j] else: @@ -203,7 +203,7 @@ def _csc_mean_variance_axis0(np.ndarray[floating, ndim=1] X_data, counts -= counts_nan[i] means[i] /= (n_samples - counts_nan[i]) - for j in xrange(startptr, endptr): + for j in range(startptr, endptr): if not isnan(X_data[j]): diff = 
X_data[j] - means[i] variances[i] += diff * diff @@ -321,12 +321,12 @@ def _incr_mean_variance_axis0(np.ndarray[floating, ndim=1] X_data, new_mean, new_var, counts_nan = _csc_mean_variance_axis0( X_data, n_samples, n_features, X_indices, X_indptr) - for i in xrange(n_features): + for i in range(n_features): new_n[i] -= counts_nan[i] # First pass cdef bint is_first_pass = True - for i in xrange(n_features): + for i in range(n_features): if last_n[i] > 0: is_first_pass = False break @@ -334,19 +334,19 @@ def _incr_mean_variance_axis0(np.ndarray[floating, ndim=1] X_data, return new_mean, new_var, new_n # Next passes - for i in xrange(n_features): + for i in range(n_features): updated_n[i] = last_n[i] + new_n[i] last_over_new_n[i] = last_n[i] / new_n[i] # Unnormalized stats - for i in xrange(n_features): + for i in range(n_features): last_mean[i] *= last_n[i] last_var[i] *= last_n[i] new_mean[i] *= new_n[i] new_var[i] *= new_n[i] # Update stats - for i in xrange(n_features): + for i in range(n_features): updated_var[i] = (last_var[i] + new_var[i] + last_over_new_n[i] / updated_n[i] * (last_mean[i] / last_over_new_n[i] - new_mean[i])**2) @@ -375,10 +375,10 @@ def _inplace_csr_row_normalize_l1(np.ndarray[floating, ndim=1] X_data, cdef np.npy_intp i, j cdef double sum_ - for i in xrange(n_samples): + for i in range(n_samples): sum_ = 0.0 - for j in xrange(X_indptr[i], X_indptr[i + 1]): + for j in range(X_indptr[i], X_indptr[i + 1]): sum_ += fabs(X_data[j]) if sum_ == 0.0: @@ -386,7 +386,7 @@ def _inplace_csr_row_normalize_l1(np.ndarray[floating, ndim=1] X_data, # correctly) continue - for j in xrange(X_indptr[i], X_indptr[i + 1]): + for j in range(X_indptr[i], X_indptr[i + 1]): X_data[j] /= sum_ @@ -405,10 +405,10 @@ def _inplace_csr_row_normalize_l2(np.ndarray[floating, ndim=1] X_data, cdef np.npy_intp i, j cdef double sum_ - for i in xrange(n_samples): + for i in range(n_samples): sum_ = 0.0 - for j in xrange(X_indptr[i], X_indptr[i + 1]): + for j in range(X_indptr[i], X_indptr[i + 1]): sum_ += (X_data[j] * X_data[j]) if sum_ == 0.0: @@ -418,7 +418,7 @@ def _inplace_csr_row_normalize_l2(np.ndarray[floating, ndim=1] X_data, sum_ = sqrt(sum_) - for j in xrange(X_indptr[i], X_indptr[i + 1]): + for j in range(X_indptr[i], X_indptr[i + 1]): X_data[j] /= sum_ diff --git a/sklearn/utils/tests/test_fast_dict.py b/sklearn/utils/tests/test_fast_dict.py index 1b0b0fd68e086..77c1259aa8bf4 100644 --- a/sklearn/utils/tests/test_fast_dict.py +++ b/sklearn/utils/tests/test_fast_dict.py @@ -20,7 +20,7 @@ def test_int_float_dict(): d.append(120, 3.) 
assert_equal(d[120], 3.0) assert_equal(len(d), len(keys) + 1) - for i in xrange(2000): + for i in range(2000): d.append(i + 1000, 4.0) assert_equal(d[1100], 4.0) diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py index deef9a802dd1c..d33778c848c2a 100644 --- a/sklearn/utils/tests/test_multiclass.py +++ b/sklearn/utils/tests/test_multiclass.py @@ -166,7 +166,7 @@ def test_unique_labels(): assert_raises(ValueError, unique_labels) # Multiclass problem - assert_array_equal(unique_labels(xrange(10)), np.arange(10)) + assert_array_equal(unique_labels(range(10)), np.arange(10)) assert_array_equal(unique_labels(np.arange(10)), np.arange(10)) assert_array_equal(unique_labels([4, 0, 2]), np.array([0, 2, 4])) @@ -181,7 +181,7 @@ def test_unique_labels(): np.arange(3)) # Several arrays passed - assert_array_equal(unique_labels([4, 0, 2], xrange(5)), + assert_array_equal(unique_labels([4, 0, 2], range(5)), np.arange(5)) assert_array_equal(unique_labels((0, 1, 2), (0,), (2, 1)), np.arange(3)) From f01b60fdd811a4e8e37247bd26c894efbc908350 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 20 Nov 2018 18:02:30 -0500 Subject: [PATCH 03/29] removing xrange and more six stuff --- .../applications/wikipedia_principal_eigenvector.py | 5 ++--- sklearn/base.py | 5 ++--- sklearn/cluster/bicluster.py | 4 +--- sklearn/cluster/birch.py | 1 - sklearn/cluster/hierarchical.py | 5 +---- sklearn/cluster/k_means_.py | 5 ++--- sklearn/compose/_column_transformer.py | 10 +++++----- sklearn/compose/tests/test_column_transformer.py | 2 +- sklearn/datasets/openml.py | 4 ++-- sklearn/datasets/svmlight_format.py | 2 +- sklearn/datasets/tests/test_openml.py | 4 ++-- sklearn/decomposition/fastica_.py | 2 +- sklearn/decomposition/pca.py | 2 +- sklearn/discriminant_analysis.py | 4 ++-- sklearn/ensemble/forest.py | 2 +- sklearn/ensemble/gradient_boosting.py | 6 +++--- sklearn/ensemble/iforest.py | 2 +- sklearn/ensemble/partial_dependence.py | 4 ++-- sklearn/externals/six.py | 4 ++-- sklearn/feature_extraction/dict_vectorizer.py | 6 +++--- sklearn/feature_extraction/text.py | 10 +++++----- sklearn/feature_selection/from_model.py | 2 +- sklearn/gaussian_process/kernels.py | 4 ++-- sklearn/impute.py | 2 +- sklearn/linear_model/base.py | 2 +- sklearn/linear_model/coordinate_descent.py | 2 +- sklearn/linear_model/least_angle.py | 4 ++-- sklearn/linear_model/logistic.py | 4 ++-- sklearn/manifold/spectral_embedding_.py | 2 +- sklearn/manifold/t_sne.py | 4 ++-- sklearn/metrics/regression.py | 12 ++++++------ sklearn/metrics/scorer.py | 10 +++++----- sklearn/model_selection/_search.py | 4 ++-- sklearn/preprocessing/_encoders.py | 8 ++++---- sklearn/preprocessing/base.py | 2 +- sklearn/tree/export.py | 4 ++-- sklearn/tree/tree.py | 2 +- sklearn/utils/class_weight.py | 4 ++-- sklearn/utils/multiclass.py | 10 +++++----- sklearn/utils/validation.py | 8 ++++---- 40 files changed, 85 insertions(+), 94 deletions(-) diff --git a/examples/applications/wikipedia_principal_eigenvector.py b/examples/applications/wikipedia_principal_eigenvector.py index 3c91479f48ec3..27f3844a23094 100644 --- a/examples/applications/wikipedia_principal_eigenvector.py +++ b/examples/applications/wikipedia_principal_eigenvector.py @@ -47,8 +47,7 @@ from joblib import Memory from sklearn.decomposition import randomized_svd -from sklearn.externals.six.moves.urllib.request import urlopen -from sklearn.externals.six import iteritems +from urllib.request import urlopen print(__doc__) @@ -173,7 +172,7 @@ def 
get_adjacency_matrix(redirects_filename, page_links_filename, limit=None): # stop after 5M links to make it possible to work in RAM X, redirects, index_map = get_adjacency_matrix( redirects_filename, page_links_filename, limit=5000000) -names = dict((i, name) for name, i in iteritems(index_map)) +names = dict((i, name) for name, i in index_map.items()) print("Computing the principal singular vectors using randomized_svd") t0 = time() diff --git a/sklearn/base.py b/sklearn/base.py index 34998270cea88..6f2eaf062d9be 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -9,7 +9,6 @@ import numpy as np from scipy import sparse -from .externals import six from .utils.fixes import signature from . import __version__ @@ -58,7 +57,7 @@ def clone(estimator, safe=True): % (repr(estimator), type(estimator))) klass = estimator.__class__ new_object_params = estimator.get_params(deep=False) - for name, param in six.iteritems(new_object_params): + for name, param in new_object_params.items(): new_object_params[name] = clone(param, safe=False) new_object = klass(**new_object_params) params_set = new_object.get_params(deep=False) @@ -97,7 +96,7 @@ def _pprint(params, offset=0, printer=repr): params_list = list() this_line_length = offset line_sep = ',\n' + (1 + offset // 2) * ' ' - for i, (k, v) in enumerate(sorted(six.iteritems(params))): + for i, (k, v) in enumerate(sorted(params.items())): if type(v) is float: # use str for representing floating point numbers # this way we get consistent representation across diff --git a/sklearn/cluster/bicluster.py b/sklearn/cluster/bicluster.py index 18260a0f3b1c2..567b9c063f189 100644 --- a/sklearn/cluster/bicluster.py +++ b/sklearn/cluster/bicluster.py @@ -14,7 +14,6 @@ from . import KMeans, MiniBatchKMeans from ..base import BaseEstimator, BiclusterMixin -from ..externals import six from ..utils import check_random_state from ..utils.extmath import (make_nonnegative, randomized_svd, @@ -85,8 +84,7 @@ def _log_normalize(X): return L - row_avg - col_avg + avg -class BaseSpectral(six.with_metaclass(ABCMeta, BaseEstimator, - BiclusterMixin)): +class BaseSpectral(BaseEstimator, BiclusterMixin, metaclass=ABCMeta): """Base class for spectral biclustering.""" @abstractmethod diff --git a/sklearn/cluster/birch.py b/sklearn/cluster/birch.py index c08e7862d9b6c..4b5f72ada9ad3 100644 --- a/sklearn/cluster/birch.py +++ b/sklearn/cluster/birch.py @@ -11,7 +11,6 @@ from ..metrics.pairwise import euclidean_distances from ..base import TransformerMixin, ClusterMixin, BaseEstimator -from ..externals.six.moves import xrange from ..utils import check_array from ..utils.extmath import row_norms, safe_sparse_dot from ..utils.validation import check_is_fitted diff --git a/sklearn/cluster/hierarchical.py b/sklearn/cluster/hierarchical.py index 2a44aa81912a1..37c9550d94d63 100644 --- a/sklearn/cluster/hierarchical.py +++ b/sklearn/cluster/hierarchical.py @@ -15,7 +15,6 @@ from scipy.sparse.csgraph import connected_components from ..base import BaseEstimator, ClusterMixin -from ..externals import six from ..metrics.pairwise import paired_distances, pairwise_distances from ..utils import check_array from ..utils.validation import check_memory @@ -24,8 +23,6 @@ from ._feature_agglomeration import AgglomerationTransform from ..utils.fast_dict import IntFloatDict -from ..externals.six.moves import xrange - ############################################################################### # For non fully-connected graphs @@ -274,7 +271,7 @@ def ward_tree(X, connectivity=None, n_clusters=None, 
return_distance=False): inertia = np.empty(len(coord_row), dtype=np.float64, order='C') _hierarchical.compute_ward_dist(moments_1, moments_2, coord_row, coord_col, inertia) - inertia = list(six.moves.zip(inertia, coord_row, coord_col)) + inertia = list(zip(inertia, coord_row, coord_col)) heapify(inertia) # prepare the main fields diff --git a/sklearn/cluster/k_means_.py b/sklearn/cluster/k_means_.py index e0d520f09fd18..ea31f042c0419 100644 --- a/sklearn/cluster/k_means_.py +++ b/sklearn/cluster/k_means_.py @@ -32,7 +32,6 @@ from ..utils._joblib import Parallel from ..utils._joblib import delayed from ..utils._joblib import effective_n_jobs -from ..externals.six import string_types from ..exceptions import ConvergenceWarning from . import _k_means from ._k_means_elkan import k_means_elkan @@ -743,10 +742,10 @@ def _init_centroids(X, k, init, random_state=None, x_squared_norms=None, raise ValueError( "n_samples=%d should be larger than k=%d" % (n_samples, k)) - if isinstance(init, string_types) and init == 'k-means++': + if isinstance(init, str) and init == 'k-means++': centers = _k_init(X, k, random_state=random_state, x_squared_norms=x_squared_norms) - elif isinstance(init, string_types) and init == 'random': + elif isinstance(init, str) and init == 'random': seeds = random_state.permutation(n_samples)[:k] centers = X[seeds] elif hasattr(init, '__array__'): diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index 68b9c65e42299..29b11a5a445a7 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -545,7 +545,7 @@ def _check_key_type(key, superclass): ---------- key : scalar, list, slice, array-like The column specification to check - superclass : int or six.string_types + superclass : int or six.str The type for which to check the `key` """ @@ -560,7 +560,7 @@ def _check_key_type(key, superclass): if superclass is int: return key.dtype.kind == 'i' else: - # superclass = six.string_types + # superclass = six.str return key.dtype.kind in ('O', 'U', 'S') return False @@ -589,7 +589,7 @@ def _get_column(X, key): # check whether we have string column names or integers if _check_key_type(key, int): column_names = False - elif _check_key_type(key, six.string_types): + elif _check_key_type(key, six.str): column_names = True elif hasattr(key, 'dtype') and np.issubdtype(key.dtype, np.bool_): # boolean mask @@ -635,13 +635,13 @@ def _get_column_indices(X, key): else: return list(key) - elif _check_key_type(key, six.string_types): + elif _check_key_type(key, six.str): try: all_columns = list(X.columns) except AttributeError: raise ValueError("Specifying the columns using strings is only " "supported for pandas DataFrames") - if isinstance(key, six.string_types): + if isinstance(key, six.str): columns = [key] elif isinstance(key, slice): start, stop = key.start, key.stop diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py index b7631336ef3dd..fce40c11422a6 100644 --- a/sklearn/compose/tests/test_column_transformer.py +++ b/sklearn/compose/tests/test_column_transformer.py @@ -785,7 +785,7 @@ def test_column_transformer_remainder_numpy(key): def test_column_transformer_remainder_pandas(key): # test different ways that columns are specified with passthrough pd = pytest.importorskip('pandas') - if isinstance(key, six.string_types) and key == 'pd-index': + if isinstance(key, six.str) and key == 'pd-index': key = pd.Index(['first']) X_array = np.array([[0, 1, 2], 
[2, 4, 6]]).T diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py index 1aec0aafab140..d1d240cad8301 100644 --- a/sklearn/datasets/openml.py +++ b/sklearn/datasets/openml.py @@ -21,7 +21,7 @@ from sklearn.externals import _arff from .base import get_data_home -from ..externals.six import string_types, PY2, BytesIO +from ..externals.six import str, PY2, BytesIO from ..externals.six.moves.urllib.error import HTTPError from ..utils import Bunch @@ -567,7 +567,7 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None, # see issue: https://github.com/openml/OpenML/issues/768) target_column = [feature['name'] for feature in features_list if feature['is_target'] == 'true'] - elif isinstance(target_column, string_types): + elif isinstance(target_column, str): # for code-simplicity, make target_column by default a list target_column = [target_column] elif target_column is None: diff --git a/sklearn/datasets/svmlight_format.py b/sklearn/datasets/svmlight_format.py index dfcc5431ebd97..81c0d7c1ac1c5 100644 --- a/sklearn/datasets/svmlight_format.py +++ b/sklearn/datasets/svmlight_format.py @@ -159,7 +159,7 @@ def get_data(): def _gen_open(f): if isinstance(f, int): # file descriptor return io.open(f, "rb", closefd=False) - elif not isinstance(f, six.string_types): + elif not isinstance(f, six.str): raise TypeError("expected {str, int, file-like}, got %s" % type(f)) _, ext = os.path.splitext(f) diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py index fdf6506a30405..c192d50d9caf5 100644 --- a/sklearn/datasets/tests/test_openml.py +++ b/sklearn/datasets/tests/test_openml.py @@ -17,7 +17,7 @@ _retry_with_clean_cache) from sklearn.utils.testing import (assert_warns_message, assert_raise_message) -from sklearn.externals.six import string_types +from sklearn.externals.six import str from sklearn.externals.six.moves.urllib.error import HTTPError from sklearn.datasets.tests.test_common import check_return_X_y from functools import partial @@ -101,7 +101,7 @@ def _fetch_dataset_from_openml(data_id, data_name, data_version, assert data_by_id.target.dtype == expected_target_dtype assert len(data_by_id.feature_names) == expected_features for feature in data_by_id.feature_names: - assert isinstance(feature, string_types) + assert isinstance(feature, str) # TODO: pass in a list of expected nominal features for feature, categories in data_by_id.categories.items(): diff --git a/sklearn/decomposition/fastica_.py b/sklearn/decomposition/fastica_.py index 7de65c9829fd8..c7e5fbe6fd685 100644 --- a/sklearn/decomposition/fastica_.py +++ b/sklearn/decomposition/fastica_.py @@ -286,7 +286,7 @@ def my_g(x): def g(x, fun_args): return fun(x, **fun_args) else: - exc = ValueError if isinstance(fun, six.string_types) else TypeError + exc = ValueError if isinstance(fun, six.str) else TypeError raise exc("Unknown function %r;" " should be one of 'logcosh', 'exp', 'cube' or callable" % fun) diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py index db183af45af0c..eff2a6653401c 100644 --- a/sklearn/decomposition/pca.py +++ b/sklearn/decomposition/pca.py @@ -479,7 +479,7 @@ def _fit_truncated(self, X, n_components, svd_solver): """ n_samples, n_features = X.shape - if isinstance(n_components, six.string_types): + if isinstance(n_components, six.str): raise ValueError("n_components=%r cannot be a string " "with svd_solver='%s'" % (n_components, svd_solver)) diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py 
index 0418ebb62dc9a..7f8d9808cc0e2 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -13,7 +13,7 @@ import warnings import numpy as np from scipy import linalg -from .externals.six import string_types +from .externals.six import str from .externals.six.moves import xrange from .base import BaseEstimator, TransformerMixin, ClassifierMixin @@ -49,7 +49,7 @@ def _cov(X, shrinkage=None): Estimated covariance matrix. """ shrinkage = "empirical" if shrinkage is None else shrinkage - if isinstance(shrinkage, string_types): + if isinstance(shrinkage, str): if shrinkage == 'auto': sc = StandardScaler() # standardize features X = sc.fit_transform(X) diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index 33d166d00969e..db793a494e594 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -493,7 +493,7 @@ def _validate_y_class_weight(self, y): if self.class_weight is not None: valid_presets = ('balanced', 'balanced_subsample') - if isinstance(self.class_weight, six.string_types): + if isinstance(self.class_weight, six.str): if self.class_weight not in valid_presets: raise ValueError('Valid presets for class_weight include ' '"balanced" and "balanced_subsample". Given "%s".' diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index b15ed82833fd6..9dcdcfa934881 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -1234,7 +1234,7 @@ def _check_params(self): "was %r" % self.subsample) if self.init is not None: - if isinstance(self.init, six.string_types): + if isinstance(self.init, six.str): if self.init not in INIT_ESTIMATORS: raise ValueError('init="%s" is not supported' % self.init) else: @@ -1248,7 +1248,7 @@ def _check_params(self): raise ValueError("alpha must be in (0.0, 1.0) but " "was %r" % self.alpha) - if isinstance(self.max_features, six.string_types): + if isinstance(self.max_features, six.str): if self.max_features == "auto": # if is_classification if self.n_classes_ > 1: @@ -1293,7 +1293,7 @@ def _init_state(self): if self.init is None: self.init_ = self.loss_.init_estimator() - elif isinstance(self.init, six.string_types): + elif isinstance(self.init, six.str): self.init_ = INIT_ESTIMATORS[self.init]() else: self.init_ = self.init diff --git a/sklearn/ensemble/iforest.py b/sklearn/ensemble/iforest.py index bb66e55ed32df..61000b05e79dc 100644 --- a/sklearn/ensemble/iforest.py +++ b/sklearn/ensemble/iforest.py @@ -242,7 +242,7 @@ def fit(self, X, y=None, sample_weight=None): # ensure that max_sample is in [1, n_samples]: n_samples = X.shape[0] - if isinstance(self.max_samples, six.string_types): + if isinstance(self.max_samples, six.str): if self.max_samples == 'auto': max_samples = min(256, n_samples) else: diff --git a/sklearn/ensemble/partial_dependence.py b/sklearn/ensemble/partial_dependence.py index 2d669d413c054..b9ec635278ce4 100644 --- a/sklearn/ensemble/partial_dependence.py +++ b/sklearn/ensemble/partial_dependence.py @@ -275,7 +275,7 @@ def plot_partial_dependence(gbrt, X, features, feature_names=None, feature_names = feature_names.tolist() def convert_feature(fx): - if isinstance(fx, six.string_types): + if isinstance(fx, six.str): try: fx = feature_names.index(fx) except ValueError: @@ -285,7 +285,7 @@ def convert_feature(fx): # convert features into a seq of int tuples tmp_features = [] for fxs in features: - if isinstance(fxs, (numbers.Integral,) + six.string_types): + if isinstance(fxs, (numbers.Integral,) + 
six.str): fxs = (fxs,) try: fxs = np.array([convert_feature(fx) for fx in fxs], dtype=np.int32) diff --git a/sklearn/externals/six.py b/sklearn/externals/six.py index 85898ec71275f..5b7dc61f98e7e 100644 --- a/sklearn/externals/six.py +++ b/sklearn/externals/six.py @@ -33,7 +33,7 @@ PY3 = sys.version_info[0] == 3 if PY3: - string_types = str, + str = str, integer_types = int, class_types = type, text_type = str @@ -41,7 +41,7 @@ MAXSIZE = sys.maxsize else: - string_types = basestring, + str = basestring, integer_types = (int, long) class_types = (type, types.ClassType) text_type = unicode diff --git a/sklearn/feature_extraction/dict_vectorizer.py b/sklearn/feature_extraction/dict_vectorizer.py index f37241975645f..6393914f76e7a 100644 --- a/sklearn/feature_extraction/dict_vectorizer.py +++ b/sklearn/feature_extraction/dict_vectorizer.py @@ -119,7 +119,7 @@ def fit(self, X, y=None): for x in X: for f, v in six.iteritems(x): - if isinstance(v, six.string_types): + if isinstance(v, six.str): f = "%s%s%s" % (f, self.separator, v) if f not in vocab: feature_names.append(f) @@ -165,7 +165,7 @@ def _transform(self, X, fitting): # same time for x in X: for f, v in six.iteritems(x): - if isinstance(v, six.string_types): + if isinstance(v, six.str): f = "%s%s%s" % (f, self.separator, v) v = 1 if f in vocab: @@ -299,7 +299,7 @@ def transform(self, X): for i, x in enumerate(X): for f, v in six.iteritems(x): - if isinstance(v, six.string_types): + if isinstance(v, six.str): f = "%s%s%s" % (f, self.separator, v) v = 1 try: diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index 1162b942e5515..d5a66f6e1462b 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -107,7 +107,7 @@ def strip_tags(s): def _check_stop_list(stop): if stop == "english": return ENGLISH_STOP_WORDS - elif isinstance(stop, six.string_types): + elif isinstance(stop, six.str): raise ValueError("not a built-in stop list: %s" % stop) elif stop is None: return None @@ -588,7 +588,7 @@ def fit(self, X, y=None): Training data. """ # triggers a parameter validation - if isinstance(X, six.string_types): + if isinstance(X, six.str): raise ValueError( "Iterable over raw text documents expected, " "string object received.") @@ -613,7 +613,7 @@ def transform(self, X): X : scipy.sparse matrix, shape = (n_samples, self.n_features) Document-term matrix. """ - if isinstance(X, six.string_types): + if isinstance(X, six.str): raise ValueError( "Iterable over raw text documents expected, " "string object received.") @@ -1018,7 +1018,7 @@ def fit_transform(self, raw_documents, y=None): # We intentionally don't call the transform method to make # fit_transform overridable without unwanted side effects in # TfidfVectorizer. - if isinstance(raw_documents, six.string_types): + if isinstance(raw_documents, six.str): raise ValueError( "Iterable over raw text documents expected, " "string object received.") @@ -1073,7 +1073,7 @@ def transform(self, raw_documents): X : sparse matrix, [n_samples, n_features] Document-term matrix. 
""" - if isinstance(raw_documents, six.string_types): + if isinstance(raw_documents, six.str): raise ValueError( "Iterable over raw text documents expected, " "string object received.") diff --git a/sklearn/feature_selection/from_model.py b/sklearn/feature_selection/from_model.py index 3e2efdbeb1e73..38455d1f9224c 100644 --- a/sklearn/feature_selection/from_model.py +++ b/sklearn/feature_selection/from_model.py @@ -48,7 +48,7 @@ def _calculate_threshold(estimator, importances, threshold): else: threshold = "mean" - if isinstance(threshold, six.string_types): + if isinstance(threshold, six.str): if "*" in threshold: scale, reference = threshold.split("*") scale = float(scale.strip()) diff --git a/sklearn/gaussian_process/kernels.py b/sklearn/gaussian_process/kernels.py index 79d913bca1cb5..b48113c4a75ec 100644 --- a/sklearn/gaussian_process/kernels.py +++ b/sklearn/gaussian_process/kernels.py @@ -91,7 +91,7 @@ class Hyperparameter(namedtuple('Hyperparameter', __slots__ = () def __new__(cls, name, value_type, bounds, n_elements=1, fixed=None): - if not isinstance(bounds, six.string_types) or bounds != "fixed": + if not isinstance(bounds, six.str) or bounds != "fixed": bounds = np.atleast_2d(bounds) if n_elements > 1: # vector-valued parameter if bounds.shape[0] == 1: @@ -102,7 +102,7 @@ def __new__(cls, name, value_type, bounds, n_elements=1, fixed=None): % (name, n_elements, bounds.shape[0])) if fixed is None: - fixed = isinstance(bounds, six.string_types) and bounds == "fixed" + fixed = isinstance(bounds, six.str) and bounds == "fixed" return super(Hyperparameter, cls).__new__( cls, name, value_type, bounds, n_elements, fixed) diff --git a/sklearn/impute.py b/sklearn/impute.py index a10f6c9eb947f..aa5fa90030f17 100644 --- a/sklearn/impute.py +++ b/sklearn/impute.py @@ -565,7 +565,7 @@ def fit(self, X, y=None): raise ValueError("'features' has to be either 'missing-only' or " "'all'. Got {} instead.".format(self.features)) - if not ((isinstance(self.sparse, six.string_types) and + if not ((isinstance(self.sparse, six.str) and self.sparse == "auto") or isinstance(self.sparse, bool)): raise ValueError("'sparse' has to be a boolean or 'auto'. " "Got {!r} instead.".format(self.sparse)) diff --git a/sklearn/linear_model/base.py b/sklearn/linear_model/base.py index 51abed0384806..539473b57cf4b 100644 --- a/sklearn/linear_model/base.py +++ b/sklearn/linear_model/base.py @@ -519,7 +519,7 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy, Xy = None # precompute if n_samples > n_features - if isinstance(precompute, six.string_types) and precompute == 'auto': + if isinstance(precompute, six.str) and precompute == 'auto': precompute = (n_samples > n_features) if precompute is True: diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py index 2d046332889d3..d70f85a6d3caf 100644 --- a/sklearn/linear_model/coordinate_descent.py +++ b/sklearn/linear_model/coordinate_descent.py @@ -698,7 +698,7 @@ def fit(self, X, y, check_input=True): "well. You are advised to use the LinearRegression " "estimator", stacklevel=2) - if isinstance(self.precompute, six.string_types): + if isinstance(self.precompute, six.str): raise ValueError('precompute should be one of True, False or' ' array-like. 
Got %r' % self.precompute) diff --git a/sklearn/linear_model/least_angle.py b/sklearn/linear_model/least_angle.py index d0b5f6ea25720..3c5b192fd709c 100644 --- a/sklearn/linear_model/least_angle.py +++ b/sklearn/linear_model/least_angle.py @@ -25,7 +25,7 @@ from ..exceptions import ConvergenceWarning from ..utils._joblib import Parallel, delayed from ..externals.six.moves import xrange -from ..externals.six import string_types +from ..externals.six import str solve_triangular_args = {'check_finite': False} @@ -181,7 +181,7 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500, # and allows to easily swap columns X = X.copy('F') - elif isinstance(Gram, string_types) and Gram == 'auto' or Gram is True: + elif isinstance(Gram, str) and Gram == 'auto' or Gram is True: if Gram is True or X.shape[0] > X.shape[1]: Gram = np.dot(X.T, X) else: diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 2f24c76397ccd..e1cfbc5cc246f 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -976,7 +976,7 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10, scores = list() - if isinstance(scoring, six.string_types): + if isinstance(scoring, six.str): scoring = get_scorer(scoring) for w in coefs: if multi_class == 'ovr': @@ -1919,7 +1919,7 @@ def score(self, X, y, sample_weight=None): "This warning will disappear in version 0.22.", ChangedBehaviorWarning) scoring = self.scoring or 'accuracy' - if isinstance(scoring, six.string_types): + if isinstance(scoring, six.str): scoring = get_scorer(scoring) return scoring(self, X, y, sample_weight=sample_weight) diff --git a/sklearn/manifold/spectral_embedding_.py b/sklearn/manifold/spectral_embedding_.py index d0c226b51ca5e..442b79c6a399e 100644 --- a/sklearn/manifold/spectral_embedding_.py +++ b/sklearn/manifold/spectral_embedding_.py @@ -510,7 +510,7 @@ def fit(self, X, y=None): X = check_array(X, ensure_min_samples=2, estimator=self) random_state = check_random_state(self.random_state) - if isinstance(self.affinity, six.string_types): + if isinstance(self.affinity, six.str): if self.affinity not in set(("nearest_neighbors", "rbf", "precomputed")): raise ValueError(("%s is not a valid affinity. Expected " diff --git a/sklearn/manifold/t_sne.py b/sklearn/manifold/t_sne.py index fe7268515abfd..649b44f0916b7 100644 --- a/sklearn/manifold/t_sne.py +++ b/sklearn/manifold/t_sne.py @@ -25,7 +25,7 @@ from ..metrics.pairwise import pairwise_distances from . import _utils from . 
import _barnes_hut_tsne -from ..externals.six import string_types +from ..externals.six import str MACHINE_EPSILON = np.finfo(np.double).eps @@ -671,7 +671,7 @@ def _fit(self, X, skip_num_points=0): if self.angle < 0.0 or self.angle > 1.0: raise ValueError("'angle' must be between 0.0 - 1.0") if self.metric == "precomputed": - if isinstance(self.init, string_types) and self.init == 'pca': + if isinstance(self.init, str) and self.init == 'pca': raise ValueError("The parameter init=\"pca\" cannot be " "used with metric=\"precomputed\".") if X.shape[0] != X.shape[1]: diff --git a/sklearn/metrics/regression.py b/sklearn/metrics/regression.py index f4854ff244bc4..22e97d399fd96 100644 --- a/sklearn/metrics/regression.py +++ b/sklearn/metrics/regression.py @@ -27,7 +27,7 @@ from ..utils.validation import check_array, check_consistent_length from ..utils.validation import column_or_1d -from ..externals.six import string_types +from ..externals.six import str __ALL__ = [ @@ -90,7 +90,7 @@ def _check_reg_targets(y_true, y_pred, multioutput): n_outputs = y_true.shape[1] allowed_multioutput_str = ('raw_values', 'uniform_average', 'variance_weighted') - if isinstance(multioutput, string_types): + if isinstance(multioutput, str): if multioutput not in allowed_multioutput_str: raise ValueError("Allowed 'multioutput' string values are {}. " "You provided multioutput={!r}".format( @@ -172,7 +172,7 @@ def mean_absolute_error(y_true, y_pred, check_consistent_length(y_true, y_pred, sample_weight) output_errors = np.average(np.abs(y_pred - y_true), weights=sample_weight, axis=0) - if isinstance(multioutput, string_types): + if isinstance(multioutput, str): if multioutput == 'raw_values': return output_errors elif multioutput == 'uniform_average': @@ -241,7 +241,7 @@ def mean_squared_error(y_true, y_pred, check_consistent_length(y_true, y_pred, sample_weight) output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight) - if isinstance(multioutput, string_types): + if isinstance(multioutput, str): if multioutput == 'raw_values': return output_errors elif multioutput == 'uniform_average': @@ -431,7 +431,7 @@ def explained_variance_score(y_true, y_pred, output_scores[valid_score] = 1 - (numerator[valid_score] / denominator[valid_score]) output_scores[nonzero_numerator & ~nonzero_denominator] = 0. - if isinstance(multioutput, string_types): + if isinstance(multioutput, str): if multioutput == 'raw_values': # return scores individually return output_scores @@ -555,7 +555,7 @@ def r2_score(y_true, y_pred, sample_weight=None, # arbitrary set to zero to avoid -inf scores, having a constant # y_true is not interesting for scoring a regression anyway output_scores[nonzero_numerator & ~nonzero_denominator] = 0. - if isinstance(multioutput, string_types): + if isinstance(multioutput, str): if multioutput == 'raw_values': # return scores individually return output_scores diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 3907e2439dfeb..d5dfac2ef65e3 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -224,7 +224,7 @@ def get_scorer(scoring): scorer : callable The scorer. 
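# Illustrative usage sketch (assumes a scikit-learn install): a scoring
# argument may be either a builtin str name or a callable, which is all the
# isinstance checks in scorer.py need to distinguish on Python 3.
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import get_scorer
from sklearn.metrics.scorer import check_scoring

accuracy_scorer = get_scorer("accuracy")                    # str -> registry lookup
scorer = check_scoring(LogisticRegression(), scoring="accuracy")
same_scorer = check_scoring(LogisticRegression(), scoring=accuracy_scorer)  # callables pass through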
""" - if isinstance(scoring, six.string_types): + if isinstance(scoring, six.str): try: scorer = SCORERS[scoring] except KeyError: @@ -269,7 +269,7 @@ def check_scoring(estimator, scoring=None, allow_none=False): if not hasattr(estimator, 'fit'): raise TypeError("estimator should be an estimator implementing " "'fit' method, %r was passed" % estimator) - if isinstance(scoring, six.string_types): + if isinstance(scoring, six.str): return get_scorer(scoring) elif callable(scoring): # Heuristic to ensure user has not passed a metric @@ -339,7 +339,7 @@ def _check_multimetric_scoring(estimator, scoring=None): False if scorer is None/str/callable """ if callable(scoring) or scoring is None or isinstance(scoring, - six.string_types): + six.str): scorers = {"score": check_scoring(estimator, scoring=scoring)} return scorers, False else: @@ -365,7 +365,7 @@ def _check_multimetric_scoring(estimator, scoring=None): raise ValueError(err_msg + "Duplicate elements were found in" " the given list. %r" % repr(scoring)) elif len(keys) > 0: - if not all(isinstance(k, six.string_types) for k in keys): + if not all(isinstance(k, six.str) for k in keys): if any(callable(k) for k in keys): raise ValueError(err_msg + "One or more of the elements were " @@ -385,7 +385,7 @@ def _check_multimetric_scoring(estimator, scoring=None): elif isinstance(scoring, dict): keys = set(scoring) - if not all(isinstance(k, six.string_types) for k in keys): + if not all(isinstance(k, six.str) for k in keys): raise ValueError("Non-string types were found in the keys of " "the given dict. scoring=%r" % repr(scoring)) if len(keys) == 0: diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index d08d96e31c6da..6c54afb7a9ee8 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -371,7 +371,7 @@ def _check_param_grid(param_grid): if isinstance(v, np.ndarray) and v.ndim > 1: raise ValueError("Parameter array should be one-dimensional.") - if (isinstance(v, six.string_types) or + if (isinstance(v, six.str) or not isinstance(v, (np.ndarray, Sequence))): raise ValueError("Parameter values for parameter ({0}) need " "to be a sequence(but not a string) or" @@ -619,7 +619,7 @@ def fit(self, X, y=None, groups=None, **fit_params): if self.multimetric_: if self.refit is not False and ( - not isinstance(self.refit, six.string_types) or + not isinstance(self.refit, six.str) or # This will work for both dict / list (tuple) self.refit not in scorers): raise ValueError("For multi-metric scoring, the parameter " diff --git a/sklearn/preprocessing/_encoders.py b/sklearn/preprocessing/_encoders.py index 089488bdbaad9..cffe18e6f17e8 100644 --- a/sklearn/preprocessing/_encoders.py +++ b/sklearn/preprocessing/_encoders.py @@ -371,7 +371,7 @@ def _handle_deprecations(self, X): # if user specified categorical_features -> always use legacy mode if self.categorical_features is not None: - if (isinstance(self.categorical_features, six.string_types) + if (isinstance(self.categorical_features, six.str) and self.categorical_features == 'all'): warnings.warn( "The 'categorical_features' keyword is deprecated in " @@ -438,7 +438,7 @@ def _legacy_fit_transform(self, X): "be able to use arbitrary integer values as " "category identifiers.") n_samples, n_features = X.shape - if (isinstance(self._n_values, six.string_types) and + if (isinstance(self._n_values, six.str) and self._n_values == 'auto'): n_values = np.max(X, axis=0) + 1 elif isinstance(self._n_values, numbers.Integral): @@ -473,7 +473,7 @@ 
def _legacy_fit_transform(self, X): shape=(n_samples, indices[-1]), dtype=self.dtype).tocsr() - if (isinstance(self._n_values, six.string_types) and + if (isinstance(self._n_values, six.str) and self._n_values == 'auto'): mask = np.array(out.sum(axis=0)).ravel() != 0 active_features = np.where(mask)[0] @@ -553,7 +553,7 @@ def _legacy_transform(self, X): out = sparse.coo_matrix((data, (row_indices, column_indices)), shape=(n_samples, indices[-1]), dtype=self.dtype).tocsr() - if (isinstance(self._n_values, six.string_types) and + if (isinstance(self._n_values, six.str) and self._n_values == 'auto'): out = out[:, self._active_features_] diff --git a/sklearn/preprocessing/base.py b/sklearn/preprocessing/base.py index 4b0cdbc35e1e5..45e39a2bfcbc5 100644 --- a/sklearn/preprocessing/base.py +++ b/sklearn/preprocessing/base.py @@ -48,7 +48,7 @@ def _transform_selected(X, transform, dtype, selected="all", copy=True, raise ValueError("The retain_order option can only be set to True " "for dense matrices.") - if isinstance(selected, six.string_types) and selected == "all": + if isinstance(selected, six.str) and selected == "all": return transform(X) if len(selected) == 0: diff --git a/sklearn/tree/export.py b/sklearn/tree/export.py index fe127d77302b6..81900da12e1a3 100644 --- a/sklearn/tree/export.py +++ b/sklearn/tree/export.py @@ -281,7 +281,7 @@ def node_to_str(self, tree, node_id, criterion): if self.impurity: if isinstance(criterion, _criterion.FriedmanMSE): criterion = "friedman_mse" - elif not isinstance(criterion, six.string_types): + elif not isinstance(criterion, six.str): criterion = "impurity" if labels: node_string += '%s = ' % criterion @@ -755,7 +755,7 @@ def export_graphviz(decision_tree, out_file=None, max_depth=None, own_file = False return_string = False try: - if isinstance(out_file, six.string_types): + if isinstance(out_file, six.str): if six.PY3: out_file = open(out_file, "w", encoding="utf-8") else: diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index faa83efbb7703..f31858979cf4f 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -219,7 +219,7 @@ def fit(self, X, y, sample_weight=None, check_input=True, min_samples_split = max(min_samples_split, 2 * min_samples_leaf) - if isinstance(self.max_features, six.string_types): + if isinstance(self.max_features, six.str): if self.max_features == "auto": if is_classification: max_features = max(1, int(np.sqrt(self.n_features_))) diff --git a/sklearn/utils/class_weight.py b/sklearn/utils/class_weight.py index cd2a91601cf9b..88afa758f69c9 100644 --- a/sklearn/utils/class_weight.py +++ b/sklearn/utils/class_weight.py @@ -114,12 +114,12 @@ def compute_sample_weight(class_weight, y, indices=None): y = np.reshape(y, (-1, 1)) n_outputs = y.shape[1] - if isinstance(class_weight, six.string_types): + if isinstance(class_weight, six.str): if class_weight not in ['balanced']: raise ValueError('The only valid preset for class_weight is ' '"balanced". Given "%s".' % class_weight) elif (indices is not None and - not isinstance(class_weight, six.string_types)): + not isinstance(class_weight, six.str)): raise ValueError('The only valid class_weight for subsampling is ' '"balanced". Given "%s".' 
% class_weight) elif n_outputs > 1: diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py index f4d28ec227bab..3c25de040e2a2 100644 --- a/sklearn/utils/multiclass.py +++ b/sklearn/utils/multiclass.py @@ -16,7 +16,7 @@ import numpy as np -from ..externals.six import string_types +from ..externals.six import str from ..utils.fixes import _Sequence as Sequence from .validation import check_array @@ -98,7 +98,7 @@ def unique_labels(*ys): ys_labels = set(chain.from_iterable(_unique_labels(y) for y in ys)) # Check that we don't mix string type with number type - if (len(set(isinstance(label, string_types) for label in ys_labels)) > 1): + if (len(set(isinstance(label, str) for label in ys_labels)) > 1): raise ValueError("Mix of label input types (string and number)") return np.array(sorted(ys_labels)) @@ -236,7 +236,7 @@ def type_of_target(y): 'multilabel-indicator' """ valid = ((isinstance(y, (Sequence, spmatrix)) or hasattr(y, '__array__')) - and not isinstance(y, string_types)) + and not isinstance(y, str)) if not valid: raise ValueError('Expected array-like (array or non-string sequence), ' @@ -258,7 +258,7 @@ def type_of_target(y): # The old sequence of sequences format try: if (not hasattr(y[0], '__array__') and isinstance(y[0], Sequence) - and not isinstance(y[0], string_types)): + and not isinstance(y[0], str)): raise ValueError('You appear to be using a legacy multi-label data' ' representation. Sequence of sequences are no' ' longer supported; use a binary array or sparse' @@ -268,7 +268,7 @@ def type_of_target(y): # Invalid inputs if y.ndim > 2 or (y.dtype == object and len(y) and - not isinstance(y.flat[0], string_types)): + not isinstance(y.flat[0], str)): return 'unknown' # [[[1, 2]]] or [obj_1] and not ["label_1"] if y.ndim == 2 and y.shape[1] == 0: diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 3181b925ba83a..cf5588a5506d5 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -205,7 +205,7 @@ def check_memory(memory): If ``memory`` is not joblib.Memory-like. """ - if memory is None or isinstance(memory, six.string_types): + if memory is None or isinstance(memory, six.str): if LooseVersion(joblib_version) < '0.12': memory = Memory(cachedir=memory, verbose=0) else: @@ -308,7 +308,7 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, changed_format = False - if isinstance(accept_sparse, six.string_types): + if isinstance(accept_sparse, six.str): accept_sparse = [accept_sparse] # Indices dtype validation @@ -467,7 +467,7 @@ def check_array(array, accept_sparse=False, accept_large_sparse=True, array_orig = array # store whether originally we wanted numeric dtype - dtype_numeric = isinstance(dtype, six.string_types) and dtype == "numeric" + dtype_numeric = isinstance(dtype, six.str) and dtype == "numeric" dtype_orig = getattr(array, "dtype", None) if not hasattr(dtype_orig, 'kind'): @@ -501,7 +501,7 @@ def check_array(array, accept_sparse=False, accept_large_sparse=True, '. 
Got {!r} instead'.format(force_all_finite)) if estimator is not None: - if isinstance(estimator, six.string_types): + if isinstance(estimator, six.str): estimator_name = estimator else: estimator_name = estimator.__class__.__name__ From d0dd1d1eb49ac3b6a81553af38f0c4a8f7bb7bdb Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 20 Nov 2018 18:12:26 -0500 Subject: [PATCH 04/29] remove string_types and six imports --- examples/ensemble/plot_adaboost_multiclass.py | 2 -- sklearn/cluster/mean_shift_.py | 3 +-- sklearn/cluster/tests/test_k_means.py | 2 +- sklearn/cluster/tests/test_spectral.py | 4 ++-- sklearn/compose/_column_transformer.py | 11 +++++------ sklearn/compose/tests/test_column_transformer.py | 2 +- sklearn/cross_decomposition/pls_.py | 1 - sklearn/datasets/kddcup99.py | 1 - sklearn/datasets/openml.py | 2 +- sklearn/datasets/samples_generator.py | 1 - sklearn/datasets/svmlight_format.py | 6 ++---- sklearn/datasets/tests/test_lfw.py | 1 - sklearn/decomposition/base.py | 1 - sklearn/decomposition/factor_analysis.py | 1 - sklearn/decomposition/fastica_.py | 5 ++--- sklearn/decomposition/pca.py | 4 +--- sklearn/decomposition/tests/test_online_lda.py | 1 - sklearn/discriminant_analysis.py | 1 - sklearn/ensemble/forest.py | 3 +-- sklearn/ensemble/gradient_boosting.py | 7 +++---- sklearn/ensemble/iforest.py | 3 +-- sklearn/ensemble/partial_dependence.py | 6 ++---- sklearn/ensemble/tests/test_gradient_boosting.py | 5 +++-- sklearn/ensemble/weight_boosting.py | 4 +--- sklearn/feature_extraction/dict_vectorizer.py | 8 +++----- sklearn/feature_extraction/text.py | 12 +++++------- sklearn/feature_selection/base.py | 1 - sklearn/feature_selection/from_model.py | 3 +-- sklearn/feature_selection/tests/test_rfe.py | 2 +- sklearn/gaussian_process/kernels.py | 5 ++--- sklearn/impute.py | 6 +----- sklearn/linear_model/base.py | 3 +-- sklearn/linear_model/coordinate_descent.py | 4 +--- sklearn/linear_model/least_angle.py | 2 -- sklearn/linear_model/logistic.py | 5 ++--- sklearn/linear_model/ridge.py | 1 - sklearn/linear_model/stochastic_gradient.py | 1 - sklearn/linear_model/tests/test_least_angle.py | 2 +- sklearn/linear_model/theil_sen.py | 1 - sklearn/manifold/spectral_embedding_.py | 3 +-- sklearn/manifold/tests/test_t_sne.py | 2 +- sklearn/metrics/scorer.py | 11 +++++------ sklearn/mixture/base.py | 1 - sklearn/mixture/tests/test_gaussian_mixture.py | 2 +- sklearn/model_selection/_search.py | 5 ++--- sklearn/model_selection/tests/test_search.py | 3 +-- sklearn/model_selection/tests/test_split.py | 3 --- sklearn/model_selection/tests/test_validation.py | 2 +- sklearn/multioutput.py | 1 - sklearn/naive_bayes.py | 1 - sklearn/neighbors/base.py | 1 - sklearn/neural_network/multilayer_perceptron.py | 1 - sklearn/neural_network/rbm.py | 1 - sklearn/neural_network/tests/test_mlp.py | 2 +- sklearn/neural_network/tests/test_rbm.py | 2 +- sklearn/pipeline.py | 1 - sklearn/preprocessing/_encoders.py | 9 ++++----- sklearn/preprocessing/base.py | 3 +-- sklearn/preprocessing/data.py | 1 - sklearn/preprocessing/imputation.py | 4 ---- sklearn/preprocessing/label.py | 4 ---- sklearn/preprocessing/tests/test_discretization.py | 1 - sklearn/random_projection.py | 3 +-- sklearn/semi_supervised/label_propagation.py | 1 - sklearn/svm/base.py | 1 - sklearn/tree/export.py | 5 ++--- sklearn/tree/tree.py | 3 +-- sklearn/utils/class_weight.py | 5 ++--- sklearn/utils/extmath.py | 1 - sklearn/utils/metaestimators.py | 1 - sklearn/utils/tests/test_estimator_checks.py | 2 +- sklearn/utils/tests/test_fast_dict.py | 1 - 
sklearn/utils/tests/test_multiclass.py | 1 - sklearn/utils/validation.py | 9 ++++----- 74 files changed, 75 insertions(+), 155 deletions(-) diff --git a/examples/ensemble/plot_adaboost_multiclass.py b/examples/ensemble/plot_adaboost_multiclass.py index 906df85ccf645..941a2ab731769 100644 --- a/examples/ensemble/plot_adaboost_multiclass.py +++ b/examples/ensemble/plot_adaboost_multiclass.py @@ -29,8 +29,6 @@ # # License: BSD 3 clause -from sklearn.externals.six.moves import zip - import matplotlib.pyplot as plt from sklearn.datasets import make_gaussian_quantiles diff --git a/sklearn/cluster/mean_shift_.py b/sklearn/cluster/mean_shift_.py index b6fb0f8f09bd7..89117164a63f8 100644 --- a/sklearn/cluster/mean_shift_.py +++ b/sklearn/cluster/mean_shift_.py @@ -18,7 +18,6 @@ import warnings from collections import defaultdict -from ..externals import six from ..utils.validation import check_is_fitted from ..utils import check_random_state, gen_batches, check_array from ..base import BaseEstimator, ClusterMixin @@ -285,7 +284,7 @@ def get_bin_seeds(X, bin_size, min_bin_freq=1): bin_sizes[tuple(binned_point)] += 1 # Select only those bins as seeds which have enough members - bin_seeds = np.array([point for point, freq in six.iteritems(bin_sizes) if + bin_seeds = np.array([point for point, freq in bin_sizes.items()) if freq >= min_bin_freq], dtype=np.float32) if len(bin_seeds) == len(X): warnings.warn("Binning data failed with provided bin_size=%f," diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py index cec0fa2897546..0386665610915 100644 --- a/sklearn/cluster/tests/test_k_means.py +++ b/sklearn/cluster/tests/test_k_means.py @@ -30,7 +30,7 @@ from sklearn.cluster.k_means_ import _labels_inertia from sklearn.cluster.k_means_ import _mini_batch_step from sklearn.datasets.samples_generator import make_blobs -from sklearn.externals.six.moves import cStringIO as StringIO +from io import StringIO from sklearn.metrics.cluster import homogeneity_score diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py index 0c220e7615e67..4f14d3f72c0ae 100644 --- a/sklearn/cluster/tests/test_spectral.py +++ b/sklearn/cluster/tests/test_spectral.py @@ -6,7 +6,7 @@ import pytest -from sklearn.externals.six.moves import cPickle +import pickle from sklearn.utils import check_random_state from sklearn.utils.testing import assert_equal @@ -52,7 +52,7 @@ def test_spectral_clustering(eigen_solver, assign_labels): assert adjusted_rand_score(labels, [1, 1, 1, 0, 0, 0, 0]) == 1 - model_copy = cPickle.loads(cPickle.dumps(model)) + model_copy = pickle.loads(pickle.dumps(model)) assert model_copy.n_clusters == model.n_clusters assert model_copy.eigen_solver == model.eigen_solver assert_array_equal(model_copy.labels_, model.labels_) diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index 29b11a5a445a7..6ebbb8701658a 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -15,7 +15,6 @@ from ..base import clone, TransformerMixin from ..utils._joblib import Parallel, delayed -from ..externals import six from ..pipeline import _fit_transform_one, _transform_one, _name_estimators from ..preprocessing import FunctionTransformer from ..utils import Bunch @@ -545,7 +544,7 @@ def _check_key_type(key, superclass): ---------- key : scalar, list, slice, array-like The column specification to check - superclass : int or six.str + superclass : int or str The type for which to check 
the `key` """ @@ -560,7 +559,7 @@ def _check_key_type(key, superclass): if superclass is int: return key.dtype.kind == 'i' else: - # superclass = six.str + # superclass = str return key.dtype.kind in ('O', 'U', 'S') return False @@ -589,7 +588,7 @@ def _get_column(X, key): # check whether we have string column names or integers if _check_key_type(key, int): column_names = False - elif _check_key_type(key, six.str): + elif _check_key_type(key, str): column_names = True elif hasattr(key, 'dtype') and np.issubdtype(key.dtype, np.bool_): # boolean mask @@ -635,13 +634,13 @@ def _get_column_indices(X, key): else: return list(key) - elif _check_key_type(key, six.str): + elif _check_key_type(key, str): try: all_columns = list(X.columns) except AttributeError: raise ValueError("Specifying the columns using strings is only " "supported for pandas DataFrames") - if isinstance(key, six.str): + if isinstance(key, str): columns = [key] elif isinstance(key, slice): start, stop = key.start, key.stop diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py index fce40c11422a6..e39b6de4d0859 100644 --- a/sklearn/compose/tests/test_column_transformer.py +++ b/sklearn/compose/tests/test_column_transformer.py @@ -785,7 +785,7 @@ def test_column_transformer_remainder_numpy(key): def test_column_transformer_remainder_pandas(key): # test different ways that columns are specified with passthrough pd = pytest.importorskip('pandas') - if isinstance(key, six.str) and key == 'pd-index': + if isinstance(key, str) and key == 'pd-index': key = pd.Index(['first']) X_array = np.array([[0, 1, 2], [2, 4, 6]]).T diff --git a/sklearn/cross_decomposition/pls_.py b/sklearn/cross_decomposition/pls_.py index df7cb22b895f7..dcc614677f5f7 100644 --- a/sklearn/cross_decomposition/pls_.py +++ b/sklearn/cross_decomposition/pls_.py @@ -17,7 +17,6 @@ from ..utils.extmath import svd_flip from ..utils.validation import check_is_fitted, FLOAT_DTYPES from ..exceptions import ConvergenceWarning -from ..externals import six __all__ = ['PLSCanonical', 'PLSRegression', 'PLSSVD'] diff --git a/sklearn/datasets/kddcup99.py b/sklearn/datasets/kddcup99.py index 4fac89d7a65df..e460503474a9f 100644 --- a/sklearn/datasets/kddcup99.py +++ b/sklearn/datasets/kddcup99.py @@ -21,7 +21,6 @@ from .base import _fetch_remote from .base import get_data_home from .base import RemoteFileMetadata -from ..externals import six from ..utils import Bunch from ..utils import _joblib from ..utils import check_random_state diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py index d1d240cad8301..84385a6bb2700 100644 --- a/sklearn/datasets/openml.py +++ b/sklearn/datasets/openml.py @@ -574,7 +574,7 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None, target_column = [] elif not isinstance(target_column, list): raise TypeError("Did not recognize type of target_column" - "Should be six.string_type, list or None. Got: " + "Should be string_type, list or None. 
Got: " "{}".format(type(target_column))) data_columns = _valid_data_column_names(features_list, target_column) diff --git a/sklearn/datasets/samples_generator.py b/sklearn/datasets/samples_generator.py index 17a30b1ec9f37..b8ad97628cbe7 100644 --- a/sklearn/datasets/samples_generator.py +++ b/sklearn/datasets/samples_generator.py @@ -17,7 +17,6 @@ from ..utils import shuffle as util_shuffle from ..utils.fixes import _Iterable as Iterable from ..utils.random import sample_without_replacement -from ..externals import six map = six.moves.map zip = six.moves.zip diff --git a/sklearn/datasets/svmlight_format.py b/sklearn/datasets/svmlight_format.py index 81c0d7c1ac1c5..f5e2edfe53354 100644 --- a/sklearn/datasets/svmlight_format.py +++ b/sklearn/datasets/svmlight_format.py @@ -23,9 +23,7 @@ import scipy.sparse as sp from .. import __version__ -from ..externals import six -from ..externals.six import u, b -from ..externals.six.moves import range, zip + from ..utils import check_array, IS_PYPY if not IS_PYPY: @@ -159,7 +157,7 @@ def get_data(): def _gen_open(f): if isinstance(f, int): # file descriptor return io.open(f, "rb", closefd=False) - elif not isinstance(f, six.str): + elif not isinstance(f, str): raise TypeError("expected {str, int, file-like}, got %s" % type(f)) _, ext = os.path.splitext(f) diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py index be78480b6ce50..422969881fe86 100644 --- a/sklearn/datasets/tests/test_lfw.py +++ b/sklearn/datasets/tests/test_lfw.py @@ -14,7 +14,6 @@ import tempfile import numpy as np from functools import partial -from sklearn.externals import six from sklearn.externals._pilutil import pillow_installed, imsave from sklearn.datasets import fetch_lfw_pairs from sklearn.datasets import fetch_lfw_people diff --git a/sklearn/decomposition/base.py b/sklearn/decomposition/base.py index cc647e2658374..faebbd0c74ac6 100644 --- a/sklearn/decomposition/base.py +++ b/sklearn/decomposition/base.py @@ -14,7 +14,6 @@ from ..base import BaseEstimator, TransformerMixin from ..utils import check_array from ..utils.validation import check_is_fitted -from ..externals import six from abc import ABCMeta, abstractmethod diff --git a/sklearn/decomposition/factor_analysis.py b/sklearn/decomposition/factor_analysis.py index 5ede30bca42bc..f5b1834643c5d 100644 --- a/sklearn/decomposition/factor_analysis.py +++ b/sklearn/decomposition/factor_analysis.py @@ -26,7 +26,6 @@ from ..base import BaseEstimator, TransformerMixin -from ..externals.six.moves import xrange from ..utils import check_array, check_random_state from ..utils.extmath import fast_logdet, randomized_svd, squared_norm from ..utils.validation import check_is_fitted diff --git a/sklearn/decomposition/fastica_.py b/sklearn/decomposition/fastica_.py index c7e5fbe6fd685..5995357c4f4a9 100644 --- a/sklearn/decomposition/fastica_.py +++ b/sklearn/decomposition/fastica_.py @@ -16,8 +16,7 @@ from ..base import BaseEstimator, TransformerMixin from ..exceptions import ConvergenceWarning -from ..externals import six -from ..externals.six import moves + from ..utils import check_array, as_float_array, check_random_state from ..utils.validation import check_is_fitted from ..utils.validation import FLOAT_DTYPES @@ -286,7 +285,7 @@ def my_g(x): def g(x, fun_args): return fun(x, **fun_args) else: - exc = ValueError if isinstance(fun, six.str) else TypeError + exc = ValueError if isinstance(fun, str) else TypeError raise exc("Unknown function %r;" " should be one of 'logcosh', 'exp', 'cube' or callable" % 
fun) diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py index eff2a6653401c..887c508ead78c 100644 --- a/sklearn/decomposition/pca.py +++ b/sklearn/decomposition/pca.py @@ -19,8 +19,6 @@ from scipy.sparse import issparse from scipy.sparse.linalg import svds -from ..externals import six - from .base import _BasePCA from ..utils import check_random_state from ..utils import check_array @@ -479,7 +477,7 @@ def _fit_truncated(self, X, n_components, svd_solver): """ n_samples, n_features = X.shape - if isinstance(n_components, six.str): + if isinstance(n_components, str): raise ValueError("n_components=%r cannot be a string " "with svd_solver='%s'" % (n_components, svd_solver)) diff --git a/sklearn/decomposition/tests/test_online_lda.py b/sklearn/decomposition/tests/test_online_lda.py index 63346f3dbddbd..b7d95eeb6d899 100644 --- a/sklearn/decomposition/tests/test_online_lda.py +++ b/sklearn/decomposition/tests/test_online_lda.py @@ -20,7 +20,6 @@ from sklearn.utils.testing import if_safe_multiprocessing_with_blas from sklearn.exceptions import NotFittedError -from sklearn.externals.six.moves import xrange from sklearn.externals.six import StringIO diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index 7f8d9808cc0e2..184f0b9b530a1 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -14,7 +14,6 @@ import numpy as np from scipy import linalg from .externals.six import str -from .externals.six.moves import xrange from .base import BaseEstimator, TransformerMixin, ClassifierMixin from .linear_model.base import LinearClassifierMixin diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index db793a494e594..a5f539a8653a9 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -51,7 +51,6 @@ class calls the ``fit`` method of each sub-estimator on random samples from ..base import ClassifierMixin, RegressorMixin from ..utils._joblib import Parallel, delayed -from ..externals import six from ..metrics import r2_score from ..preprocessing import OneHotEncoder from ..tree import (DecisionTreeClassifier, DecisionTreeRegressor, @@ -493,7 +492,7 @@ def _validate_y_class_weight(self, y): if self.class_weight is not None: valid_presets = ('balanced', 'balanced_subsample') - if isinstance(self.class_weight, six.str): + if isinstance(self.class_weight, str): if self.class_weight not in valid_presets: raise ValueError('Valid presets for class_weight include ' '"balanced" and "balanced_subsample". Given "%s".' 
diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index 9dcdcfa934881..04d8dab7570a8 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -29,7 +29,6 @@ from .base import BaseEnsemble from ..base import ClassifierMixin from ..base import RegressorMixin -from ..externals import six from ._gradient_boosting import predict_stages from ._gradient_boosting import predict_stage @@ -1234,7 +1233,7 @@ def _check_params(self): "was %r" % self.subsample) if self.init is not None: - if isinstance(self.init, six.str): + if isinstance(self.init, str): if self.init not in INIT_ESTIMATORS: raise ValueError('init="%s" is not supported' % self.init) else: @@ -1248,7 +1247,7 @@ def _check_params(self): raise ValueError("alpha must be in (0.0, 1.0) but " "was %r" % self.alpha) - if isinstance(self.max_features, six.str): + if isinstance(self.max_features, str): if self.max_features == "auto": # if is_classification if self.n_classes_ > 1: @@ -1293,7 +1292,7 @@ def _init_state(self): if self.init is None: self.init_ = self.loss_.init_estimator() - elif isinstance(self.init, six.str): + elif isinstance(self.init, str): self.init_ = INIT_ESTIMATORS[self.init]() else: self.init_ = self.init diff --git a/sklearn/ensemble/iforest.py b/sklearn/ensemble/iforest.py index 61000b05e79dc..9ca7af6ab9eaf 100644 --- a/sklearn/ensemble/iforest.py +++ b/sklearn/ensemble/iforest.py @@ -11,7 +11,6 @@ from scipy.sparse import issparse import numbers -from ..externals import six from ..tree import ExtraTreeRegressor from ..utils import check_random_state, check_array from ..utils.fixes import _joblib_parallel_args @@ -242,7 +241,7 @@ def fit(self, X, y=None, sample_weight=None): # ensure that max_sample is in [1, n_samples]: n_samples = X.shape[0] - if isinstance(self.max_samples, six.str): + if isinstance(self.max_samples, str): if self.max_samples == 'auto': max_samples = min(256, n_samples) else: diff --git a/sklearn/ensemble/partial_dependence.py b/sklearn/ensemble/partial_dependence.py index b9ec635278ce4..74a6497e42191 100644 --- a/sklearn/ensemble/partial_dependence.py +++ b/sklearn/ensemble/partial_dependence.py @@ -11,8 +11,6 @@ from ..utils.extmath import cartesian from ..utils._joblib import Parallel, delayed -from ..externals import six -from ..externals.six.moves import map, range, zip from ..utils import check_array from ..utils.validation import check_is_fitted from ..tree._tree import DTYPE @@ -275,7 +273,7 @@ def plot_partial_dependence(gbrt, X, features, feature_names=None, feature_names = feature_names.tolist() def convert_feature(fx): - if isinstance(fx, six.str): + if isinstance(fx, str): try: fx = feature_names.index(fx) except ValueError: @@ -285,7 +283,7 @@ def convert_feature(fx): # convert features into a seq of int tuples tmp_features = [] for fxs in features: - if isinstance(fxs, (numbers.Integral,) + six.str): + if isinstance(fxs, (numbers.Integral,) + str): fxs = (fxs,) try: fxs = np.array([convert_feature(fx) for fx in fxs], dtype=np.int32) diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index dce50755ed674..ad29041e3d41e 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -738,7 +738,8 @@ def test_oob_multilcass_iris(): def test_verbose_output(): # Check verbose=1 does not cause error. 
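# Illustrative sketch of the stdout-capture idiom these tests keep after the
# six.moves cStringIO import is replaced by the stdlib io module:
import sys
from io import StringIO

old_stdout = sys.stdout
sys.stdout = StringIO()
try:
    print("verbose training output")
    captured = sys.stdout.getvalue()
finally:
    sys.stdout = old_stdout
assert "verbose" in captured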
- from sklearn.externals.six.moves import cStringIO as StringIO + from io import StringIO + import sys old_stdout = sys.stdout sys.stdout = StringIO() @@ -763,7 +764,7 @@ def test_verbose_output(): def test_more_verbose_output(): # Check verbose=2 does not cause error. - from sklearn.externals.six.moves import cStringIO as StringIO + from io import StringIO import sys old_stdout = sys.stdout sys.stdout = StringIO() diff --git a/sklearn/ensemble/weight_boosting.py b/sklearn/ensemble/weight_boosting.py index 80764fbf11eea..c55e50d5aed30 100644 --- a/sklearn/ensemble/weight_boosting.py +++ b/sklearn/ensemble/weight_boosting.py @@ -29,9 +29,7 @@ from .base import BaseEnsemble from ..base import ClassifierMixin, RegressorMixin, is_regressor, is_classifier -from ..externals import six -from ..externals.six.moves import zip -from ..externals.six.moves import xrange as range + from .forest import BaseForest from ..tree import DecisionTreeClassifier, DecisionTreeRegressor from ..tree.tree import BaseDecisionTree diff --git a/sklearn/feature_extraction/dict_vectorizer.py b/sklearn/feature_extraction/dict_vectorizer.py index 6393914f76e7a..9636580af18ee 100644 --- a/sklearn/feature_extraction/dict_vectorizer.py +++ b/sklearn/feature_extraction/dict_vectorizer.py @@ -9,8 +9,6 @@ import scipy.sparse as sp from ..base import BaseEstimator, TransformerMixin -from ..externals import six -from ..externals.six.moves import xrange from ..utils import check_array, tosequence from ..utils.fixes import _Mapping as Mapping @@ -119,7 +117,7 @@ def fit(self, X, y=None): for x in X: for f, v in six.iteritems(x): - if isinstance(v, six.str): + if isinstance(v, str): f = "%s%s%s" % (f, self.separator, v) if f not in vocab: feature_names.append(f) @@ -165,7 +163,7 @@ def _transform(self, X, fitting): # same time for x in X: for f, v in six.iteritems(x): - if isinstance(v, six.str): + if isinstance(v, str): f = "%s%s%s" % (f, self.separator, v) v = 1 if f in vocab: @@ -299,7 +297,7 @@ def transform(self, X): for i, x in enumerate(X): for f, v in six.iteritems(x): - if isinstance(v, six.str): + if isinstance(v, str): f = "%s%s%s" % (f, self.separator, v) v = 1 try: diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index d5a66f6e1462b..32b8a879cb70e 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -25,8 +25,6 @@ import scipy.sparse as sp from ..base import BaseEstimator, TransformerMixin -from ..externals import six -from ..externals.six.moves import xrange from ..preprocessing import normalize from .hashing import FeatureHasher from .stop_words import ENGLISH_STOP_WORDS @@ -107,7 +105,7 @@ def strip_tags(s): def _check_stop_list(stop): if stop == "english": return ENGLISH_STOP_WORDS - elif isinstance(stop, six.str): + elif isinstance(stop, str): raise ValueError("not a built-in stop list: %s" % stop) elif stop is None: return None @@ -588,7 +586,7 @@ def fit(self, X, y=None): Training data. """ # triggers a parameter validation - if isinstance(X, six.str): + if isinstance(X, str): raise ValueError( "Iterable over raw text documents expected, " "string object received.") @@ -613,7 +611,7 @@ def transform(self, X): X : scipy.sparse matrix, shape = (n_samples, self.n_features) Document-term matrix. 
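# Illustrative sketch of the behaviour guarded by the isinstance(X, str)
# checks in the surrounding text.py hunks: the vectorizers expect an iterable
# of documents and reject a bare string.
from sklearn.feature_extraction.text import CountVectorizer

CountVectorizer().fit(["one document", "another document"])    # accepted
try:
    CountVectorizer().fit("a single string")                   # rejected
except ValueError as exc:
    assert "Iterable over raw text documents expected" in str(exc)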
""" - if isinstance(X, six.str): + if isinstance(X, str): raise ValueError( "Iterable over raw text documents expected, " "string object received.") @@ -1018,7 +1016,7 @@ def fit_transform(self, raw_documents, y=None): # We intentionally don't call the transform method to make # fit_transform overridable without unwanted side effects in # TfidfVectorizer. - if isinstance(raw_documents, six.str): + if isinstance(raw_documents, str): raise ValueError( "Iterable over raw text documents expected, " "string object received.") @@ -1073,7 +1071,7 @@ def transform(self, raw_documents): X : sparse matrix, [n_samples, n_features] Document-term matrix. """ - if isinstance(raw_documents, six.str): + if isinstance(raw_documents, str): raise ValueError( "Iterable over raw text documents expected, " "string object received.") diff --git a/sklearn/feature_selection/base.py b/sklearn/feature_selection/base.py index 5bb0b3ea890c3..441b4f45a80fd 100644 --- a/sklearn/feature_selection/base.py +++ b/sklearn/feature_selection/base.py @@ -12,7 +12,6 @@ from ..base import TransformerMixin from ..utils import check_array, safe_mask -from ..externals import six class SelectorMixin(six.with_metaclass(ABCMeta, TransformerMixin)): diff --git a/sklearn/feature_selection/from_model.py b/sklearn/feature_selection/from_model.py index 38455d1f9224c..fb26f9d685688 100644 --- a/sklearn/feature_selection/from_model.py +++ b/sklearn/feature_selection/from_model.py @@ -6,7 +6,6 @@ from .base import SelectorMixin from ..base import BaseEstimator, clone, MetaEstimatorMixin -from ..externals import six from ..exceptions import NotFittedError from ..utils.metaestimators import if_delegate_has_method @@ -48,7 +47,7 @@ def _calculate_threshold(estimator, importances, threshold): else: threshold = "mean" - if isinstance(threshold, six.str): + if isinstance(threshold, str): if "*" in threshold: scale, reference = threshold.split("*") scale = float(scale.strip()) diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py index 30307fa28902b..c7d360f728b1a 100644 --- a/sklearn/feature_selection/tests/test_rfe.py +++ b/sklearn/feature_selection/tests/test_rfe.py @@ -214,7 +214,7 @@ def test_rfecv_mockclassifier(): def test_rfecv_verbose_output(): # Check verbose=1 is producing an output. 
- from sklearn.externals.six.moves import cStringIO as StringIO + from io import StringIO import sys sys.stdout = StringIO() diff --git a/sklearn/gaussian_process/kernels.py b/sklearn/gaussian_process/kernels.py index b48113c4a75ec..e5a14c5e1db17 100644 --- a/sklearn/gaussian_process/kernels.py +++ b/sklearn/gaussian_process/kernels.py @@ -28,7 +28,6 @@ from scipy.spatial.distance import pdist, cdist, squareform from ..metrics.pairwise import pairwise_kernels -from ..externals import six from ..base import clone from ..utils.fixes import signature @@ -91,7 +90,7 @@ class Hyperparameter(namedtuple('Hyperparameter', __slots__ = () def __new__(cls, name, value_type, bounds, n_elements=1, fixed=None): - if not isinstance(bounds, six.str) or bounds != "fixed": + if not isinstance(bounds, str) or bounds != "fixed": bounds = np.atleast_2d(bounds) if n_elements > 1: # vector-valued parameter if bounds.shape[0] == 1: @@ -102,7 +101,7 @@ def __new__(cls, name, value_type, bounds, n_elements=1, fixed=None): % (name, n_elements, bounds.shape[0])) if fixed is None: - fixed = isinstance(bounds, six.str) and bounds == "fixed" + fixed = isinstance(bounds, str) and bounds == "fixed" return super(Hyperparameter, cls).__new__( cls, name, value_type, bounds, n_elements, fixed) diff --git a/sklearn/impute.py b/sklearn/impute.py index aa5fa90030f17..6088d4d1853eb 100644 --- a/sklearn/impute.py +++ b/sklearn/impute.py @@ -19,10 +19,6 @@ from .utils.fixes import _object_dtype_isnan from .utils import is_scalar_nan -from .externals import six - -zip = six.moves.zip -map = six.moves.map __all__ = [ 'MissingIndicator', @@ -565,7 +561,7 @@ def fit(self, X, y=None): raise ValueError("'features' has to be either 'missing-only' or " "'all'. Got {} instead.".format(self.features)) - if not ((isinstance(self.sparse, six.str) and + if not ((isinstance(self.sparse, str) and self.sparse == "auto") or isinstance(self.sparse, bool)): raise ValueError("'sparse' has to be a boolean or 'auto'. " "Got {!r} instead.".format(self.sparse)) diff --git a/sklearn/linear_model/base.py b/sklearn/linear_model/base.py index 539473b57cf4b..d931c55f7e63a 100644 --- a/sklearn/linear_model/base.py +++ b/sklearn/linear_model/base.py @@ -23,7 +23,6 @@ from scipy import linalg from scipy import sparse -from ..externals import six from ..utils._joblib import Parallel, delayed from ..base import BaseEstimator, ClassifierMixin, RegressorMixin from ..utils import check_array, check_X_y @@ -519,7 +518,7 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy, Xy = None # precompute if n_samples > n_features - if isinstance(precompute, six.str) and precompute == 'auto': + if isinstance(precompute, str) and precompute == 'auto': precompute = (n_samples > n_features) if precompute is True: diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py index d70f85a6d3caf..c51d3c577f4e9 100644 --- a/sklearn/linear_model/coordinate_descent.py +++ b/sklearn/linear_model/coordinate_descent.py @@ -19,8 +19,6 @@ from ..utils.validation import check_random_state from ..model_selection import check_cv from ..utils._joblib import Parallel, delayed, effective_n_jobs -from ..externals import six -from ..externals.six.moves import xrange from ..utils.extmath import safe_sparse_dot from ..utils.fixes import _joblib_parallel_args from ..utils.validation import check_is_fitted @@ -698,7 +696,7 @@ def fit(self, X, y, check_input=True): "well. 
You are advised to use the LinearRegression " "estimator", stacklevel=2) - if isinstance(self.precompute, six.str): + if isinstance(self.precompute, str): raise ValueError('precompute should be one of True, False or' ' array-like. Got %r' % self.precompute) diff --git a/sklearn/linear_model/least_angle.py b/sklearn/linear_model/least_angle.py index 3c5b192fd709c..2079fe3d11379 100644 --- a/sklearn/linear_model/least_angle.py +++ b/sklearn/linear_model/least_angle.py @@ -24,8 +24,6 @@ from ..model_selection import check_cv from ..exceptions import ConvergenceWarning from ..utils._joblib import Parallel, delayed -from ..externals.six.moves import xrange -from ..externals.six import str solve_triangular_args = {'check_finite': False} diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index e1cfbc5cc246f..ec44613670ccd 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -35,7 +35,6 @@ from ..utils._joblib import Parallel, delayed, effective_n_jobs from ..utils.fixes import _joblib_parallel_args from ..model_selection import check_cv -from ..externals import six from ..metrics import get_scorer @@ -976,7 +975,7 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10, scores = list() - if isinstance(scoring, six.str): + if isinstance(scoring, str): scoring = get_scorer(scoring) for w in coefs: if multi_class == 'ovr': @@ -1919,7 +1918,7 @@ def score(self, X, y, sample_weight=None): "This warning will disappear in version 0.22.", ChangedBehaviorWarning) scoring = self.scoring or 'accuracy' - if isinstance(scoring, six.str): + if isinstance(scoring, str): scoring = get_scorer(scoring) return scoring(self, X, y, sample_weight=sample_weight) diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py index 5d53f2f200132..36402d340a30d 100644 --- a/sklearn/linear_model/ridge.py +++ b/sklearn/linear_model/ridge.py @@ -29,7 +29,6 @@ from ..utils import column_or_1d from ..preprocessing import LabelBinarizer from ..model_selection import GridSearchCV -from ..externals import six from ..metrics.scorer import check_scoring from ..exceptions import ConvergenceWarning diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py index 5eb4ee2c05d26..1ee974c0389ad 100644 --- a/sklearn/linear_model/stochastic_gradient.py +++ b/sklearn/linear_model/stochastic_gradient.py @@ -20,7 +20,6 @@ from ..utils.multiclass import _check_partial_fit_first_call from ..utils.validation import check_is_fitted from ..exceptions import ConvergenceWarning -from ..externals import six from ..model_selection import StratifiedShuffleSplit, ShuffleSplit from .sgd_fast import plain_sgd, average_sgd diff --git a/sklearn/linear_model/tests/test_least_angle.py b/sklearn/linear_model/tests/test_least_angle.py index f1b3a0c2de298..1397b1c0206eb 100644 --- a/sklearn/linear_model/tests/test_least_angle.py +++ b/sklearn/linear_model/tests/test_least_angle.py @@ -31,7 +31,7 @@ def test_simple(): # Principle of Lars is to keep covariances tied and decreasing # also test verbose output - from sklearn.externals.six.moves import cStringIO as StringIO + from io import StringIO import sys old_stdout = sys.stdout try: diff --git a/sklearn/linear_model/theil_sen.py b/sklearn/linear_model/theil_sen.py index 297614cd9a7f2..863a5d54672be 100644 --- a/sklearn/linear_model/theil_sen.py +++ b/sklearn/linear_model/theil_sen.py @@ -22,7 +22,6 @@ from ..utils import check_random_state from ..utils import check_X_y from 
..utils._joblib import Parallel, delayed, effective_n_jobs -from ..externals.six.moves import xrange as range from ..exceptions import ConvergenceWarning _EPSILON = np.finfo(np.double).eps diff --git a/sklearn/manifold/spectral_embedding_.py b/sklearn/manifold/spectral_embedding_.py index 442b79c6a399e..2eb2cac6e80ec 100644 --- a/sklearn/manifold/spectral_embedding_.py +++ b/sklearn/manifold/spectral_embedding_.py @@ -16,7 +16,6 @@ from scipy.sparse.csgraph import laplacian as csgraph_laplacian from ..base import BaseEstimator -from ..externals import six from ..utils import check_random_state, check_array, check_symmetric from ..utils.extmath import _deterministic_vector_sign_flip from ..metrics.pairwise import rbf_kernel @@ -510,7 +509,7 @@ def fit(self, X, y=None): X = check_array(X, ensure_min_samples=2, estimator=self) random_state = check_random_state(self.random_state) - if isinstance(self.affinity, six.str): + if isinstance(self.affinity, str): if self.affinity not in set(("nearest_neighbors", "rbf", "precomputed")): raise ValueError(("%s is not a valid affinity. Expected " diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py index cc692ae0d0cd0..c28481aa14187 100644 --- a/sklearn/manifold/tests/test_t_sne.py +++ b/sklearn/manifold/tests/test_t_sne.py @@ -1,5 +1,5 @@ import sys -from sklearn.externals.six.moves import cStringIO as StringIO +from io import StringIO import numpy as np import scipy.sparse as sp diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index d5dfac2ef65e3..89bf9a9ba8955 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -40,7 +40,6 @@ from ..utils.multiclass import type_of_target from ..utils.fixes import _Iterable as Iterable -from ..externals import six from ..base import is_regressor @@ -224,7 +223,7 @@ def get_scorer(scoring): scorer : callable The scorer. """ - if isinstance(scoring, six.str): + if isinstance(scoring, str): try: scorer = SCORERS[scoring] except KeyError: @@ -269,7 +268,7 @@ def check_scoring(estimator, scoring=None, allow_none=False): if not hasattr(estimator, 'fit'): raise TypeError("estimator should be an estimator implementing " "'fit' method, %r was passed" % estimator) - if isinstance(scoring, six.str): + if isinstance(scoring, str): return get_scorer(scoring) elif callable(scoring): # Heuristic to ensure user has not passed a metric @@ -339,7 +338,7 @@ def _check_multimetric_scoring(estimator, scoring=None): False if scorer is None/str/callable """ if callable(scoring) or scoring is None or isinstance(scoring, - six.str): + str): scorers = {"score": check_scoring(estimator, scoring=scoring)} return scorers, False else: @@ -365,7 +364,7 @@ def _check_multimetric_scoring(estimator, scoring=None): raise ValueError(err_msg + "Duplicate elements were found in" " the given list. %r" % repr(scoring)) elif len(keys) > 0: - if not all(isinstance(k, six.str) for k in keys): + if not all(isinstance(k, str) for k in keys): if any(callable(k) for k in keys): raise ValueError(err_msg + "One or more of the elements were " @@ -385,7 +384,7 @@ def _check_multimetric_scoring(estimator, scoring=None): elif isinstance(scoring, dict): keys = set(scoring) - if not all(isinstance(k, six.str) for k in keys): + if not all(isinstance(k, str) for k in keys): raise ValueError("Non-string types were found in the keys of " "the given dict. 
scoring=%r" % repr(scoring)) if len(keys) == 0: diff --git a/sklearn/mixture/base.py b/sklearn/mixture/base.py index 362a0baaa8175..bfdc75a0547e2 100644 --- a/sklearn/mixture/base.py +++ b/sklearn/mixture/base.py @@ -15,7 +15,6 @@ from .. import cluster from ..base import BaseEstimator from ..base import DensityMixin -from ..externals import six from ..exceptions import ConvergenceWarning from ..utils import check_array, check_random_state from ..utils.fixes import logsumexp diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 6cc2ffabc0e25..66d34f89d4be7 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -12,7 +12,7 @@ from sklearn.covariance import EmpiricalCovariance from sklearn.datasets.samples_generator import make_spd_matrix -from sklearn.externals.six.moves import cStringIO as StringIO +from io import StringIO from sklearn.metrics.cluster import adjusted_rand_score from sklearn.mixture.gaussian_mixture import GaussianMixture from sklearn.mixture.gaussian_mixture import ( diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 6c54afb7a9ee8..b0ca2eda863e1 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -30,7 +30,6 @@ from ._validation import _aggregate_score_dicts from ..exceptions import NotFittedError from ..utils._joblib import Parallel, delayed -from ..externals import six from ..utils import check_random_state from ..utils.fixes import sp_version from ..utils.fixes import MaskedArray @@ -371,7 +370,7 @@ def _check_param_grid(param_grid): if isinstance(v, np.ndarray) and v.ndim > 1: raise ValueError("Parameter array should be one-dimensional.") - if (isinstance(v, six.str) or + if (isinstance(v, str) or not isinstance(v, (np.ndarray, Sequence))): raise ValueError("Parameter values for parameter ({0}) need " "to be a sequence(but not a string) or" @@ -619,7 +618,7 @@ def fit(self, X, y=None, groups=None, **fit_params): if self.multimetric_: if self.refit is not False and ( - not isinstance(self.refit, six.str) or + not isinstance(self.refit, str) or # This will work for both dict / list (tuple) self.refit not in scorers): raise ValueError("For multi-metric scoring, the parameter " diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index b0dcc11c9003a..02c7616e5dace 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -1,7 +1,6 @@ """Test the search module""" -from sklearn.externals.six.moves import cStringIO as StringIO -from sklearn.externals.six.moves import xrange +from io import StringIO from itertools import chain, product import pickle import sys diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py index b7cba19688543..ab05e01f71351 100644 --- a/sklearn/model_selection/tests/test_split.py +++ b/sklearn/model_selection/tests/test_split.py @@ -56,9 +56,6 @@ from sklearn.datasets import load_digits from sklearn.datasets import make_classification -from sklearn.externals import six -from sklearn.externals.six.moves import zip - from sklearn.utils.fixes import comb from sklearn.svm import SVC diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index a7352972173fc..13ee749fd88c0 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ 
b/sklearn/model_selection/tests/test_validation.py @@ -69,7 +69,7 @@ from sklearn.preprocessing import LabelEncoder from sklearn.pipeline import Pipeline -from sklearn.externals.six.moves import cStringIO as StringIO +from io import StringIO from sklearn.base import BaseEstimator from sklearn.base import clone from sklearn.multiclass import OneVsRestClassifier diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index cddbcbfff2da5..deafc0e7302af 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -26,7 +26,6 @@ from .utils.validation import check_is_fitted, has_fit_parameter from .utils.multiclass import check_classification_targets from .utils._joblib import Parallel, delayed -from .externals import six __all__ = ["MultiOutputRegressor", "MultiOutputClassifier", "ClassifierChain", "RegressorChain"] diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index dced4fbdb3dd2..a0ac6a3105508 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -31,7 +31,6 @@ from .utils.fixes import logsumexp from .utils.multiclass import _check_partial_fit_first_call from .utils.validation import check_is_fitted -from .externals import six __all__ = ['BernoulliNB', 'GaussianNB', 'MultinomialNB', 'ComplementNB'] diff --git a/sklearn/neighbors/base.py b/sklearn/neighbors/base.py index a426335ae88dd..e0e7af71c79c0 100644 --- a/sklearn/neighbors/base.py +++ b/sklearn/neighbors/base.py @@ -24,7 +24,6 @@ from ..utils import check_X_y, check_array, gen_even_slices from ..utils.multiclass import check_classification_targets from ..utils.validation import check_is_fitted -from ..externals import six from ..exceptions import DataConversionWarning from ..utils._joblib import Parallel, delayed, effective_n_jobs from ..utils._joblib import __version__ as joblib_version diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py index a8fcc8e11cdb9..e1cb5e9446450 100644 --- a/sklearn/neural_network/multilayer_perceptron.py +++ b/sklearn/neural_network/multilayer_perceptron.py @@ -17,7 +17,6 @@ from ._base import ACTIVATIONS, DERIVATIVES, LOSS_FUNCTIONS from ._stochastic_optimizers import SGDOptimizer, AdamOptimizer from ..model_selection import train_test_split -from ..externals import six from ..preprocessing import LabelBinarizer from ..utils import gen_batches, check_random_state from ..utils import shuffle diff --git a/sklearn/neural_network/rbm.py b/sklearn/neural_network/rbm.py index cbf4906782d7c..5edee53984626 100644 --- a/sklearn/neural_network/rbm.py +++ b/sklearn/neural_network/rbm.py @@ -15,7 +15,6 @@ from ..base import BaseEstimator from ..base import TransformerMixin -from ..externals.six.moves import xrange from ..utils import check_array from ..utils import check_random_state from ..utils import gen_even_slices diff --git a/sklearn/neural_network/tests/test_mlp.py b/sklearn/neural_network/tests/test_mlp.py index b0d5ab587a087..9f809823a8c08 100644 --- a/sklearn/neural_network/tests/test_mlp.py +++ b/sklearn/neural_network/tests/test_mlp.py @@ -15,7 +15,7 @@ from sklearn.datasets import load_digits, load_boston, load_iris from sklearn.datasets import make_regression, make_multilabel_classification from sklearn.exceptions import ConvergenceWarning -from sklearn.externals.six.moves import cStringIO as StringIO +from io import StringIO from sklearn.metrics import roc_auc_score from sklearn.neural_network import MLPClassifier from sklearn.neural_network import MLPRegressor diff --git 
a/sklearn/neural_network/tests/test_rbm.py b/sklearn/neural_network/tests/test_rbm.py index 6298a085786db..e97b0c8f5e7e5 100644 --- a/sklearn/neural_network/tests/test_rbm.py +++ b/sklearn/neural_network/tests/test_rbm.py @@ -7,7 +7,7 @@ assert_true) from sklearn.datasets import load_digits -from sklearn.externals.six.moves import cStringIO as StringIO +from io import StringIO from sklearn.neural_network import BernoulliRBM from sklearn.utils.validation import assert_all_finite np.seterr(all='warn') diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index ef4470d91b2ae..c509597941d05 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -17,7 +17,6 @@ from .base import clone, TransformerMixin from .utils._joblib import Parallel, delayed -from .externals import six from .utils.metaestimators import if_delegate_has_method from .utils import Bunch from .utils.validation import check_memory diff --git a/sklearn/preprocessing/_encoders.py b/sklearn/preprocessing/_encoders.py index cffe18e6f17e8..7dfd168c146bb 100644 --- a/sklearn/preprocessing/_encoders.py +++ b/sklearn/preprocessing/_encoders.py @@ -12,7 +12,6 @@ from .. import get_config as _get_config from ..base import BaseEstimator, TransformerMixin -from ..externals import six from ..utils import check_array from ..utils import deprecated from ..utils.fixes import _argmax, _object_dtype_isnan @@ -371,7 +370,7 @@ def _handle_deprecations(self, X): # if user specified categorical_features -> always use legacy mode if self.categorical_features is not None: - if (isinstance(self.categorical_features, six.str) + if (isinstance(self.categorical_features, str) and self.categorical_features == 'all'): warnings.warn( "The 'categorical_features' keyword is deprecated in " @@ -438,7 +437,7 @@ def _legacy_fit_transform(self, X): "be able to use arbitrary integer values as " "category identifiers.") n_samples, n_features = X.shape - if (isinstance(self._n_values, six.str) and + if (isinstance(self._n_values, str) and self._n_values == 'auto'): n_values = np.max(X, axis=0) + 1 elif isinstance(self._n_values, numbers.Integral): @@ -473,7 +472,7 @@ def _legacy_fit_transform(self, X): shape=(n_samples, indices[-1]), dtype=self.dtype).tocsr() - if (isinstance(self._n_values, six.str) and + if (isinstance(self._n_values, str) and self._n_values == 'auto'): mask = np.array(out.sum(axis=0)).ravel() != 0 active_features = np.where(mask)[0] @@ -553,7 +552,7 @@ def _legacy_transform(self, X): out = sparse.coo_matrix((data, (row_indices, column_indices)), shape=(n_samples, indices[-1]), dtype=self.dtype).tocsr() - if (isinstance(self._n_values, six.str) and + if (isinstance(self._n_values, str) and self._n_values == 'auto'): out = out[:, self._active_features_] diff --git a/sklearn/preprocessing/base.py b/sklearn/preprocessing/base.py index 45e39a2bfcbc5..29a1bd87dc8ee 100644 --- a/sklearn/preprocessing/base.py +++ b/sklearn/preprocessing/base.py @@ -5,7 +5,6 @@ from ..utils import check_array from ..utils.validation import FLOAT_DTYPES -from ..externals import six def _transform_selected(X, transform, dtype, selected="all", copy=True, @@ -48,7 +47,7 @@ def _transform_selected(X, transform, dtype, selected="all", copy=True, raise ValueError("The retain_order option can only be set to True " "for dense matrices.") - if isinstance(selected, six.str) and selected == "all": + if isinstance(selected, str) and selected == "all": return transform(X) if len(selected) == 0: diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py index 
77c2d2cc970fc..de2396dda5312 100644 --- a/sklearn/preprocessing/data.py +++ b/sklearn/preprocessing/data.py @@ -20,7 +20,6 @@ from scipy import optimize from ..base import BaseEstimator, TransformerMixin -from ..externals import six from ..utils import check_array from ..utils.extmath import row_norms from ..utils.extmath import _incremental_mean_and_var diff --git a/sklearn/preprocessing/imputation.py b/sklearn/preprocessing/imputation.py index 4318122d4be6c..380af58cc1d40 100644 --- a/sklearn/preprocessing/imputation.py +++ b/sklearn/preprocessing/imputation.py @@ -15,10 +15,6 @@ from ..utils.validation import check_is_fitted from ..utils.validation import FLOAT_DTYPES -from ..externals import six - -zip = six.moves.zip -map = six.moves.map __all__ = [ 'Imputer', diff --git a/sklearn/preprocessing/label.py b/sklearn/preprocessing/label.py index 12f95b5e2cb4f..1da320efc2762 100644 --- a/sklearn/preprocessing/label.py +++ b/sklearn/preprocessing/label.py @@ -24,10 +24,6 @@ from ..utils.multiclass import unique_labels from ..utils.multiclass import type_of_target -from ..externals import six - -zip = six.moves.zip -map = six.moves.map __all__ = [ 'label_binarize', diff --git a/sklearn/preprocessing/tests/test_discretization.py b/sklearn/preprocessing/tests/test_discretization.py index 09bb25ac49c3e..953243ea42cf2 100644 --- a/sklearn/preprocessing/tests/test_discretization.py +++ b/sklearn/preprocessing/tests/test_discretization.py @@ -5,7 +5,6 @@ import scipy.sparse as sp import warnings -from sklearn.externals.six.moves import xrange as range from sklearn.preprocessing import KBinsDiscretizer from sklearn.preprocessing import OneHotEncoder from sklearn.utils.testing import ( diff --git a/sklearn/random_projection.py b/sklearn/random_projection.py index 6f6a8b2ba8341..7581847d00c58 100644 --- a/sklearn/random_projection.py +++ b/sklearn/random_projection.py @@ -36,8 +36,7 @@ import scipy.sparse as sp from .base import BaseEstimator, TransformerMixin -from .externals import six -from .externals.six.moves import xrange + from .utils import check_random_state from .utils.extmath import safe_sparse_dot from .utils.random import sample_without_replacement diff --git a/sklearn/semi_supervised/label_propagation.py b/sklearn/semi_supervised/label_propagation.py index 081e54fbb0dfb..04aa6714e0711 100644 --- a/sklearn/semi_supervised/label_propagation.py +++ b/sklearn/semi_supervised/label_propagation.py @@ -64,7 +64,6 @@ from scipy.sparse import csgraph from ..base import BaseEstimator, ClassifierMixin -from ..externals import six from ..metrics.pairwise import rbf_kernel from ..neighbors.unsupervised import NearestNeighbors from ..utils.extmath import safe_sparse_dot diff --git a/sklearn/svm/base.py b/sklearn/svm/base.py index 70a6cd58a671a..2105a6130e7f0 100644 --- a/sklearn/svm/base.py +++ b/sklearn/svm/base.py @@ -16,7 +16,6 @@ from ..utils.extmath import safe_sparse_dot from ..utils.validation import check_is_fitted, _check_large_sparse from ..utils.multiclass import check_classification_targets -from ..externals import six from ..exceptions import ConvergenceWarning from ..exceptions import NotFittedError diff --git a/sklearn/tree/export.py b/sklearn/tree/export.py index 81900da12e1a3..017181f7cba38 100644 --- a/sklearn/tree/export.py +++ b/sklearn/tree/export.py @@ -16,7 +16,6 @@ import numpy as np -from ..externals import six from ..utils.validation import check_is_fitted from . 
import _criterion @@ -281,7 +280,7 @@ def node_to_str(self, tree, node_id, criterion): if self.impurity: if isinstance(criterion, _criterion.FriedmanMSE): criterion = "friedman_mse" - elif not isinstance(criterion, six.str): + elif not isinstance(criterion, str): criterion = "impurity" if labels: node_string += '%s = ' % criterion @@ -755,7 +754,7 @@ def export_graphviz(decision_tree, out_file=None, max_depth=None, own_file = False return_string = False try: - if isinstance(out_file, six.str): + if isinstance(out_file, str): if six.PY3: out_file = open(out_file, "w", encoding="utf-8") else: diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index f31858979cf4f..5cf30b08a103a 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -30,7 +30,6 @@ from ..base import ClassifierMixin from ..base import RegressorMixin from ..base import is_classifier -from ..externals import six from ..utils import check_array from ..utils import check_random_state from ..utils import compute_sample_weight @@ -219,7 +218,7 @@ def fit(self, X, y, sample_weight=None, check_input=True, min_samples_split = max(min_samples_split, 2 * min_samples_leaf) - if isinstance(self.max_features, six.str): + if isinstance(self.max_features, str): if self.max_features == "auto": if is_classification: max_features = max(1, int(np.sqrt(self.n_features_))) diff --git a/sklearn/utils/class_weight.py b/sklearn/utils/class_weight.py index 88afa758f69c9..efca1fef0d12d 100644 --- a/sklearn/utils/class_weight.py +++ b/sklearn/utils/class_weight.py @@ -3,7 +3,6 @@ # License: BSD 3 clause import numpy as np -from ..externals import six def compute_class_weight(class_weight, classes, y): @@ -114,12 +113,12 @@ def compute_sample_weight(class_weight, y, indices=None): y = np.reshape(y, (-1, 1)) n_outputs = y.shape[1] - if isinstance(class_weight, six.str): + if isinstance(class_weight, str): if class_weight not in ['balanced']: raise ValueError('The only valid preset for class_weight is ' '"balanced". Given "%s".' % class_weight) elif (indices is not None and - not isinstance(class_weight, six.str)): + not isinstance(class_weight, str)): raise ValueError('The only valid class_weight for subsampling is ' '"balanced". Given "%s".' % class_weight) elif n_outputs > 1: diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index 73a719da56551..8ccbe31e1989d 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -20,7 +20,6 @@ from . 
import check_random_state from .fixes import np_version from ._logistic_sigmoid import _log_logistic_sigmoid -from ..externals.six.moves import xrange from .sparsefuncs_fast import csr_row_norms from .validation import check_array diff --git a/sklearn/utils/metaestimators.py b/sklearn/utils/metaestimators.py index 49b059b324595..606173560dd75 100644 --- a/sklearn/utils/metaestimators.py +++ b/sklearn/utils/metaestimators.py @@ -9,7 +9,6 @@ import numpy as np from ..utils import safe_indexing -from ..externals import six from ..base import BaseEstimator __all__ = ['if_delegate_has_method'] diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index 49c9c4b1604fc..a1e9de98e7c71 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -4,7 +4,7 @@ import numpy as np import scipy.sparse as sp -from sklearn.externals.six.moves import cStringIO as StringIO +from io import StringIO from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.utils import deprecated diff --git a/sklearn/utils/tests/test_fast_dict.py b/sklearn/utils/tests/test_fast_dict.py index 77c1259aa8bf4..1131257330dcf 100644 --- a/sklearn/utils/tests/test_fast_dict.py +++ b/sklearn/utils/tests/test_fast_dict.py @@ -4,7 +4,6 @@ from sklearn.utils.fast_dict import IntFloatDict, argmin from sklearn.utils.testing import assert_equal -from sklearn.externals.six.moves import xrange def test_int_float_dict(): diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py index d33778c848c2a..6a224e7761a35 100644 --- a/sklearn/utils/tests/test_multiclass.py +++ b/sklearn/utils/tests/test_multiclass.py @@ -4,7 +4,6 @@ import scipy.sparse as sp from itertools import product -from sklearn.externals.six.moves import xrange from sklearn.externals.six import iteritems from scipy.sparse import issparse diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index cf5588a5506d5..7e64d21e8f613 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -18,7 +18,6 @@ from numpy.core.numeric import ComplexWarning -from ..externals import six from .fixes import signature from .. import get_config as _get_config from ..exceptions import NonBLASDotWarning @@ -205,7 +204,7 @@ def check_memory(memory): If ``memory`` is not joblib.Memory-like. """ - if memory is None or isinstance(memory, six.str): + if memory is None or isinstance(memory, str): if LooseVersion(joblib_version) < '0.12': memory = Memory(cachedir=memory, verbose=0) else: @@ -308,7 +307,7 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, changed_format = False - if isinstance(accept_sparse, six.str): + if isinstance(accept_sparse, str): accept_sparse = [accept_sparse] # Indices dtype validation @@ -467,7 +466,7 @@ def check_array(array, accept_sparse=False, accept_large_sparse=True, array_orig = array # store whether originally we wanted numeric dtype - dtype_numeric = isinstance(dtype, six.str) and dtype == "numeric" + dtype_numeric = isinstance(dtype, str) and dtype == "numeric" dtype_orig = getattr(array, "dtype", None) if not hasattr(dtype_orig, 'kind'): @@ -501,7 +500,7 @@ def check_array(array, accept_sparse=False, accept_large_sparse=True, '. 
Got {!r} instead'.format(force_all_finite)) if estimator is not None: - if isinstance(estimator, six.str): + if isinstance(estimator, str): estimator_name = estimator else: estimator_name = estimator.__class__.__name__ From 4d1e71ef870ba011d8da80ca22a2a4db68ece0f5 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 20 Nov 2018 18:21:14 -0500 Subject: [PATCH 05/29] six no more --- sklearn/compose/tests/test_column_transformer.py | 1 - sklearn/covariance/shrunk_covariance_.py | 1 - sklearn/covariance/tests/test_graph_lasso.py | 2 +- sklearn/covariance/tests/test_graphical_lasso.py | 2 +- sklearn/datasets/base.py | 2 +- sklearn/datasets/kddcup99.py | 3 +-- sklearn/datasets/lfw.py | 3 +-- sklearn/datasets/openml.py | 2 +- sklearn/datasets/samples_generator.py | 2 -- sklearn/datasets/tests/test_openml.py | 2 +- sklearn/datasets/tests/test_samples_generator.py | 1 - sklearn/decomposition/dict_learning.py | 1 - sklearn/decomposition/online_lda.py | 1 - sklearn/decomposition/tests/test_dict_learning.py | 2 +- sklearn/decomposition/tests/test_fastica.py | 1 - sklearn/decomposition/tests/test_online_lda.py | 2 +- sklearn/discriminant_analysis.py | 1 - sklearn/ensemble/bagging.py | 1 - sklearn/feature_selection/mutual_info_.py | 1 - sklearn/manifold/t_sne.py | 1 - sklearn/metrics/regression.py | 1 - sklearn/mixture/gaussian_mixture.py | 1 - sklearn/model_selection/_search.py | 2 +- sklearn/model_selection/_split.py | 1 - sklearn/model_selection/_validation.py | 1 - sklearn/multiclass.py | 5 ++--- sklearn/preprocessing/_encoders.py | 3 +-- sklearn/preprocessing/data.py | 5 ----- sklearn/tests/test_metaestimators.py | 1 - sklearn/tests/test_multiclass.py | 1 - sklearn/tests/test_naive_bayes.py | 1 - sklearn/tests/test_pipeline.py | 1 - sklearn/tree/export.py | 8 +++----- sklearn/tree/tests/test_export.py | 2 +- sklearn/utils/estimator_checks.py | 1 - sklearn/utils/multiclass.py | 1 - sklearn/utils/testing.py | 2 -- sklearn/utils/tests/test_multiclass.py | 1 - sklearn/utils/tests/test_murmurhash.py | 1 - 39 files changed, 17 insertions(+), 54 deletions(-) diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py index e39b6de4d0859..f15188e8754fb 100644 --- a/sklearn/compose/tests/test_column_transformer.py +++ b/sklearn/compose/tests/test_column_transformer.py @@ -15,7 +15,6 @@ from sklearn.utils.testing import assert_allclose_dense_sparse from sklearn.base import BaseEstimator -from sklearn.externals import six from sklearn.compose import ColumnTransformer, make_column_transformer from sklearn.exceptions import NotFittedError, DataConversionWarning from sklearn.preprocessing import StandardScaler, Normalizer, OneHotEncoder diff --git a/sklearn/covariance/shrunk_covariance_.py b/sklearn/covariance/shrunk_covariance_.py index eed39a45bddc4..94804ccac60d6 100644 --- a/sklearn/covariance/shrunk_covariance_.py +++ b/sklearn/covariance/shrunk_covariance_.py @@ -18,7 +18,6 @@ import numpy as np from .empirical_covariance_ import empirical_covariance, EmpiricalCovariance -from ..externals.six.moves import xrange from ..utils import check_array diff --git a/sklearn/covariance/tests/test_graph_lasso.py b/sklearn/covariance/tests/test_graph_lasso.py index d368356100a4f..8e7b399cd8144 100644 --- a/sklearn/covariance/tests/test_graph_lasso.py +++ b/sklearn/covariance/tests/test_graph_lasso.py @@ -14,7 +14,7 @@ from sklearn.covariance import (graph_lasso, GraphLasso, GraphLassoCV, empirical_covariance) from sklearn.datasets.samples_generator import 
make_sparse_spd_matrix -from sklearn.externals.six.moves import StringIO +from io import StringIO from sklearn.utils import check_random_state from sklearn import datasets diff --git a/sklearn/covariance/tests/test_graphical_lasso.py b/sklearn/covariance/tests/test_graphical_lasso.py index 47f15f4a762ac..239863925f921 100644 --- a/sklearn/covariance/tests/test_graphical_lasso.py +++ b/sklearn/covariance/tests/test_graphical_lasso.py @@ -12,7 +12,7 @@ from sklearn.covariance import (graphical_lasso, GraphicalLasso, GraphicalLassoCV, empirical_covariance) from sklearn.datasets.samples_generator import make_sparse_spd_matrix -from sklearn.externals.six.moves import StringIO +from io import StringIO from sklearn.utils import check_random_state from sklearn import datasets diff --git a/sklearn/datasets/base.py b/sklearn/datasets/base.py index eb06f133ec488..e4580b56dc181 100644 --- a/sklearn/datasets/base.py +++ b/sklearn/datasets/base.py @@ -22,7 +22,7 @@ import numpy as np -from sklearn.externals.six.moves.urllib.request import urlretrieve +from urllib.request import urlretrieve RemoteFileMetadata = namedtuple('RemoteFileMetadata', ['filename', 'url', 'checksum']) diff --git a/sklearn/datasets/kddcup99.py b/sklearn/datasets/kddcup99.py index e460503474a9f..397640117c3d7 100644 --- a/sklearn/datasets/kddcup99.py +++ b/sklearn/datasets/kddcup99.py @@ -276,8 +276,7 @@ def _fetch_brute_kddcup99(data_home=None, file_ = GzipFile(filename=archive_path, mode='r') Xy = [] for line in file_.readlines(): - if six.PY3: - line = line.decode() + line = line.decode() Xy.append(line.replace('\n', '').split(',')) file_.close() logger.debug('extraction done') diff --git a/sklearn/datasets/lfw.py b/sklearn/datasets/lfw.py index 13c3725a506c4..756a8045773bb 100644 --- a/sklearn/datasets/lfw.py +++ b/sklearn/datasets/lfw.py @@ -20,7 +20,6 @@ from ..utils import deprecated from ..utils import Bunch from ..utils._joblib import Memory -from ..externals.six import b from ..utils import _joblib logger = logging.getLogger(__name__) @@ -369,7 +368,7 @@ def _fetch_lfw_pairs(index_file_path, data_folder_path, slice_=None, # parse the index file to find the number of pairs to be able to allocate # the right amount of memory before starting to decode the jpeg files with open(index_file_path, 'rb') as index_file: - split_lines = [ln.strip().split(b('\t')) for ln in index_file] + split_lines = [ln.decode().strip().split('\t') for ln in index_file] pair_specs = [sl for sl in split_lines if len(sl) > 2] n_pairs = len(pair_specs) diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py index 84385a6bb2700..1f1fc158553fd 100644 --- a/sklearn/datasets/openml.py +++ b/sklearn/datasets/openml.py @@ -22,7 +22,7 @@ from sklearn.externals import _arff from .base import get_data_home from ..externals.six import str, PY2, BytesIO -from ..externals.six.moves.urllib.error import HTTPError +from urllib.error import HTTPError from ..utils import Bunch __all__ = ['fetch_openml'] diff --git a/sklearn/datasets/samples_generator.py b/sklearn/datasets/samples_generator.py index b8ad97628cbe7..35d8ea6a05589 100644 --- a/sklearn/datasets/samples_generator.py +++ b/sklearn/datasets/samples_generator.py @@ -17,8 +17,6 @@ from ..utils import shuffle as util_shuffle from ..utils.fixes import _Iterable as Iterable from ..utils.random import sample_without_replacement -map = six.moves.map -zip = six.moves.zip def _generate_hypercube(samples, dimensions, rng): diff --git a/sklearn/datasets/tests/test_openml.py 
b/sklearn/datasets/tests/test_openml.py index c192d50d9caf5..4a858899e2c31 100644 --- a/sklearn/datasets/tests/test_openml.py +++ b/sklearn/datasets/tests/test_openml.py @@ -18,7 +18,7 @@ from sklearn.utils.testing import (assert_warns_message, assert_raise_message) from sklearn.externals.six import str -from sklearn.externals.six.moves.urllib.error import HTTPError +from urllib.error import HTTPError from sklearn.datasets.tests.test_common import check_return_X_y from functools import partial diff --git a/sklearn/datasets/tests/test_samples_generator.py b/sklearn/datasets/tests/test_samples_generator.py index 2cf6900442feb..c8b0fbd571145 100644 --- a/sklearn/datasets/tests/test_samples_generator.py +++ b/sklearn/datasets/tests/test_samples_generator.py @@ -6,7 +6,6 @@ import numpy as np import pytest import scipy.sparse as sp -from sklearn.externals.six.moves import zip from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_equal diff --git a/sklearn/decomposition/dict_learning.py b/sklearn/decomposition/dict_learning.py index 65ae605b6c19b..eb5e4f2588612 100644 --- a/sklearn/decomposition/dict_learning.py +++ b/sklearn/decomposition/dict_learning.py @@ -15,7 +15,6 @@ from ..base import BaseEstimator, TransformerMixin from ..utils._joblib import Parallel, delayed, effective_n_jobs -from ..externals.six.moves import zip from ..utils import (check_array, check_random_state, gen_even_slices, gen_batches) from ..utils.extmath import randomized_svd, row_norms diff --git a/sklearn/decomposition/online_lda.py b/sklearn/decomposition/online_lda.py index b9bf1025e315e..1c8933d2b719d 100644 --- a/sklearn/decomposition/online_lda.py +++ b/sklearn/decomposition/online_lda.py @@ -21,7 +21,6 @@ from ..utils.fixes import logsumexp from ..utils.validation import check_non_negative from ..utils._joblib import Parallel, delayed, effective_n_jobs -from ..externals.six.moves import xrange from ..exceptions import NotFittedError from ._online_lda import (mean_change, _dirichlet_expectation_1d, diff --git a/sklearn/decomposition/tests/test_dict_learning.py b/sklearn/decomposition/tests/test_dict_learning.py index fd2937ed8f25d..042af84eaef03 100644 --- a/sklearn/decomposition/tests/test_dict_learning.py +++ b/sklearn/decomposition/tests/test_dict_learning.py @@ -223,7 +223,7 @@ def test_dict_learning_online_positivity(transform_algorithm, def test_dict_learning_online_verbosity(): n_components = 5 # test verbosity - from sklearn.externals.six.moves import cStringIO as StringIO + from io import StringIO import sys old_stdout = sys.stdout diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py index d5a329c7340c8..313a13ad8333b 100644 --- a/sklearn/decomposition/tests/test_fastica.py +++ b/sklearn/decomposition/tests/test_fastica.py @@ -17,7 +17,6 @@ from sklearn.decomposition import FastICA, fastica, PCA from sklearn.decomposition.fastica_ import _gs_decorrelation -from sklearn.externals.six import moves from sklearn.exceptions import ConvergenceWarning diff --git a/sklearn/decomposition/tests/test_online_lda.py b/sklearn/decomposition/tests/test_online_lda.py index b7d95eeb6d899..76708f30a3dcd 100644 --- a/sklearn/decomposition/tests/test_online_lda.py +++ b/sklearn/decomposition/tests/test_online_lda.py @@ -20,7 +20,7 @@ from sklearn.utils.testing import if_safe_multiprocessing_with_blas from sklearn.exceptions import NotFittedError -from sklearn.externals.six import StringIO +from io import StringIO def _build_sparse_mtx(): 
diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index 184f0b9b530a1..bd3202d409c86 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -13,7 +13,6 @@ import warnings import numpy as np from scipy import linalg -from .externals.six import str from .base import BaseEstimator, TransformerMixin, ClassifierMixin from .linear_model.base import LinearClassifierMixin diff --git a/sklearn/ensemble/bagging.py b/sklearn/ensemble/bagging.py index 8be282580b254..719f198a958c8 100644 --- a/sklearn/ensemble/bagging.py +++ b/sklearn/ensemble/bagging.py @@ -15,7 +15,6 @@ from ..base import ClassifierMixin, RegressorMixin from ..utils._joblib import Parallel, delayed from ..externals.six import with_metaclass -from ..externals.six.moves import zip from ..metrics import r2_score, accuracy_score from ..tree import DecisionTreeClassifier, DecisionTreeRegressor from ..utils import check_random_state, check_X_y, check_array, column_or_1d diff --git a/sklearn/feature_selection/mutual_info_.py b/sklearn/feature_selection/mutual_info_.py index 0637f784c5f95..7c5c247eb36ef 100644 --- a/sklearn/feature_selection/mutual_info_.py +++ b/sklearn/feature_selection/mutual_info_.py @@ -6,7 +6,6 @@ from scipy.sparse import issparse from scipy.special import digamma -from ..externals.six import moves from ..metrics.cluster.supervised import mutual_info_score from ..neighbors import NearestNeighbors from ..preprocessing import scale diff --git a/sklearn/manifold/t_sne.py b/sklearn/manifold/t_sne.py index 649b44f0916b7..9e13920d49e00 100644 --- a/sklearn/manifold/t_sne.py +++ b/sklearn/manifold/t_sne.py @@ -25,7 +25,6 @@ from ..metrics.pairwise import pairwise_distances from . import _utils from . import _barnes_hut_tsne -from ..externals.six import str MACHINE_EPSILON = np.finfo(np.double).eps diff --git a/sklearn/metrics/regression.py b/sklearn/metrics/regression.py index 22e97d399fd96..485c0ad9f9172 100644 --- a/sklearn/metrics/regression.py +++ b/sklearn/metrics/regression.py @@ -27,7 +27,6 @@ from ..utils.validation import check_array, check_consistent_length from ..utils.validation import column_or_1d -from ..externals.six import str __ALL__ = [ diff --git a/sklearn/mixture/gaussian_mixture.py b/sklearn/mixture/gaussian_mixture.py index 2c5f9b6cf151a..4e9b5d5dc904c 100644 --- a/sklearn/mixture/gaussian_mixture.py +++ b/sklearn/mixture/gaussian_mixture.py @@ -9,7 +9,6 @@ from scipy import linalg from .base import BaseMixture, _check_shape -from ..externals.six.moves import zip from ..utils import check_array from ..utils.validation import check_is_fitted from ..utils.extmath import row_norms diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index b0ca2eda863e1..b6a90f8b54284 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -275,7 +275,7 @@ def __iter__(self): else: # Always sort the keys of a dictionary, for reproducibility items = sorted(self.param_distributions.items()) - for _ in six.moves.range(self.n_iter): + for _ in range(self.n_iter): params = dict() for k, v in items: if hasattr(v, "rvs"): diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index 356c018c58c7d..d3f5ab0f5ba72 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -26,7 +26,6 @@ from ..utils.validation import check_array from ..utils.multiclass import type_of_target from ..externals.six import with_metaclass -from ..externals.six.moves 
import zip from ..utils.fixes import signature, comb from ..utils.fixes import _Iterable as Iterable from ..base import _pprint diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index 125b610098404..f2f3f91a4a26e 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -26,7 +26,6 @@ from ..utils.metaestimators import _safe_split from ..utils._joblib import Parallel, delayed from ..utils._joblib import logger -from ..externals.six.moves import zip from ..metrics.scorer import check_scoring, _check_multimetric_scoring from ..exceptions import FitFailedWarning from ._split import check_cv diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py index fa7e87da6c255..fdfe1bed0ca9f 100644 --- a/sklearn/multiclass.py +++ b/sklearn/multiclass.py @@ -54,7 +54,6 @@ from .utils._joblib import Parallel from .utils._joblib import delayed -from .externals.six.moves import zip as izip __all__ = [ "OneVsRestClassifier", @@ -268,7 +267,7 @@ def partial_fit(self, X, y, classes=None): self.estimators_ = Parallel(n_jobs=self.n_jobs)( delayed(_partial_fit_binary)(estimator, X, column) - for estimator, column in izip(self.estimators_, columns)) + for estimator, column in zip(self.estimators_, columns)) return self @@ -557,7 +556,7 @@ def partial_fit(self, X, y, classes=None): n_jobs=self.n_jobs)( delayed(_partial_fit_ovo_binary)( estimator, X, y, self.classes_[i], self.classes_[j]) - for estimator, (i, j) in izip(self.estimators_, + for estimator, (i, j) in zip(self.estimators_, (combinations))) self.pairwise_indices_ = None diff --git a/sklearn/preprocessing/_encoders.py b/sklearn/preprocessing/_encoders.py index 7dfd168c146bb..430c14067f5f8 100644 --- a/sklearn/preprocessing/_encoders.py +++ b/sklearn/preprocessing/_encoders.py @@ -20,7 +20,6 @@ from .base import _transform_selected from .label import _encode, _encode_check_unknown -range = six.moves.range __all__ = [ 'OneHotEncoder', @@ -703,7 +702,7 @@ def get_feature_names(self, input_features=None): feature_names = [] for i in range(len(cats)): names = [ - input_features[i] + '_' + six.text_type(t) for t in cats[i]] + input_features[i] + '_' + str(t) for t in cats[i]] feature_names.extend(names) return np.array(feature_names, dtype=object) diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py index de2396dda5312..ae04e48070a36 100644 --- a/sklearn/preprocessing/data.py +++ b/sklearn/preprocessing/data.py @@ -38,11 +38,6 @@ BOUNDS_THRESHOLD = 1e-7 - -zip = six.moves.zip -map = six.moves.map -range = six.moves.range - __all__ = [ 'Binarizer', 'KernelCenterer', diff --git a/sklearn/tests/test_metaestimators.py b/sklearn/tests/test_metaestimators.py index e1cbe09e43a94..f09017c2426f6 100644 --- a/sklearn/tests/test_metaestimators.py +++ b/sklearn/tests/test_metaestimators.py @@ -5,7 +5,6 @@ import numpy as np from sklearn.base import BaseEstimator -from sklearn.externals.six import iterkeys from sklearn.datasets import make_classification from sklearn.utils.testing import assert_true, assert_false, assert_raises diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py index 7b46fa0bf14f5..99ec745d90902 100644 --- a/sklearn/tests/test_multiclass.py +++ b/sklearn/tests/test_multiclass.py @@ -34,7 +34,6 @@ from sklearn.pipeline import Pipeline from sklearn import svm from sklearn import datasets -from sklearn.externals.six.moves import zip iris = datasets.load_iris() rng = np.random.RandomState(0) diff --git 
a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index 9533cff66662d..06e72f3f3eff1 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -11,7 +11,6 @@ from sklearn.model_selection import train_test_split from sklearn.model_selection import cross_val_score -from sklearn.externals.six.moves import zip from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 17793e35998a4..ecccb3a50da5f 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -10,7 +10,6 @@ import numpy as np from scipy import sparse -from sklearn.externals.six.moves import zip from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regex from sklearn.utils.testing import assert_raise_message diff --git a/sklearn/tree/export.py b/sklearn/tree/export.py index 017181f7cba38..18052d6233d09 100644 --- a/sklearn/tree/export.py +++ b/sklearn/tree/export.py @@ -11,6 +11,7 @@ # Li Li # License: BSD 3 clause import warnings +from io import StringIO from numbers import Integral @@ -755,15 +756,12 @@ def export_graphviz(decision_tree, out_file=None, max_depth=None, return_string = False try: if isinstance(out_file, str): - if six.PY3: - out_file = open(out_file, "w", encoding="utf-8") - else: - out_file = open(out_file, "wb") + out_file = open(out_file, "w", encoding="utf-8") own_file = True if out_file is None: return_string = True - out_file = six.StringIO() + out_file = StringIO() exporter = _DOTTreeExporter( out_file=out_file, max_depth=max_depth, diff --git a/sklearn/tree/tests/test_export.py b/sklearn/tree/tests/test_export.py index 2471914fa44ce..6c765675faf76 100644 --- a/sklearn/tree/tests/test_export.py +++ b/sklearn/tree/tests/test_export.py @@ -11,7 +11,7 @@ from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.ensemble import GradientBoostingClassifier from sklearn.tree import export_graphviz, plot_tree -from sklearn.externals.six import StringIO +from io import StringIO from sklearn.utils.testing import (assert_in, assert_equal, assert_raises, assert_less_equal, assert_raises_regex, assert_raise_message) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index cc17cc3c2300b..ecf2a58edeb16 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -12,7 +12,6 @@ from scipy import sparse from scipy.stats import rankdata -from sklearn.externals.six.moves import zip from sklearn.utils import IS_PYPY, _IS_32BIT from sklearn.utils import _joblib from sklearn.utils._joblib import Memory diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py index 3c25de040e2a2..a61b3e8263e79 100644 --- a/sklearn/utils/multiclass.py +++ b/sklearn/utils/multiclass.py @@ -16,7 +16,6 @@ import numpy as np -from ..externals.six import str from ..utils.fixes import _Sequence as Sequence from .validation import check_array diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py index db43cce6fbaf9..e12394d5982f0 100644 --- a/sklearn/utils/testing.py +++ b/sklearn/utils/testing.py @@ -802,8 +802,6 @@ def clean_warning_registry(): """ reg = "__warningregistry__" for mod_name, mod in list(sys.modules.items()): - if 'six.moves' in mod_name: - continue if hasattr(mod, reg): getattr(mod, reg).clear() diff --git 
a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py index 6a224e7761a35..3d4622795e95c 100644 --- a/sklearn/utils/tests/test_multiclass.py +++ b/sklearn/utils/tests/test_multiclass.py @@ -4,7 +4,6 @@ import scipy.sparse as sp from itertools import product -from sklearn.externals.six import iteritems from scipy.sparse import issparse from scipy.sparse import csc_matrix diff --git a/sklearn/utils/tests/test_murmurhash.py b/sklearn/utils/tests/test_murmurhash.py index d59ec6cecad77..cb7899af88ab3 100644 --- a/sklearn/utils/tests/test_murmurhash.py +++ b/sklearn/utils/tests/test_murmurhash.py @@ -3,7 +3,6 @@ # License: BSD 3 clause import numpy as np -from sklearn.externals.six import b, u from sklearn.utils.murmurhash import murmurhash3_32 from numpy.testing import assert_array_almost_equal from numpy.testing import assert_array_equal From cb844cdae64da38d0c0e60344d98169497a1870a Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 21 Nov 2018 12:20:32 -0500 Subject: [PATCH 06/29] six iteritems --- benchmarks/bench_plot_fastkmeans.py | 8 ++++---- benchmarks/bench_plot_omp_lars.py | 2 +- benchmarks/bench_plot_svd.py | 2 +- sklearn/datasets/tests/test_lfw.py | 2 +- sklearn/feature_extraction/dict_vectorizer.py | 8 ++++---- sklearn/feature_extraction/text.py | 4 ++-- sklearn/gaussian_process/kernels.py | 2 +- sklearn/pipeline.py | 4 ++-- 8 files changed, 16 insertions(+), 16 deletions(-) diff --git a/benchmarks/bench_plot_fastkmeans.py b/benchmarks/bench_plot_fastkmeans.py index a7f9a017ad09f..d40d211dd1846 100644 --- a/benchmarks/bench_plot_fastkmeans.py +++ b/benchmarks/bench_plot_fastkmeans.py @@ -104,15 +104,15 @@ def compute_bench_2(chunks): results = compute_bench(samples_range, features_range) results_2 = compute_bench_2(chunks) - max_time = max([max(i) for i in [t for (label, t) in six.iteritems(results) + max_time = max([max(i) for i in [t for (label, t) in results.items() if "speed" in label]]) max_inertia = max([max(i) for i in [ - t for (label, t) in six.iteritems(results) + t for (label, t) in results.items() if "speed" not in label]]) fig = plt.figure('scikit-learn K-Means benchmark results') for c, (label, timings) in zip('brcy', - sorted(six.iteritems(results))): + sorted(results.items())): if 'speed' in label: ax = fig.add_subplot(2, 2, 1, projection='3d') ax.set_zlim3d(0.0, max_time * 1.1) @@ -129,7 +129,7 @@ def compute_bench_2(chunks): i = 0 for c, (label, timings) in zip('br', - sorted(six.iteritems(results_2))): + sorted(results_2.items())): i += 1 ax = fig.add_subplot(2, 2, i + 2) y = np.asarray(timings) diff --git a/benchmarks/bench_plot_omp_lars.py b/benchmarks/bench_plot_omp_lars.py index a9b2c97aa6a78..a9cc87e9d22f8 100644 --- a/benchmarks/bench_plot_omp_lars.py +++ b/benchmarks/bench_plot_omp_lars.py @@ -109,7 +109,7 @@ def compute_bench(samples_range, features_range): import matplotlib.pyplot as plt fig = plt.figure('scikit-learn OMP vs. 
LARS benchmark results') - for i, (label, timings) in enumerate(sorted(six.iteritems(results))): + for i, (label, timings) in enumerate(sorted(results.items())): ax = fig.add_subplot(1, 2, i+1) vmax = max(1 - timings.min(), -1 + timings.max()) plt.matshow(timings, fignum=False, vmin=1 - vmax, vmax=1 + vmax) diff --git a/benchmarks/bench_plot_svd.py b/benchmarks/bench_plot_svd.py index 4901ae13f1243..7f96696a33c51 100644 --- a/benchmarks/bench_plot_svd.py +++ b/benchmarks/bench_plot_svd.py @@ -66,7 +66,7 @@ def compute_bench(samples_range, features_range, n_iter=3, rank=50): label = 'scikit-learn singular value decomposition benchmark results' fig = plt.figure(label) ax = fig.gca(projection='3d') - for c, (label, timings) in zip('rbg', sorted(six.iteritems(results))): + for c, (label, timings) in zip('rbg', sorted(results.items())): X, Y = np.meshgrid(samples_range, features_range) Z = np.asarray(timings).reshape(samples_range.shape[0], features_range.shape[0]) diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py index 422969881fe86..75aecdfb999f1 100644 --- a/sklearn/datasets/tests/test_lfw.py +++ b/sklearn/datasets/tests/test_lfw.py @@ -75,7 +75,7 @@ def setup_module(): # generate some pairing metadata files using the same format as LFW with open(os.path.join(LFW_HOME, 'pairsDevTrain.txt'), 'wb') as f: f.write(six.b("10\n")) - more_than_two = [name for name, count in six.iteritems(counts) + more_than_two = [name for name, count in counts.items() if count >= 2] for i in range(5): name = random_state.choice(more_than_two) diff --git a/sklearn/feature_extraction/dict_vectorizer.py b/sklearn/feature_extraction/dict_vectorizer.py index 9636580af18ee..2c7587dd3eeeb 100644 --- a/sklearn/feature_extraction/dict_vectorizer.py +++ b/sklearn/feature_extraction/dict_vectorizer.py @@ -116,7 +116,7 @@ def fit(self, X, y=None): vocab = {} for x in X: - for f, v in six.iteritems(x): + for f, v in x.items(): if isinstance(v, str): f = "%s%s%s" % (f, self.separator, v) if f not in vocab: @@ -162,7 +162,7 @@ def _transform(self, X, fitting): # collect all the possible feature names and build sparse matrix at # same time for x in X: - for f, v in six.iteritems(x): + for f, v in x.items(): if isinstance(v, str): f = "%s%s%s" % (f, self.separator, v) v = 1 @@ -296,7 +296,7 @@ def transform(self, X): Xa = np.zeros((len(X), len(vocab)), dtype=dtype) for i, x in enumerate(X): - for f, v in six.iteritems(x): + for f, v in x.items(): if isinstance(v, str): f = "%s%s%s" % (f, self.separator, v) v = 1 @@ -357,7 +357,7 @@ def restrict(self, support, indices=False): new_vocab[names[i]] = len(new_vocab) self.vocabulary_ = new_vocab - self.feature_names_ = [f for f, i in sorted(six.iteritems(new_vocab), + self.feature_names_ = [f for f, i in sorted(new_vocab.items(), key=itemgetter(1))] return self diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index 32b8a879cb70e..fbbfaed5ef9db 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -870,7 +870,7 @@ def _sort_features(self, X, vocabulary): Returns a reordered matrix and modifies the vocabulary in place """ - sorted_features = sorted(six.iteritems(vocabulary)) + sorted_features = sorted(vocabulary.items()) map_index = np.empty(len(sorted_features), dtype=np.int32) for new_val, (term, old_val) in enumerate(sorted_features): vocabulary[term] = new_val @@ -908,7 +908,7 @@ def _limit_features(self, X, vocabulary, high=None, low=None, new_indices = np.cumsum(mask) - 1 # 
maps old indices to new removed_terms = set() - for term, old_index in list(six.iteritems(vocabulary)): + for term, old_index in list(vocabulary.items()): if mask[old_index]: vocabulary[term] = new_indices[old_index] else: diff --git a/sklearn/gaussian_process/kernels.py b/sklearn/gaussian_process/kernels.py index e5a14c5e1db17..0df0d1197dde9 100644 --- a/sklearn/gaussian_process/kernels.py +++ b/sklearn/gaussian_process/kernels.py @@ -175,7 +175,7 @@ def set_params(self, **params): # Simple optimisation to gain speed (inspect is slow) return self valid_params = self.get_params(deep=True) - for key, value in six.iteritems(params): + for key, value in params.items(): split = key.split('__', 1) if len(split) > 1: # nested objects case diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index c509597941d05..32ad908d5bf45 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -214,7 +214,7 @@ def _fit(self, X, y=None, **fit_params): fit_params_steps = dict((name, {}) for name, step in self.steps if step is not None) - for pname, pval in six.iteritems(fit_params): + for pname, pval in fit_params.items(): step, param = pname.split('__', 1) fit_params_steps[step][param] = pval Xt = X @@ -543,7 +543,7 @@ def _name_estimators(estimators): for est, name in zip(estimators, names): namecount[name] += 1 - for k, v in list(six.iteritems(namecount)): + for k, v in list(namecount.items()): if v == 1: del namecount[k] From 067a1e380a082c18e6fd1490ae3f0d061aa89774 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 21 Nov 2018 12:23:46 -0500 Subject: [PATCH 07/29] six metaclass --- sklearn/ensemble/forest.py | 2 +- sklearn/ensemble/gradient_boosting.py | 8 ++++---- sklearn/ensemble/weight_boosting.py | 2 +- sklearn/feature_selection/base.py | 2 +- sklearn/linear_model/base.py | 2 +- sklearn/linear_model/coordinate_descent.py | 2 +- sklearn/linear_model/ridge.py | 2 +- sklearn/metrics/scorer.py | 2 +- sklearn/multioutput.py | 2 +- sklearn/neighbors/base.py | 2 +- sklearn/neural_network/multilayer_perceptron.py | 2 +- sklearn/svm/base.py | 2 +- sklearn/tree/tree.py | 2 +- sklearn/utils/metaestimators.py | 2 +- 14 files changed, 17 insertions(+), 17 deletions(-) diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index a5f539a8653a9..5c193df2eb2d0 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -122,7 +122,7 @@ def _parallel_build_trees(tree, forest, X, y, sample_weight, tree_idx, n_trees, return tree -class BaseForest(six.with_metaclass(ABCMeta, BaseEnsemble)): +class BaseForest(BaseEnsemble, metaclass=ABCMeta): """Base class for forests of trees. Warning: This class should not be used directly. Use derived classes diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index 04d8dab7570a8..8b5ab415141c6 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -299,7 +299,7 @@ def predict(self, X): return y -class LossFunction(six.with_metaclass(ABCMeta, object)): +class LossFunction(object, metaclass=ABCMeta): """Abstract base class for various loss functions. Parameters @@ -406,7 +406,7 @@ def _update_terminal_region(self, tree, terminal_regions, leaf, X, y, """Template method for updating terminal regions (=leaves). """ -class RegressionLossFunction(six.with_metaclass(ABCMeta, LossFunction)): +class RegressionLossFunction(LossFunction, metaclass=ABCMeta): """Base class for regression loss functions. 
Parameters @@ -740,7 +740,7 @@ def _update_terminal_region(self, tree, terminal_regions, leaf, X, y, tree.value[leaf, 0] = val -class ClassificationLossFunction(six.with_metaclass(ABCMeta, LossFunction)): +class ClassificationLossFunction(LossFunction, metaclass=ABCMeta): """Base class for classification loss functions. """ def _score_to_proba(self, score): @@ -1118,7 +1118,7 @@ def update(self, j, est): self.verbose_mod *= 10 -class BaseGradientBoosting(six.with_metaclass(ABCMeta, BaseEnsemble)): +class BaseGradientBoosting(BaseEnsemble, metaclass=ABCMeta): """Abstract base class for Gradient Boosting. """ @abstractmethod diff --git a/sklearn/ensemble/weight_boosting.py b/sklearn/ensemble/weight_boosting.py index c55e50d5aed30..d5e3f3a912407 100644 --- a/sklearn/ensemble/weight_boosting.py +++ b/sklearn/ensemble/weight_boosting.py @@ -45,7 +45,7 @@ ] -class BaseWeightBoosting(six.with_metaclass(ABCMeta, BaseEnsemble)): +class BaseWeightBoosting(BaseEnsemble, metaclass=ABCMeta): """Base class for AdaBoost estimators. Warning: This class should not be used directly. Use derived classes diff --git a/sklearn/feature_selection/base.py b/sklearn/feature_selection/base.py index 441b4f45a80fd..5add330188f78 100644 --- a/sklearn/feature_selection/base.py +++ b/sklearn/feature_selection/base.py @@ -14,7 +14,7 @@ from ..utils import check_array, safe_mask -class SelectorMixin(six.with_metaclass(ABCMeta, TransformerMixin)): +class SelectorMixin(TransformerMixin, metaclass=ABCMeta): """ Transformer mixin that performs feature selection given a support mask diff --git a/sklearn/linear_model/base.py b/sklearn/linear_model/base.py index d931c55f7e63a..e5747d8f04925 100644 --- a/sklearn/linear_model/base.py +++ b/sklearn/linear_model/base.py @@ -182,7 +182,7 @@ def _rescale_data(X, y, sample_weight): return X, y -class LinearModel(six.with_metaclass(ABCMeta, BaseEstimator)): +class LinearModel(BaseEstimator, metaclass=ABCMeta): """Base class for Linear Models""" @abstractmethod diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py index c51d3c577f4e9..78ae74182700d 100644 --- a/sklearn/linear_model/coordinate_descent.py +++ b/sklearn/linear_model/coordinate_descent.py @@ -1047,7 +1047,7 @@ def _path_residuals(X, y, train, test, path, path_params, alphas=None, return this_mses -class LinearModelCV(six.with_metaclass(ABCMeta, LinearModel)): +class LinearModelCV(LinearModel, metaclass=ABCMeta): """Base class for iterative model fitting along a regularization path""" @abstractmethod diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py index 36402d340a30d..06028f441900c 100644 --- a/sklearn/linear_model/ridge.py +++ b/sklearn/linear_model/ridge.py @@ -463,7 +463,7 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto', return coef -class _BaseRidge(six.with_metaclass(ABCMeta, LinearModel)): +class _BaseRidge(LinearModel, metaclass=ABCMeta): @abstractmethod def __init__(self, alpha=1.0, fit_intercept=True, normalize=False, diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 89bf9a9ba8955..9def4d484803b 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -43,7 +43,7 @@ from ..base import is_regressor -class _BaseScorer(six.with_metaclass(ABCMeta, object)): +class _BaseScorer(object, metaclass=ABCMeta): def __init__(self, score_func, sign, kwargs): self._kwargs = kwargs self._score_func = score_func diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 
deafc0e7302af..69f05183f2fe8 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -367,7 +367,7 @@ def score(self, X, y): return np.mean(np.all(y == y_pred, axis=1)) -class _BaseChain(six.with_metaclass(ABCMeta, BaseEstimator)): +class _BaseChain(BaseEstimator, metaclass=ABCMeta): def __init__(self, base_estimator, order=None, cv=None, random_state=None): self.base_estimator = base_estimator self.order = order diff --git a/sklearn/neighbors/base.py b/sklearn/neighbors/base.py index e0e7af71c79c0..8f3799adfffe4 100644 --- a/sklearn/neighbors/base.py +++ b/sklearn/neighbors/base.py @@ -102,7 +102,7 @@ def _get_weights(dist, weights): "'distance', or a callable function") -class NeighborsBase(six.with_metaclass(ABCMeta, BaseEstimator)): +class NeighborsBase(BaseEstimator, metaclass=ABCMeta): """Base class for nearest neighbors estimators.""" @abstractmethod diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py index e1cb5e9446450..765ff953223a2 100644 --- a/sklearn/neural_network/multilayer_perceptron.py +++ b/sklearn/neural_network/multilayer_perceptron.py @@ -36,7 +36,7 @@ def _pack(coefs_, intercepts_): return np.hstack([l.ravel() for l in coefs_ + intercepts_]) -class BaseMultilayerPerceptron(six.with_metaclass(ABCMeta, BaseEstimator)): +class BaseMultilayerPerceptron(BaseEstimator, metaclass=ABCMeta): """Base class for MLP classification and regression. Warning: This class should not be used directly. diff --git a/sklearn/svm/base.py b/sklearn/svm/base.py index 2105a6130e7f0..73a3f50f68c30 100644 --- a/sklearn/svm/base.py +++ b/sklearn/svm/base.py @@ -56,7 +56,7 @@ def _one_vs_one_coef(dual_coef, n_support, support_vectors): return coef -class BaseLibSVM(six.with_metaclass(ABCMeta, BaseEstimator)): +class BaseLibSVM(BaseEstimator, metaclass=ABCMeta): """Base class for estimators that use libsvm as backing library This implements support vector machine classification and regression. diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index 5cf30b08a103a..f8982590f7671 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -71,7 +71,7 @@ # ============================================================================= -class BaseDecisionTree(six.with_metaclass(ABCMeta, BaseEstimator)): +class BaseDecisionTree(BaseEstimator, metaclass=ABCMeta): """Base class for decision trees. Warning: This class should not be used directly. diff --git a/sklearn/utils/metaestimators.py b/sklearn/utils/metaestimators.py index 606173560dd75..25e1fe825717f 100644 --- a/sklearn/utils/metaestimators.py +++ b/sklearn/utils/metaestimators.py @@ -14,7 +14,7 @@ __all__ = ['if_delegate_has_method'] -class _BaseComposition(six.with_metaclass(ABCMeta, BaseEstimator)): +class _BaseComposition(BaseEstimator, metaclass=ABCMeta): """Handles parameter management for classifiers composed of named estimators. 
""" @abstractmethod From 44ec241436ce3d3ad75d50ff692af8a5ea68e229 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 21 Nov 2018 12:24:49 -0500 Subject: [PATCH 08/29] metaclass with two base classes --- sklearn/decomposition/base.py | 2 +- sklearn/ensemble/forest.py | 2 +- sklearn/linear_model/stochastic_gradient.py | 2 +- sklearn/mixture/base.py | 2 +- sklearn/naive_bayes.py | 2 +- sklearn/svm/base.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sklearn/decomposition/base.py b/sklearn/decomposition/base.py index faebbd0c74ac6..b318de0cd0daf 100644 --- a/sklearn/decomposition/base.py +++ b/sklearn/decomposition/base.py @@ -17,7 +17,7 @@ from abc import ABCMeta, abstractmethod -class _BasePCA(six.with_metaclass(ABCMeta, BaseEstimator, TransformerMixin)): +class _BasePCA(BaseEstimator, TransformerMixin, metaclass=ABCMeta): """Base class for PCA methods. Warning: This class should not be used directly. diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index 5c193df2eb2d0..f19bd8bbe3b44 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -635,7 +635,7 @@ def predict_log_proba(self, X): return proba -class ForestRegressor(six.with_metaclass(ABCMeta, BaseForest, RegressorMixin)): +class ForestRegressor(BaseForest, RegressorMixin, metaclass=ABCMeta): """Base class for forest of trees-based regressors. Warning: This class should not be used directly. Use derived classes diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py index 1ee974c0389ad..3fc7e8a0fbf56 100644 --- a/sklearn/linear_model/stochastic_gradient.py +++ b/sklearn/linear_model/stochastic_gradient.py @@ -64,7 +64,7 @@ def __call__(self, coef, intercept): return est.score(self.X_val, self.y_val, self.sample_weight_val) -class BaseSGD(six.with_metaclass(ABCMeta, BaseEstimator, SparseCoefMixin)): +class BaseSGD(BaseEstimator, SparseCoefMixin, metaclass=ABCMeta): """Base class for SGD classification and regression.""" def __init__(self, loss, penalty='l2', alpha=0.0001, C=1.0, diff --git a/sklearn/mixture/base.py b/sklearn/mixture/base.py index bfdc75a0547e2..afcec0b94b2d9 100644 --- a/sklearn/mixture/base.py +++ b/sklearn/mixture/base.py @@ -63,7 +63,7 @@ def _check_X(X, n_components=None, n_features=None, ensure_min_samples=1): return X -class BaseMixture(six.with_metaclass(ABCMeta, DensityMixin, BaseEstimator)): +class BaseMixture(DensityMixin, BaseEstimator, metaclass=ABCMeta): """Base class for mixture models. 
This abstract class specifies an interface for all mixture classes and diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index a0ac6a3105508..1a3771807f3cb 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -35,7 +35,7 @@ __all__ = ['BernoulliNB', 'GaussianNB', 'MultinomialNB', 'ComplementNB'] -class BaseNB(six.with_metaclass(ABCMeta, BaseEstimator, ClassifierMixin)): +class BaseNB(BaseEstimator, ClassifierMixin, metaclass=ABCMeta): """Abstract base class for naive Bayes estimators""" @abstractmethod diff --git a/sklearn/svm/base.py b/sklearn/svm/base.py index 73a3f50f68c30..d08f21c2fd374 100644 --- a/sklearn/svm/base.py +++ b/sklearn/svm/base.py @@ -498,7 +498,7 @@ def _get_coef(self): return safe_sparse_dot(self._dual_coef_, self.support_vectors_) -class BaseSVC(six.with_metaclass(ABCMeta, BaseLibSVM, ClassifierMixin)): +class BaseSVC(BaseLibSVM, ClassifierMixin, metaclass=ABCMeta): """ABC for LibSVM-based classifiers.""" @abstractmethod def __init__(self, kernel, degree, gamma, coef0, tol, C, nu, From 185e0bfc2940851f1b16e1617ae72e4ae43aaaa9 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 21 Nov 2018 12:27:48 -0500 Subject: [PATCH 09/29] multi-line metaclasses --- sklearn/linear_model/stochastic_gradient.py | 3 +-- sklearn/model_selection/_search.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py index 3fc7e8a0fbf56..17aad4521e54e 100644 --- a/sklearn/linear_model/stochastic_gradient.py +++ b/sklearn/linear_model/stochastic_gradient.py @@ -468,8 +468,7 @@ def fit_binary(est, i, X, y, alpha, C, learning_rate, max_iter, return result -class BaseSGDClassifier(six.with_metaclass(ABCMeta, BaseSGD, - LinearClassifierMixin)): +class BaseSGDClassifier(BaseSGD, LinearClassifierMixin, metaclass=ABCMeta): loss_functions = { "hinge": (Hinge, 1.0), diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index b6a90f8b54284..157d4ede09dff 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -381,8 +381,7 @@ def _check_param_grid(param_grid): "to be a non-empty sequence.".format(name)) -class BaseSearchCV(six.with_metaclass(ABCMeta, BaseEstimator, - MetaEstimatorMixin)): +class BaseSearchCV(BaseEstimator, MetaEstimatorMixin, metaclass=ABCMeta): """Abstract base class for hyper parameter search with cross-validation. 
""" From 6791cbf46ddb5b0ceac003dac38ccb6e335b00de Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 21 Nov 2018 12:40:50 -0500 Subject: [PATCH 10/29] more metaclass and b fun --- sklearn/cross_decomposition/pls_.py | 4 +-- sklearn/datasets/svmlight_format.py | 28 +++++++-------- sklearn/datasets/tests/test_base.py | 7 ++-- .../datasets/tests/test_svmlight_format.py | 34 +++++++++---------- sklearn/ensemble/bagging.py | 2 +- sklearn/ensemble/base.py | 4 +-- sklearn/ensemble/forest.py | 5 ++- sklearn/feature_extraction/text.py | 4 +-- sklearn/gaussian_process/kernels.py | 2 +- sklearn/metrics/tests/test_pairwise.py | 2 -- sklearn/model_selection/_split.py | 9 +++-- sklearn/multioutput.py | 3 +- sklearn/random_projection.py | 3 +- sklearn/semi_supervised/label_propagation.py | 3 +- sklearn/utils/metaestimators.py | 5 ++- 15 files changed, 51 insertions(+), 64 deletions(-) diff --git a/sklearn/cross_decomposition/pls_.py b/sklearn/cross_decomposition/pls_.py index dcc614677f5f7..ea35089de6637 100644 --- a/sklearn/cross_decomposition/pls_.py +++ b/sklearn/cross_decomposition/pls_.py @@ -116,8 +116,8 @@ def _center_scale_xy(X, Y, scale=True): return X, Y, x_mean, y_mean, x_std, y_std -class _PLS(six.with_metaclass(ABCMeta), BaseEstimator, TransformerMixin, - RegressorMixin): +class _PLS(BaseEstimator, TransformerMixin, RegressorMixin, + metaclass=ABCMeta): """Partial Least Squares (PLS) This class implements the generic PLS algorithm, constructors' parameters diff --git a/sklearn/datasets/svmlight_format.py b/sklearn/datasets/svmlight_format.py index f5e2edfe53354..60e3a3961655e 100644 --- a/sklearn/datasets/svmlight_format.py +++ b/sklearn/datasets/svmlight_format.py @@ -326,28 +326,28 @@ def _dump_svmlight(X, y, f, multilabel, one_based, comment, query_id): X_is_sp = int(hasattr(X, "tocsr")) y_is_sp = int(hasattr(y, "tocsr")) if X.dtype.kind == 'i': - value_pattern = u("%d:%d") + value_pattern = "%d:%d" else: - value_pattern = u("%d:%.16g") + value_pattern = "%d:%.16g" if y.dtype.kind == 'i': - label_pattern = u("%d") + label_pattern = "%d" else: - label_pattern = u("%.16g") + label_pattern = "%.16g" - line_pattern = u("%s") + line_pattern = "%s" if query_id is not None: - line_pattern += u(" qid:%d") - line_pattern += u(" %s\n") + line_pattern += " qid:%d" + line_pattern += " %s\n" if comment: - f.write(b("# Generated by dump_svmlight_file from scikit-learn %s\n" - % __version__)) - f.write(b("# Column indices are %s-based\n" - % ["zero", "one"][one_based])) + f.write("# Generated by dump_svmlight_file from scikit-learn %s\n" + % __version__) + f.write("# Column indices are %s-based\n" + % ["zero", "one"][one_based]) - f.write(b("#\n")) - f.writelines(b("# %s\n" % line) for line in comment.splitlines()) + f.write("#\n") + f.writelines("# %s\n" % line for line in comment.splitlines()) for i in range(X.shape[0]): if X_is_sp: @@ -437,7 +437,7 @@ def dump_svmlight_file(X, y, f, zero_based=True, comment=None, query_id=None, comment.decode("ascii") # just for the exception else: comment = comment.encode("utf-8") - if six.b("\0") in comment: + if "\0" in comment: raise ValueError("comment string contains NUL byte") yval = check_array(y, accept_sparse='csr', ensure_2d=False) diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py index e01ec39eb4943..83be6d05b561a 100644 --- a/sklearn/datasets/tests/test_base.py +++ b/sklearn/datasets/tests/test_base.py @@ -24,7 +24,6 @@ from sklearn.datasets.base import Bunch from sklearn.datasets.tests.test_common import 
check_return_X_y -from sklearn.externals.six import b, u from sklearn.externals._pilutil import pillow_installed from sklearn.utils.testing import assert_false @@ -56,7 +55,7 @@ def test_category_dir_1(load_files_root): test_category_dir1 = tempfile.mkdtemp(dir=load_files_root) sample_file = tempfile.NamedTemporaryFile(dir=test_category_dir1, delete=False) - sample_file.write(b("Hello World!\n")) + sample_file.write("Hello World!\n") sample_file.close() yield str(test_category_dir1) _remove_dir(test_category_dir1) @@ -97,7 +96,7 @@ def test_default_load_files(test_category_dir_1, test_category_dir_2, assert_equal(len(res.filenames), 1) assert_equal(len(res.target_names), 2) assert_equal(res.DESCR, None) - assert_equal(res.data, [b("Hello World!\n")]) + assert_equal(res.data, ["Hello World!\n"]) def test_load_files_w_categories_desc_and_encoding( @@ -108,7 +107,7 @@ def test_load_files_w_categories_desc_and_encoding( assert_equal(len(res.filenames), 1) assert_equal(len(res.target_names), 1) assert_equal(res.DESCR, "test") - assert_equal(res.data, [u("Hello World!\n")]) + assert_equal(res.data, ["Hello World!\n"]) def test_load_files_wo_load_content( diff --git a/sklearn/datasets/tests/test_svmlight_format.py b/sklearn/datasets/tests/test_svmlight_format.py index ca1f7ddae8ecd..584b226cb3a0e 100644 --- a/sklearn/datasets/tests/test_svmlight_format.py +++ b/sklearn/datasets/tests/test_svmlight_format.py @@ -10,8 +10,6 @@ import pytest -from sklearn.externals.six import b - from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal @@ -153,13 +151,13 @@ def test_load_invalid_order_file(): def test_load_zero_based(): - f = BytesIO(b("-1 4:1.\n1 0:1\n")) + f = BytesIO("-1 4:1.\n1 0:1\n") assert_raises(ValueError, load_svmlight_file, f, zero_based=False) def test_load_zero_based_auto(): - data1 = b("-1 1:1 2:2 3:3\n") - data2 = b("-1 0:0 1:1\n") + data1 = "-1 1:1 2:2 3:3\n" + data2 = "-1 0:0 1:1\n" f1 = BytesIO(data1) X, y = load_svmlight_file(f1, zero_based="auto") @@ -174,10 +172,10 @@ def test_load_zero_based_auto(): def test_load_with_qid(): # load svmfile with qid attribute - data = b(""" + data = """ 3 qid:1 1:0.53 2:0.12 2 qid:1 1:0.13 2:0.1 - 7 qid:2 1:0.87 2:0.12""") + 7 qid:2 1:0.87 2:0.12""" X, y = load_svmlight_file(BytesIO(data), query_id=False) assert_array_equal(y, [3, 2, 7]) assert_array_equal(X.toarray(), [[.53, .12], [.13, .1], [.87, .12]]) @@ -280,9 +278,9 @@ def test_dump_multilabel(): dump_svmlight_file(X, y, f, multilabel=True) f.seek(0) # make sure it dumps multilabel correctly - assert_equal(f.readline(), b("1 0:1 2:3 4:5\n")) - assert_equal(f.readline(), b("0,2 \n")) - assert_equal(f.readline(), b("0,1 1:5 3:1\n")) + assert_equal(f.readline(), "1 0:1 2:3 4:5\n") + assert_equal(f.readline(), "0,2 \n") + assert_equal(f.readline(), "0,1 1:5 3:1\n") def test_dump_concise(): @@ -303,11 +301,11 @@ def test_dump_concise(): f.seek(0) # make sure it's using the most concise format possible assert_equal(f.readline(), - b("1 0:1 1:2.1 2:3.01 3:1.000000000000001 4:1\n")) - assert_equal(f.readline(), b("2.1 0:1000000000 1:2e+18 2:3e+27\n")) - assert_equal(f.readline(), b("3.01 \n")) - assert_equal(f.readline(), b("1.000000000000001 \n")) - assert_equal(f.readline(), b("1 \n")) + "1 0:1 1:2.1 2:3.01 3:1.000000000000001 4:1\n") + assert_equal(f.readline(), "2.1 0:1000000000 1:2e+18 2:3e+27\n") + assert_equal(f.readline(), "3.01 \n") + assert_equal(f.readline(), "1.000000000000001 \n") + 
assert_equal(f.readline(), "1 \n") f.seek(0) # make sure it's correct too :) X2, y2 = load_svmlight_file(f) @@ -329,7 +327,7 @@ def test_dump_comment(): assert_array_almost_equal(y, y2) # XXX we have to update this to support Python 3.x - utf8_comment = b("It is true that\n\xc2\xbd\xc2\xb2 = \xc2\xbc") + utf8_comment = "It is true that\n\xc2\xbd\xc2\xb2 = \xc2\xbc" f = BytesIO() assert_raises(UnicodeDecodeError, dump_svmlight_file, X, y, f, comment=utf8_comment) @@ -376,11 +374,11 @@ def test_dump_query_id(): def test_load_with_long_qid(): # load svmfile with longint qid attribute - data = b(""" + data = """ 1 qid:0 0:1 1:2 2:3 0 qid:72048431380967004 0:1440446648 1:72048431380967004 2:236784985 0 qid:-9223372036854775807 0:1440446648 1:72048431380967004 2:236784985 - 3 qid:9223372036854775807 0:1440446648 1:72048431380967004 2:236784985""") + 3 qid:9223372036854775807 0:1440446648 1:72048431380967004 2:236784985""" X, y, qid = load_svmlight_file(BytesIO(data), query_id=True) true_X = [[1, 2, 3], diff --git a/sklearn/ensemble/bagging.py b/sklearn/ensemble/bagging.py index 719f198a958c8..ef2399e0d6041 100644 --- a/sklearn/ensemble/bagging.py +++ b/sklearn/ensemble/bagging.py @@ -183,7 +183,7 @@ def _parallel_predict_regression(estimators, estimators_features, X): estimators_features)) -class BaseBagging(with_metaclass(ABCMeta, BaseEnsemble)): +class BaseBagging(BaseEnsemble, metaclass=ABCMeta): """Base class for Bagging meta-estimator. Warning: This class should not be used directly. Use derived classes diff --git a/sklearn/ensemble/base.py b/sklearn/ensemble/base.py index 0f1d7087ed501..1ca2ef8b20492 100644 --- a/sklearn/ensemble/base.py +++ b/sklearn/ensemble/base.py @@ -13,7 +13,6 @@ from ..base import MetaEstimatorMixin from ..utils import check_random_state from ..utils._joblib import effective_n_jobs -from ..externals import six from abc import ABCMeta, abstractmethod MAX_RAND_SEED = np.iinfo(np.int32).max @@ -58,8 +57,7 @@ def _set_random_states(estimator, random_state=None): estimator.set_params(**to_set) -class BaseEnsemble(six.with_metaclass(ABCMeta, BaseEstimator, - MetaEstimatorMixin)): +class BaseEnsemble(BaseEstimator, MetaEstimatorMixin, metaclass=ABCMeta): """Base class for all ensemble classes. Warning: This class should not be used directly. Use derived classes diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index f19bd8bbe3b44..789274278f7e1 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -242,7 +242,7 @@ def fit(self, X, y, sample_weight=None): if self.n_estimators == 'warn': warn("The default value of n_estimators will change from " - "10 in version 0.20 to 100 in 0.22.", FutureWarning) + "10 in version 0.20 to 100 in 0.22.", FutureWarning) self.n_estimators = 10 # Validate or convert input data @@ -394,8 +394,7 @@ def _accumulate_prediction(predict, X, out, lock): out[i] += prediction[i] -class ForestClassifier(six.with_metaclass(ABCMeta, BaseForest, - ClassifierMixin)): +class ForestClassifier(BaseForest, ClassifierMixin, metaclass=ABCMeta): """Base class for forest of trees-based classifiers. Warning: This class should not be used directly. 
Use derived classes diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index fbbfaed5ef9db..824ec3beccc4e 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -343,7 +343,7 @@ def _validate_vocabulary(self): raise ValueError(msg) vocabulary = vocab else: - indices = set(six.itervalues(vocabulary)) + indices = set(vocabulary.values()) if len(indices) != len(vocabulary): raise ValueError("Vocabulary contains repeated indices.") for i in range(len(vocabulary)): @@ -1124,7 +1124,7 @@ def get_feature_names(self): self._check_vocabulary() - return [t for t, i in sorted(six.iteritems(self.vocabulary_), + return [t for t, i in sorted(self.vocabulary_.items(), key=itemgetter(1))] diff --git a/sklearn/gaussian_process/kernels.py b/sklearn/gaussian_process/kernels.py index 0df0d1197dde9..7d83b5db0fd7e 100644 --- a/sklearn/gaussian_process/kernels.py +++ b/sklearn/gaussian_process/kernels.py @@ -115,7 +115,7 @@ def __eq__(self, other): self.fixed == other.fixed) -class Kernel(six.with_metaclass(ABCMeta)): +class Kernel(metaclass=ABCMeta): """Base class for all kernels. .. versionadded:: 0.18 diff --git a/sklearn/metrics/tests/test_pairwise.py b/sklearn/metrics/tests/test_pairwise.py index 5dd2d86c94545..d2f4578856a31 100644 --- a/sklearn/metrics/tests/test_pairwise.py +++ b/sklearn/metrics/tests/test_pairwise.py @@ -19,8 +19,6 @@ from sklearn.utils.testing import ignore_warnings from sklearn.utils.testing import assert_warns_message -from sklearn.externals.six import iteritems - from sklearn.metrics.pairwise import euclidean_distances from sklearn.metrics.pairwise import manhattan_distances from sklearn.metrics.pairwise import linear_kernel diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index d3f5ab0f5ba72..582f3d69bf6f9 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -25,7 +25,6 @@ from ..utils.validation import _num_samples, column_or_1d from ..utils.validation import check_array from ..utils.multiclass import type_of_target -from ..externals.six import with_metaclass from ..utils.fixes import signature, comb from ..utils.fixes import _Iterable as Iterable from ..base import _pprint @@ -59,7 +58,7 @@ "in version 0.22.") -class BaseCrossValidator(with_metaclass(ABCMeta)): +class BaseCrossValidator(metaclass=ABCMeta): """Base class for all cross-validators Implementations must define `_iter_test_masks` or `_iter_test_indices`. @@ -265,7 +264,7 @@ def get_n_splits(self, X, y=None, groups=None): return int(comb(_num_samples(X), self.p, exact=True)) -class _BaseKFold(with_metaclass(ABCMeta, BaseCrossValidator)): +class _BaseKFold(BaseCrossValidator, metaclass=ABCMeta): """Base class for KFold, GroupKFold, and StratifiedKFold""" @abstractmethod @@ -1059,7 +1058,7 @@ def split(self, X, y=None, groups=None): return super(LeavePGroupsOut, self).split(X, y, groups) -class _RepeatedSplits(with_metaclass(ABCMeta)): +class _RepeatedSplits(metaclass=ABCMeta): """Repeated splits for an arbitrary randomized CV splitter. 
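For the splitter base classes touched here (BaseCrossValidator, _BaseKFold, _RepeatedSplits) the old spelling was with_metaclass(ABCMeta) with no extra bases, so the keyword form is simply class Foo(metaclass=ABCMeta). Either spelling leaves the ABC machinery intact; a small self-contained check with an invented Splitter class, not part of scikit-learn:

    from abc import ABCMeta, abstractmethod

    class Splitter(metaclass=ABCMeta):
        """Hypothetical stand-in for a cross-validator base class."""

        @abstractmethod
        def split(self, X):
            """Yield (train, test) index pairs."""

    class TwoFoldLike(Splitter):
        def split(self, X):
            half = len(X) // 2
            yield list(range(half, len(X))), list(range(half))
            yield list(range(half)), list(range(half, len(X)))

    try:
        Splitter()                      # still abstract, still refuses to instantiate
    except TypeError as exc:
        print("expected:", exc)

    print(type(Splitter) is ABCMeta)    # True: the metaclass survived the rewrite
    print(list(TwoFoldLike().split([10, 20, 30, 40])))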
Repeats splits for cross-validators n times with different randomization @@ -1264,7 +1263,7 @@ def __init__(self, n_splits=5, n_repeats=10, random_state=None): StratifiedKFold, n_repeats, random_state, n_splits=n_splits) -class BaseShuffleSplit(with_metaclass(ABCMeta)): +class BaseShuffleSplit(metaclass=ABCMeta): """Base class for ShuffleSplit and StratifiedShuffleSplit""" def __init__(self, n_splits=10, test_size="default", train_size=None, diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 69f05183f2fe8..e5fc27f79b76b 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -59,8 +59,7 @@ def _partial_fit_estimator(estimator, X, y, classes=None, sample_weight=None, return estimator -class MultiOutputEstimator(six.with_metaclass(ABCMeta, BaseEstimator, - MetaEstimatorMixin)): +class MultiOutputEstimator(BaseEstimator, MetaEstimatorMixin, metaclass=ABCMeta): @abstractmethod def __init__(self, estimator, n_jobs=None): self.estimator = estimator diff --git a/sklearn/random_projection.py b/sklearn/random_projection.py index 7581847d00c58..bac632fc7df65 100644 --- a/sklearn/random_projection.py +++ b/sklearn/random_projection.py @@ -291,8 +291,7 @@ def sparse_random_matrix(n_components, n_features, density='auto', return np.sqrt(1 / density) / np.sqrt(n_components) * components -class BaseRandomProjection(six.with_metaclass(ABCMeta, BaseEstimator, - TransformerMixin)): +class BaseRandomProjection(BaseEstimator, TransformerMixin, metaclass=ABCMeta): """Base class for random projections. Warning: This class should not be used directly. diff --git a/sklearn/semi_supervised/label_propagation.py b/sklearn/semi_supervised/label_propagation.py index 04aa6714e0711..6b04eb8256daa 100644 --- a/sklearn/semi_supervised/label_propagation.py +++ b/sklearn/semi_supervised/label_propagation.py @@ -72,8 +72,7 @@ from ..exceptions import ConvergenceWarning -class BaseLabelPropagation(six.with_metaclass(ABCMeta, BaseEstimator, - ClassifierMixin)): +class BaseLabelPropagation(BaseEstimator, ClassifierMixin, metaclass=ABCMeta): """Base class for label propagation module. Parameters diff --git a/sklearn/utils/metaestimators.py b/sklearn/utils/metaestimators.py index 25e1fe825717f..e2d6ca58b5e05 100644 --- a/sklearn/utils/metaestimators.py +++ b/sklearn/utils/metaestimators.py @@ -29,8 +29,7 @@ def _get_params(self, attr, deep=True): out.update(estimators) for name, estimator in estimators: if hasattr(estimator, 'get_params'): - for key, value in six.iteritems( - estimator.get_params(deep=True)): + for key, value in estimator.get_params(deep=True).items(): out['%s__%s' % (name, key)] = value return out @@ -44,7 +43,7 @@ def _set_params(self, attr, **params): names = [] if items: names, _ = zip(*items) - for name in list(six.iterkeys(params)): + for name in list(params.keys()): if '__' not in name and name in names: self._replace_estimator(attr, name, params.pop(name)) # 3. 
Step parameters and other initialisation arguments From adfef757d58b0d772a2aa23c6aaa2191f2b508f3 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 21 Nov 2018 13:01:53 -0500 Subject: [PATCH 11/29] getting rid of six (and python2) --- sklearn/cluster/mean_shift_.py | 2 +- sklearn/datasets/_svmlight_format.pyx | 2 -- sklearn/datasets/base.py | 2 +- sklearn/datasets/openml.py | 16 +++------------- sklearn/datasets/tests/test_lfw.py | 15 ++++++++------- sklearn/decomposition/fastica_.py | 4 ++-- sklearn/externals/joblib/_parallel_backends.py | 2 +- sklearn/externals/joblib/_store_backends.py | 2 +- sklearn/metrics/tests/test_pairwise.py | 2 +- sklearn/model_selection/tests/test_split.py | 3 +-- sklearn/tree/export.py | 2 +- sklearn/utils/tests/test_bench.py | 11 ----------- sklearn/utils/tests/test_murmurhash.py | 16 ++++++++-------- 13 files changed, 28 insertions(+), 51 deletions(-) delete mode 100644 sklearn/utils/tests/test_bench.py diff --git a/sklearn/cluster/mean_shift_.py b/sklearn/cluster/mean_shift_.py index 89117164a63f8..ce5dac8b5a318 100644 --- a/sklearn/cluster/mean_shift_.py +++ b/sklearn/cluster/mean_shift_.py @@ -284,7 +284,7 @@ def get_bin_seeds(X, bin_size, min_bin_freq=1): bin_sizes[tuple(binned_point)] += 1 # Select only those bins as seeds which have enough members - bin_seeds = np.array([point for point, freq in six.iteritems(bin_sizes) if + bin_seeds = np.array([point for point, freq in bin_sizes.items() if freq >= min_bin_freq], dtype=np.float32) if len(bin_seeds) == len(X): warnings.warn("Binning data failed with provided bin_size=%f," diff --git a/sklearn/datasets/_svmlight_format.pyx b/sklearn/datasets/_svmlight_format.pyx index bba5db9d3cf50..ea45920ad70f2 100644 --- a/sklearn/datasets/_svmlight_format.pyx +++ b/sklearn/datasets/_svmlight_format.pyx @@ -14,8 +14,6 @@ cimport numpy as np import numpy as np import scipy.sparse as sp -from ..externals.six import b - np.import_array() diff --git a/sklearn/datasets/base.py b/sklearn/datasets/base.py index e4580b56dc181..bb48f48c57e35 100644 --- a/sklearn/datasets/base.py +++ b/sklearn/datasets/base.py @@ -22,7 +22,7 @@ import numpy as np -from ..externals.six.moves.urllib.request import urlretrieve +from urllib.request import urlretrieve RemoteFileMetadata = namedtuple('RemoteFileMetadata', ['filename', 'url', 'checksum']) diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py index 1f1fc158553fd..09c4c515a5745 100644 --- a/sklearn/datasets/openml.py +++ b/sklearn/datasets/openml.py @@ -21,7 +21,6 @@ from sklearn.externals import _arff from .base import get_data_home -from ..externals.six import str, PY2, BytesIO from urllib.error import HTTPError from ..utils import Bunch @@ -92,8 +91,6 @@ def is_gzip(_fsrc): if data_home is None: fsrc = urlopen(req) if is_gzip(fsrc): - if PY2: - fsrc = BytesIO(fsrc.read()) return gzip.GzipFile(fileobj=fsrc, mode='rb') return fsrc @@ -360,16 +357,9 @@ def _arff_load(): else: return_type = _arff.DENSE - if PY2: - arff_file = _arff.load( - response.read(), - encode_nominal=encode_nominal, - return_type=return_type, - ) - else: - arff_file = _arff.loads(response.read().decode('utf-8'), - encode_nominal=encode_nominal, - return_type=return_type) + arff_file = _arff.loads(response.read().decode('utf-8'), + encode_nominal=encode_nominal, + return_type=return_type) return arff_file return _arff_load() diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py index 75aecdfb999f1..56323c4aba266 100644 --- a/sklearn/datasets/tests/test_lfw.py +++
b/sklearn/datasets/tests/test_lfw.py @@ -70,30 +70,31 @@ def setup_module(): # add some random file pollution to test robustness with open(os.path.join(LFW_HOME, 'lfw_funneled', '.test.swp'), 'wb') as f: - f.write(six.b('Text file to be ignored by the dataset loader.')) + f.write('Text file to be ignored by the dataset loader.'.encode()) # generate some pairing metadata files using the same format as LFW with open(os.path.join(LFW_HOME, 'pairsDevTrain.txt'), 'wb') as f: - f.write(six.b("10\n")) + f.write("10\n").encode() more_than_two = [name for name, count in counts.items() if count >= 2] for i in range(5): name = random_state.choice(more_than_two) first, second = random_state.sample(range(counts[name]), 2) - f.write(six.b('%s\t%d\t%d\n' % (name, first, second))) + f.write('%s\t%d\t%d\n' % (name, first, second).encode()) for i in range(5): first_name, second_name = random_state.sample(FAKE_NAMES, 2) first_index = random_state.choice(np.arange(counts[first_name])) second_index = random_state.choice(np.arange(counts[second_name])) - f.write(six.b('%s\t%d\t%s\t%d\n' % (first_name, first_index, - second_name, second_index))) + f.write(('%s\t%d\t%s\t%d\n' % (first_name, first_index, + second_name, second_index)).encode() + ) with open(os.path.join(LFW_HOME, 'pairsDevTest.txt'), 'wb') as f: - f.write(six.b("Fake place holder that won't be tested")) + f.write("Fake place holder that won't be tested".encode()) with open(os.path.join(LFW_HOME, 'pairs.txt'), 'wb') as f: - f.write(six.b("Fake place holder that won't be tested")) + f.write("Fake place holder that won't be tested".encode()) def teardown_module(): diff --git a/sklearn/decomposition/fastica_.py b/sklearn/decomposition/fastica_.py index 5995357c4f4a9..f9ee5e42fbbc8 100644 --- a/sklearn/decomposition/fastica_.py +++ b/sklearn/decomposition/fastica_.py @@ -74,7 +74,7 @@ def _ica_def(X, tol, g, fun_args, max_iter, w_init): w = w_init[j, :].copy() w /= np.sqrt((w ** 2).sum()) - for i in moves.range(max_iter): + for i in range(max_iter): gwtx, g_wtx = g(np.dot(w.T, X), fun_args) w1 = (X * gwtx).mean(axis=1) - g_wtx.mean() * w @@ -103,7 +103,7 @@ def _ica_par(X, tol, g, fun_args, max_iter, w_init): W = _sym_decorrelation(w_init) del w_init p_ = float(X.shape[1]) - for ii in moves.range(max_iter): + for ii in range(max_iter): gwtx, g_wtx = g(np.dot(W, X), fun_args) W1 = _sym_decorrelation(np.dot(gwtx, X.T) / p_ - g_wtx[:, np.newaxis] * W) diff --git a/sklearn/externals/joblib/_parallel_backends.py b/sklearn/externals/joblib/_parallel_backends.py index 0f0bcf0ab4213..dd62d508694a9 100644 --- a/sklearn/externals/joblib/_parallel_backends.py +++ b/sklearn/externals/joblib/_parallel_backends.py @@ -27,7 +27,7 @@ from .externals.loky import process_executor, cpu_count -class ParallelBackendBase(with_metaclass(ABCMeta)): +class ParallelBackendBase(metaclass=ABCMeta): """Helper abc which defines all methods a ParallelBackend must implement""" supports_timeout = False diff --git a/sklearn/externals/joblib/_store_backends.py b/sklearn/externals/joblib/_store_backends.py index 9196f0a7746a1..3e2c02d1d64fb 100644 --- a/sklearn/externals/joblib/_store_backends.py +++ b/sklearn/externals/joblib/_store_backends.py @@ -31,7 +31,7 @@ def concurrency_safe_write(object_to_write, filename, write_func): return temporary_filename -class StoreBackendBase(with_metaclass(ABCMeta)): +class StoreBackendBase(metaclass=ABCMeta): """Helper Abstract Base Class which defines all methods that a StorageBackend must implement.""" diff --git 
a/sklearn/metrics/tests/test_pairwise.py b/sklearn/metrics/tests/test_pairwise.py index d2f4578856a31..f76215d5e1bbf 100644 --- a/sklearn/metrics/tests/test_pairwise.py +++ b/sklearn/metrics/tests/test_pairwise.py @@ -296,7 +296,7 @@ def test_pairwise_kernels_filter_param(): assert_raises(TypeError, pairwise_kernels, X, Y, "rbf", **params) -@pytest.mark.parametrize('metric, func', iteritems(PAIRED_DISTANCES)) +@pytest.mark.parametrize('metric, func', PAIRED_DISTANCES.items()) def test_paired_distances(metric, func): # Test the pairwise_distance helper function. rng = np.random.RandomState(0) diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py index ab05e01f71351..ebdd3ab17225c 100644 --- a/sklearn/model_selection/tests/test_split.py +++ b/sklearn/model_selection/tests/test_split.py @@ -545,8 +545,7 @@ def test_shuffle_split(): ss1 = ShuffleSplit(test_size=0.2, random_state=0).split(X) ss2 = ShuffleSplit(test_size=2, random_state=0).split(X) ss3 = ShuffleSplit(test_size=np.int32(2), random_state=0).split(X) - for typ in six.integer_types: - ss4 = ShuffleSplit(test_size=typ(2), random_state=0).split(X) + ss4 = ShuffleSplit(test_size=int(2), random_state=0).split(X) for t1, t2, t3, t4 in zip(ss1, ss2, ss3, ss4): assert_array_equal(t1[0], t2[0]) assert_array_equal(t2[0], t3[0]) diff --git a/sklearn/tree/export.py b/sklearn/tree/export.py index 18052d6233d09..017275cfb1c19 100644 --- a/sklearn/tree/export.py +++ b/sklearn/tree/export.py @@ -11,7 +11,7 @@ # Li Li # License: BSD 3 clause import warnings -from IO import StringIO +from io import StringIO from numbers import Integral diff --git a/sklearn/utils/tests/test_bench.py b/sklearn/utils/tests/test_bench.py deleted file mode 100644 index c04ba4ad25eba..0000000000000 --- a/sklearn/utils/tests/test_bench.py +++ /dev/null @@ -1,11 +0,0 @@ - -import datetime - -from sklearn.utils.bench import total_seconds -from sklearn.utils.testing import assert_equal - - -def test_total_seconds(): - delta = (datetime.datetime(2012, 1, 1, 5, 5, 1) - - datetime.datetime(2012, 1, 1, 5, 5, 4)) - assert_equal(86397, total_seconds(delta)) diff --git a/sklearn/utils/tests/test_murmurhash.py b/sklearn/utils/tests/test_murmurhash.py index cb7899af88ab3..6f57b06c6bb4e 100644 --- a/sklearn/utils/tests/test_murmurhash.py +++ b/sklearn/utils/tests/test_murmurhash.py @@ -43,19 +43,19 @@ def test_mmhash3_int_array(): def test_mmhash3_bytes(): - assert_equal(murmurhash3_32(b('foo'), 0), -156908512) - assert_equal(murmurhash3_32(b('foo'), 42), -1322301282) + assert_equal(murmurhash3_32('foo'.encode(), 0), -156908512) + assert_equal(murmurhash3_32('foo'.encode(), 42), -1322301282) - assert_equal(murmurhash3_32(b('foo'), 0, positive=True), 4138058784) - assert_equal(murmurhash3_32(b('foo'), 42, positive=True), 2972666014) + assert_equal(murmurhash3_32('foo'.encode(), 0, positive=True), 4138058784) + assert_equal(murmurhash3_32('foo'.encode(), 42, positive=True), 2972666014) def test_mmhash3_unicode(): - assert_equal(murmurhash3_32(u('foo'), 0), -156908512) - assert_equal(murmurhash3_32(u('foo'), 42), -1322301282) + assert_equal(murmurhash3_32('foo', 0), -156908512) + assert_equal(murmurhash3_32('foo', 42), -1322301282) - assert_equal(murmurhash3_32(u('foo'), 0, positive=True), 4138058784) - assert_equal(murmurhash3_32(u('foo'), 42, positive=True), 2972666014) + assert_equal(murmurhash3_32('foo', 0, positive=True), 4138058784) + assert_equal(murmurhash3_32('foo', 42, positive=True), 2972666014) def 
test_no_collision_on_byte_range(): From aa3f485e4efc3d4369def4e807e6950a99697ee8 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 21 Nov 2018 13:03:46 -0500 Subject: [PATCH 12/29] another moves --- sklearn/decomposition/tests/test_fastica.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py index 313a13ad8333b..5efda7d67a178 100644 --- a/sklearn/decomposition/tests/test_fastica.py +++ b/sklearn/decomposition/tests/test_fastica.py @@ -129,7 +129,7 @@ def g_test(x): ica = FastICA(fun=fn, algorithm=algo, random_state=0) assert_raises(ValueError, ica.fit, m.T) - assert_raises(TypeError, FastICA(fun=moves.range(10)).fit, m.T) + assert_raises(TypeError, FastICA(fun=range(10)).fit, m.T) def test_fastica_nowhiten(): From 6a5815b9e006073b784788ed7fe07793d578aa85 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 21 Nov 2018 16:24:25 -0500 Subject: [PATCH 13/29] build on 32bit python3.5 --- appveyor.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/appveyor.yml b/appveyor.yml index 531aaca31aec5..bd59d727ffdc7 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -22,6 +22,11 @@ environment: PYTHON_ARCH: "64" CHECK_WARNINGS: "true" + - PYTHON: "C:\\Python35" + PYTHON_VERSION: "3.5.x" + PYTHON_ARCH: "32" + + # Because we only have a single worker, we don't want to waste precious # appveyor CI time and make other PRs wait for repeated failures in a failing # PR. The following option cancels pending jobs in a given PR after the first From 906576e1ff2fccec2ef12277705907f7d85fcb51 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 21 Nov 2018 16:26:32 -0500 Subject: [PATCH 14/29] remove b in pyx --- sklearn/datasets/_svmlight_format.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/datasets/_svmlight_format.pyx b/sklearn/datasets/_svmlight_format.pyx index ea45920ad70f2..7a1ad9dc93cf7 100644 --- a/sklearn/datasets/_svmlight_format.pyx +++ b/sklearn/datasets/_svmlight_format.pyx @@ -31,7 +31,7 @@ def _load_svmlight_file(f, dtype, bint multilabel, bint zero_based, cdef char *line_cstr cdef int idx, prev_idx cdef Py_ssize_t i - cdef bytes qid_prefix = b('qid') + cdef bytes qid_prefix = 'qid'.encode() cdef Py_ssize_t n_features cdef long long offset_max = offset + length if length > 0 else -1 From 403158072160ab1776643f8f836edefcbf0f68e5 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 21 Nov 2018 16:30:36 -0500 Subject: [PATCH 15/29] minor six fixes --- sklearn/ensemble/partial_dependence.py | 2 +- sklearn/feature_selection/mutual_info_.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/partial_dependence.py b/sklearn/ensemble/partial_dependence.py index 74a6497e42191..9460b29184df7 100644 --- a/sklearn/ensemble/partial_dependence.py +++ b/sklearn/ensemble/partial_dependence.py @@ -283,7 +283,7 @@ def convert_feature(fx): # convert features into a seq of int tuples tmp_features = [] for fxs in features: - if isinstance(fxs, (numbers.Integral,) + str): + if isinstance(fxs, (numbers.Integral, str)): fxs = (fxs,) try: fxs = np.array([convert_feature(fx) for fx in fxs], dtype=np.int32) diff --git a/sklearn/feature_selection/mutual_info_.py b/sklearn/feature_selection/mutual_info_.py index 7c5c247eb36ef..057a696077e8f 100644 --- a/sklearn/feature_selection/mutual_info_.py +++ b/sklearn/feature_selection/mutual_info_.py @@ -285,7 +285,7 @@ def _estimate_mi(X, y, discrete_features='auto', discrete_target=False, 
y += 1e-10 * np.maximum(1, np.mean(np.abs(y))) * rng.randn(n_samples) mi = [_compute_mi(x, y, discrete_feature, discrete_target, n_neighbors) for - x, discrete_feature in moves.zip(_iterate_columns(X), discrete_mask)] + x, discrete_feature zip(_iterate_columns(X), discrete_mask)] return np.array(mi) From 8aa75f13f66a12b41ff88fdf6b9501654ef18032 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 21 Nov 2018 16:31:41 -0500 Subject: [PATCH 16/29] typo --- sklearn/feature_selection/mutual_info_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/feature_selection/mutual_info_.py b/sklearn/feature_selection/mutual_info_.py index 057a696077e8f..ac5492317bc50 100644 --- a/sklearn/feature_selection/mutual_info_.py +++ b/sklearn/feature_selection/mutual_info_.py @@ -285,7 +285,7 @@ def _estimate_mi(X, y, discrete_features='auto', discrete_target=False, y += 1e-10 * np.maximum(1, np.mean(np.abs(y))) * rng.randn(n_samples) mi = [_compute_mi(x, y, discrete_feature, discrete_target, n_neighbors) for - x, discrete_feature zip(_iterate_columns(X), discrete_mask)] + x, discrete_feature in zip(_iterate_columns(X), discrete_mask)] return np.array(mi) From 28c9fd779d30a3d1b458e1c3c1fc51190f181224 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 14 Dec 2018 17:02:12 -0500 Subject: [PATCH 17/29] unused imports, minor cleanups --- sklearn/cluster/tests/test_k_means.py | 1 - sklearn/compose/_column_transformer.py | 2 +- sklearn/datasets/kddcup99.py | 1 - sklearn/datasets/tests/test_svmlight_format.py | 13 ++++++------- sklearn/ensemble/bagging.py | 1 - sklearn/metrics/cluster/tests/test_common.py | 2 +- sklearn/model_selection/_search.py | 1 - sklearn/preprocessing/_discretization.py | 1 - sklearn/tests/test_discriminant_analysis.py | 1 - sklearn/tests/test_metaestimators.py | 2 +- sklearn/utils/extmath.py | 1 - sklearn/utils/tests/test_deprecation.py | 2 -- sklearn/utils/tests/test_extmath.py | 2 -- sklearn/utils/tests/test_fixes.py | 1 - sklearn/utils/tests/test_multiclass.py | 4 ++-- sklearn/utils/tests/test_validation.py | 1 - 16 files changed, 11 insertions(+), 25 deletions(-) diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py index 245ed3d97ea95..37571d427002b 100644 --- a/sklearn/cluster/tests/test_k_means.py +++ b/sklearn/cluster/tests/test_k_means.py @@ -9,7 +9,6 @@ from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import SkipTest from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regex diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index 71944f3721e8c..c2fad0d76870b 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -691,7 +691,7 @@ def _validate_transformers(transformers): return True for t in transformers: - if isinstance(t, six.string_types) and t in ('drop', 'passthrough'): + if isinstance(t, str) and t in ('drop', 'passthrough'): continue if (not (hasattr(t, "fit") or hasattr(t, "fit_transform")) or not hasattr(t, "transform")): diff --git a/sklearn/datasets/kddcup99.py b/sklearn/datasets/kddcup99.py index 713cb19beee36..baa84bab445d5 100644 --- a/sklearn/datasets/kddcup99.py +++ b/sklearn/datasets/kddcup99.py @@ -8,7 +8,6 @@ """ -import sys import errno from gzip import GzipFile import 
logging diff --git a/sklearn/datasets/tests/test_svmlight_format.py b/sklearn/datasets/tests/test_svmlight_format.py index e8242e1195e26..05a958bd0c88b 100644 --- a/sklearn/datasets/tests/test_svmlight_format.py +++ b/sklearn/datasets/tests/test_svmlight_format.py @@ -17,7 +17,6 @@ from sklearn.utils.testing import assert_raises_regex from sklearn.utils.testing import assert_in from sklearn.utils.testing import fails_if_pypy -from sklearn.utils.fixes import sp_version import sklearn from sklearn.datasets import (load_svmlight_file, load_svmlight_files, @@ -43,8 +42,8 @@ def test_load_svmlight_file(): # test X's non-zero values for i, j, val in ((0, 2, 2.5), (0, 10, -5.2), (0, 15, 1.5), - (1, 5, 1.0), (1, 12, -3), - (2, 20, 27)): + (1, 5, 1.0), (1, 12, -3), + (2, 20, 27)): assert_equal(X[i, j], val) @@ -106,7 +105,7 @@ def test_load_svmlight_file_n_features(): # test X's non-zero values for i, j, val in ((0, 2, 2.5), (0, 10, -5.2), - (1, 5, 1.0), (1, 12, -3)): + (1, 5, 1.0), (1, 12, -3)): assert_equal(X[i, j], val) @@ -376,9 +375,9 @@ def test_load_with_long_qid(): X, y, qid = load_svmlight_file(BytesIO(data), query_id=True) true_X = [[1, 2, 3], - [1440446648, 72048431380967004, 236784985], - [1440446648, 72048431380967004, 236784985], - [1440446648, 72048431380967004, 236784985]] + [1440446648, 72048431380967004, 236784985], + [1440446648, 72048431380967004, 236784985], + [1440446648, 72048431380967004, 236784985]] true_y = [1, 0, 0, 3] trueQID = [0, 72048431380967004, -9223372036854775807, 9223372036854775807] diff --git a/sklearn/ensemble/bagging.py b/sklearn/ensemble/bagging.py index ef2399e0d6041..63a7721f905bc 100644 --- a/sklearn/ensemble/bagging.py +++ b/sklearn/ensemble/bagging.py @@ -14,7 +14,6 @@ from .base import BaseEnsemble, _partition_estimators from ..base import ClassifierMixin, RegressorMixin from ..utils._joblib import Parallel, delayed -from ..externals.six import with_metaclass from ..metrics import r2_score, accuracy_score from ..tree import DecisionTreeClassifier, DecisionTreeRegressor from ..utils import check_random_state, check_X_y, check_array, column_or_1d diff --git a/sklearn/metrics/cluster/tests/test_common.py b/sklearn/metrics/cluster/tests/test_common.py index d3fde5c3b5565..810a573b12e63 100644 --- a/sklearn/metrics/cluster/tests/test_common.py +++ b/sklearn/metrics/cluster/tests/test_common.py @@ -15,7 +15,7 @@ from sklearn.metrics.cluster import calinski_harabasz_score from sklearn.metrics.cluster import davies_bouldin_score -from sklearn.utils.testing import assert_allclose, ignore_warnings +from sklearn.utils.testing import assert_allclose # Dictionaries of metrics diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index bafb43efb98ba..d12e41930a001 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -31,7 +31,6 @@ from ..exceptions import NotFittedError from ..utils._joblib import Parallel, delayed from ..utils import check_random_state -from ..utils.fixes import sp_version from ..utils.fixes import MaskedArray from ..utils.fixes import _Mapping as Mapping, _Sequence as Sequence from ..utils.fixes import _Iterable as Iterable diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py index 2ba1b019fc12e..b57e03230f4f1 100644 --- a/sklearn/preprocessing/_discretization.py +++ b/sklearn/preprocessing/_discretization.py @@ -17,7 +17,6 @@ from ..utils.validation import check_array from ..utils.validation import check_is_fitted from 
..utils.validation import FLOAT_DTYPES -from ..utils.fixes import np_version class KBinsDiscretizer(BaseEstimator, TransformerMixin): diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index e7b14e2b1f9f2..3cf4f5c016f79 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -6,7 +6,6 @@ from sklearn.utils import check_random_state from sklearn.utils.testing import (assert_array_equal, assert_no_warnings, assert_warns_message) -from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal diff --git a/sklearn/tests/test_metaestimators.py b/sklearn/tests/test_metaestimators.py index da9ccb4314801..47de7ae374b74 100644 --- a/sklearn/tests/test_metaestimators.py +++ b/sklearn/tests/test_metaestimators.py @@ -104,7 +104,7 @@ def score(self, X, y, *args, **kwargs): self._check_fit() return 1.0 - methods = [k for k in iterkeys(SubEstimator.__dict__) + methods = [k for k in SubEstimator.__dict__.keys() if not k.startswith('_') and not k.startswith('fit')] methods.sort() diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index bb2c2455d6201..fef2c7aff7971 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -18,7 +18,6 @@ from scipy import linalg, sparse from . import check_random_state -from .fixes import np_version from ._logistic_sigmoid import _log_logistic_sigmoid from .sparsefuncs_fast import csr_row_norms from .validation import check_array diff --git a/sklearn/utils/tests/test_deprecation.py b/sklearn/utils/tests/test_deprecation.py index f6049debeb20a..c8d8484d71bc4 100644 --- a/sklearn/utils/tests/test_deprecation.py +++ b/sklearn/utils/tests/test_deprecation.py @@ -2,13 +2,11 @@ # License: BSD 3 clause -import sys import pickle from sklearn.utils.deprecation import _is_deprecated from sklearn.utils.deprecation import deprecated from sklearn.utils.testing import assert_warns_message -from sklearn.utils.testing import SkipTest @deprecated('qwerty') diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index 81b0044c804f0..69cb83804dced 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -20,8 +20,6 @@ from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_warns_message from sklearn.utils.testing import skip_if_32bit -from sklearn.utils.testing import SkipTest -from sklearn.utils.fixes import np_version from sklearn.utils.extmath import density from sklearn.utils.extmath import randomized_svd diff --git a/sklearn/utils/tests/test_fixes.py b/sklearn/utils/tests/test_fixes.py index 0dd97c03cb032..b253fc1f54cec 100644 --- a/sklearn/utils/tests/test_fixes.py +++ b/sklearn/utils/tests/test_fixes.py @@ -8,7 +8,6 @@ import numpy as np import pytest -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_allclose diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py index be5989b14e152..443988ddc3ecb 100644 --- a/sklearn/utils/tests/test_multiclass.py +++ b/sklearn/utils/tests/test_multiclass.py @@ -224,7 +224,7 @@ def test_unique_labels_mixed_types(): def test_is_multilabel(): - for group, group_examples in iteritems(EXAMPLES): + for group, group_examples in EXAMPLES.items(): if 
group in ['multilabel-indicator']: dense_exp = True else: @@ -277,7 +277,7 @@ def test_check_classification_targets(): # @ignore_warnings def test_type_of_target(): - for group, group_examples in iteritems(EXAMPLES): + for group, group_examples in EXAMPLES.items(): for example in group_examples: assert_equal(type_of_target(example), group, msg=('type_of_target(%r) should be %r, got %r' diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index ec8b10ce2b54e..99019e25c0c81 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -10,7 +10,6 @@ from pytest import importorskip import numpy as np import scipy.sparse as sp -from scipy import __version__ as scipy_version from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_raises From 03bf639e497619a4838498d3057a7556147be222 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 14 Dec 2018 17:13:03 -0500 Subject: [PATCH 18/29] remove six import from openml test --- sklearn/datasets/tests/test_openml.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py index 4a858899e2c31..4cda24c7398b2 100644 --- a/sklearn/datasets/tests/test_openml.py +++ b/sklearn/datasets/tests/test_openml.py @@ -17,7 +17,6 @@ _retry_with_clean_cache) from sklearn.utils.testing import (assert_warns_message, assert_raise_message) -from sklearn.externals.six import str from urllib.error import HTTPError from sklearn.datasets.tests.test_common import check_return_X_y from functools import partial From 14c321995a537421e3f2a13ce850ceb9a8cf0a6b Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 14 Dec 2018 17:14:44 -0500 Subject: [PATCH 19/29] remove six from bicluster example --- examples/bicluster/plot_bicluster_newsgroups.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/bicluster/plot_bicluster_newsgroups.py b/examples/bicluster/plot_bicluster_newsgroups.py index 12d42e23a0f19..430c37a651197 100644 --- a/examples/bicluster/plot_bicluster_newsgroups.py +++ b/examples/bicluster/plot_bicluster_newsgroups.py @@ -32,7 +32,6 @@ from sklearn.cluster.bicluster import SpectralCoclustering from sklearn.cluster import MiniBatchKMeans -from sklearn.externals.six import iteritems from sklearn.datasets.twenty_newsgroups import fetch_20newsgroups from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics.cluster import v_measure_score @@ -116,7 +115,7 @@ def most_common(d): Like Counter.most_common in Python >=2.7. 
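On Python 3 the dict methods items(), keys() and values() already return lightweight view objects, which is why the iteritems/iterkeys/itervalues wrappers can simply be dropped throughout these patches. A stand-alone version of the most_common sorting pattern being converted in the bicluster example, with made-up counts:

    import operator

    counts = {"graph": 3, "minors": 1, "trees": 2}

    # six era: sorted(six.iteritems(counts), key=operator.itemgetter(1), reverse=True)
    ranked = sorted(counts.items(), key=operator.itemgetter(1), reverse=True)
    print(ranked)   # [('graph', 3), ('trees', 2), ('minors', 1)]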
""" - return sorted(iteritems(d), key=operator.itemgetter(1), reverse=True) + return sorted(d.items(), key=operator.itemgetter(1), reverse=True) bicluster_ncuts = list(bicluster_ncut(i) From c450e95c27b42f45a248df2e4c0de5d03ff831f4 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 14 Dec 2018 17:18:56 -0500 Subject: [PATCH 20/29] revert externals --- sklearn/externals/_arff.py | 4 ++-- sklearn/externals/joblib/_parallel_backends.py | 2 +- sklearn/externals/joblib/_store_backends.py | 2 +- sklearn/externals/six.py | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sklearn/externals/_arff.py b/sklearn/externals/_arff.py index 2552e77b894a6..82f504542f9a9 100644 --- a/sklearn/externals/_arff.py +++ b/sklearn/externals/_arff.py @@ -431,7 +431,7 @@ def decode_data(self, s, conversors): raise BadDataFormat(s) # XXX: int 0 is used for implicit values, not '0' values = [values[i] if i in values else 0 for i in - range(len(conversors))] + xrange(len(conversors))] else: if len(values) != len(conversors): raise BadDataFormat(s) @@ -524,7 +524,7 @@ def encode_data(self, data, attributes): data = data.data # Check if the rows are sorted - if not all(row[i] <= row[i + 1] for i in range(len(row) - 1)): + if not all(row[i] <= row[i + 1] for i in xrange(len(row) - 1)): raise ValueError("liac-arff can only output COO matrices with " "sorted rows.") diff --git a/sklearn/externals/joblib/_parallel_backends.py b/sklearn/externals/joblib/_parallel_backends.py index dd62d508694a9..0f0bcf0ab4213 100644 --- a/sklearn/externals/joblib/_parallel_backends.py +++ b/sklearn/externals/joblib/_parallel_backends.py @@ -27,7 +27,7 @@ from .externals.loky import process_executor, cpu_count -class ParallelBackendBase(metaclass=ABCMeta): +class ParallelBackendBase(with_metaclass(ABCMeta)): """Helper abc which defines all methods a ParallelBackend must implement""" supports_timeout = False diff --git a/sklearn/externals/joblib/_store_backends.py b/sklearn/externals/joblib/_store_backends.py index 3e2c02d1d64fb..9196f0a7746a1 100644 --- a/sklearn/externals/joblib/_store_backends.py +++ b/sklearn/externals/joblib/_store_backends.py @@ -31,7 +31,7 @@ def concurrency_safe_write(object_to_write, filename, write_func): return temporary_filename -class StoreBackendBase(metaclass=ABCMeta): +class StoreBackendBase(with_metaclass(ABCMeta)): """Helper Abstract Base Class which defines all methods that a StorageBackend must implement.""" diff --git a/sklearn/externals/six.py b/sklearn/externals/six.py index 5b7dc61f98e7e..85898ec71275f 100644 --- a/sklearn/externals/six.py +++ b/sklearn/externals/six.py @@ -33,7 +33,7 @@ PY3 = sys.version_info[0] == 3 if PY3: - str = str, + string_types = str, integer_types = int, class_types = type, text_type = str @@ -41,7 +41,7 @@ MAXSIZE = sys.maxsize else: - str = basestring, + string_types = basestring, integer_types = (int, long) class_types = (type, types.ClassType) text_type = unicode From 559db2b55139b91002a31da642bb3febfba86ed5 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 14 Dec 2018 17:24:50 -0500 Subject: [PATCH 21/29] fix some encoding stuff --- sklearn/datasets/svmlight_format.py | 15 +++++----- .../datasets/tests/test_svmlight_format.py | 28 +++++++++---------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/sklearn/datasets/svmlight_format.py b/sklearn/datasets/svmlight_format.py index 60e3a3961655e..df3c6dba98e29 100644 --- a/sklearn/datasets/svmlight_format.py +++ b/sklearn/datasets/svmlight_format.py @@ -341,13 +341,14 @@ def 
_dump_svmlight(X, y, f, multilabel, one_based, comment, query_id): line_pattern += " %s\n" if comment: - f.write("# Generated by dump_svmlight_file from scikit-learn %s\n" - % __version__) - f.write("# Column indices are %s-based\n" - % ["zero", "one"][one_based]) + f.write(("# Generated by dump_svmlight_file from scikit-learn %s\n" + % __version__).encode()) + f.write(("# Column indices are %s-based\n" + % ["zero", "one"][one_based]).encode()) - f.write("#\n") - f.writelines("# %s\n" % line for line in comment.splitlines()) + f.write("#\n".encode()) + f.writelines((("# %s\n" % line).encode() + for line in comment.splitlines())) for i in range(X.shape[0]): if X_is_sp: @@ -437,7 +438,7 @@ def dump_svmlight_file(X, y, f, zero_based=True, comment=None, query_id=None, comment.decode("ascii") # just for the exception else: comment = comment.encode("utf-8") - if "\0" in comment: + if "\0".encode() in comment: raise ValueError("comment string contains NUL byte") yval = check_array(y, accept_sparse='csr', ensure_2d=False) diff --git a/sklearn/datasets/tests/test_svmlight_format.py b/sklearn/datasets/tests/test_svmlight_format.py index 05a958bd0c88b..67fd4f1321058 100644 --- a/sklearn/datasets/tests/test_svmlight_format.py +++ b/sklearn/datasets/tests/test_svmlight_format.py @@ -150,13 +150,13 @@ def test_load_invalid_order_file(): def test_load_zero_based(): - f = BytesIO("-1 4:1.\n1 0:1\n") + f = BytesIO("-1 4:1.\n1 0:1\n".encode()) assert_raises(ValueError, load_svmlight_file, f, zero_based=False) def test_load_zero_based_auto(): - data1 = "-1 1:1 2:2 3:3\n" - data2 = "-1 0:0 1:1\n" + data1 = "-1 1:1 2:2 3:3\n".encode() + data2 = "-1 0:0 1:1\n".encode() f1 = BytesIO(data1) X, y = load_svmlight_file(f1, zero_based="auto") @@ -174,7 +174,7 @@ def test_load_with_qid(): data = """ 3 qid:1 1:0.53 2:0.12 2 qid:1 1:0.13 2:0.1 - 7 qid:2 1:0.87 2:0.12""" + 7 qid:2 1:0.87 2:0.12""".encode() X, y = load_svmlight_file(BytesIO(data), query_id=False) assert_array_equal(y, [3, 2, 7]) assert_array_equal(X.toarray(), [[.53, .12], [.13, .1], [.87, .12]]) @@ -271,9 +271,9 @@ def test_dump_multilabel(): dump_svmlight_file(X, y, f, multilabel=True) f.seek(0) # make sure it dumps multilabel correctly - assert_equal(f.readline(), "1 0:1 2:3 4:5\n") - assert_equal(f.readline(), "0,2 \n") - assert_equal(f.readline(), "0,1 1:5 3:1\n") + assert_equal(f.readline(), "1 0:1 2:3 4:5\n".encode()) + assert_equal(f.readline(), "0,2 \n".encode()) + assert_equal(f.readline(), "0,1 1:5 3:1\n".encode()) def test_dump_concise(): @@ -294,11 +294,11 @@ def test_dump_concise(): f.seek(0) # make sure it's using the most concise format possible assert_equal(f.readline(), - "1 0:1 1:2.1 2:3.01 3:1.000000000000001 4:1\n") - assert_equal(f.readline(), "2.1 0:1000000000 1:2e+18 2:3e+27\n") - assert_equal(f.readline(), "3.01 \n") - assert_equal(f.readline(), "1.000000000000001 \n") - assert_equal(f.readline(), "1 \n") + "1 0:1 1:2.1 2:3.01 3:1.000000000000001 4:1\n".encode()) + assert_equal(f.readline(), "2.1 0:1000000000 1:2e+18 2:3e+27\n".encode()) + assert_equal(f.readline(), "3.01 \n".encode()) + assert_equal(f.readline(), "1.000000000000001 \n".encode()) + assert_equal(f.readline(), "1 \n".encode()) f.seek(0) # make sure it's correct too :) X2, y2 = load_svmlight_file(f) @@ -320,7 +320,7 @@ def test_dump_comment(): assert_array_almost_equal(y, y2) # XXX we have to update this to support Python 3.x - utf8_comment = "It is true that\n\xc2\xbd\xc2\xb2 = \xc2\xbc" + utf8_comment = "It is true that\n\xc2\xbd\xc2\xb2 = \xc2\xbc".encode() f = 
BytesIO() assert_raises(UnicodeDecodeError, dump_svmlight_file, X, y, f, comment=utf8_comment) @@ -371,7 +371,7 @@ def test_load_with_long_qid(): 1 qid:0 0:1 1:2 2:3 0 qid:72048431380967004 0:1440446648 1:72048431380967004 2:236784985 0 qid:-9223372036854775807 0:1440446648 1:72048431380967004 2:236784985 - 3 qid:9223372036854775807 0:1440446648 1:72048431380967004 2:236784985""" + 3 qid:9223372036854775807 0:1440446648 1:72048431380967004 2:236784985""".encode() X, y, qid = load_svmlight_file(BytesIO(data), query_id=True) true_X = [[1, 2, 3], From fb6a96a9100c39e0ca9e5372536180f090fcb4a6 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 14 Dec 2018 17:33:05 -0500 Subject: [PATCH 22/29] fix more bytes issues/ typos --- sklearn/datasets/lfw.py | 2 +- sklearn/datasets/tests/test_base.py | 4 ++-- sklearn/datasets/tests/test_lfw.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/datasets/lfw.py b/sklearn/datasets/lfw.py index 756a8045773bb..52f69bdb23498 100644 --- a/sklearn/datasets/lfw.py +++ b/sklearn/datasets/lfw.py @@ -368,7 +368,7 @@ def _fetch_lfw_pairs(index_file_path, data_folder_path, slice_=None, # parse the index file to find the number of pairs to be able to allocate # the right amount of memory before starting to decode the jpeg files with open(index_file_path, 'rb') as index_file: - split_lines = [ln.strip().split('\t') for ln in index_file] + split_lines = [ln.decode().strip().split('\t') for ln in index_file] pair_specs = [sl for sl in split_lines if len(sl) > 2] n_pairs = len(pair_specs) diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py index 2ae238b1ef5cf..78c0c11663782 100644 --- a/sklearn/datasets/tests/test_base.py +++ b/sklearn/datasets/tests/test_base.py @@ -54,7 +54,7 @@ def test_category_dir_1(load_files_root): test_category_dir1 = tempfile.mkdtemp(dir=load_files_root) sample_file = tempfile.NamedTemporaryFile(dir=test_category_dir1, delete=False) - sample_file.write("Hello World!\n") + sample_file.write("Hello World!\n".encode()) sample_file.close() yield str(test_category_dir1) _remove_dir(test_category_dir1) @@ -95,7 +95,7 @@ def test_default_load_files(test_category_dir_1, test_category_dir_2, assert_equal(len(res.filenames), 1) assert_equal(len(res.target_names), 2) assert_equal(res.DESCR, None) - assert_equal(res.data, ["Hello World!\n"]) + assert_equal(res.data, ["Hello World!\n".encode()]) def test_load_files_w_categories_desc_and_encoding( diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py index 56323c4aba266..b5c6f8d7409ea 100644 --- a/sklearn/datasets/tests/test_lfw.py +++ b/sklearn/datasets/tests/test_lfw.py @@ -74,13 +74,13 @@ def setup_module(): # generate some pairing metadata files using the same format as LFW with open(os.path.join(LFW_HOME, 'pairsDevTrain.txt'), 'wb') as f: - f.write("10\n").encode() + f.write("10\n".encode()) more_than_two = [name for name, count in counts.items() if count >= 2] for i in range(5): name = random_state.choice(more_than_two) first, second = random_state.sample(range(counts[name]), 2) - f.write('%s\t%d\t%d\n' % (name, first, second).encode()) + f.write(('%s\t%d\t%d\n' % (name, first, second)).encode()) for i in range(5): first_name, second_name = random_state.sample(FAKE_NAMES, 2) From e414e6994880a144cafbf3a3e376b08ef6d3b069 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 28 Dec 2018 13:41:33 -0500 Subject: [PATCH 23/29] undo encode() changes --- sklearn/datasets/_svmlight_format.pyx | 2 +- 
 sklearn/datasets/svmlight_format.py | 15 ++++-----
 sklearn/datasets/tests/test_lfw.py | 5 ++-
 .../datasets/tests/test_svmlight_format.py | 32 +++++++++----------
 sklearn/utils/tests/test_murmurhash.py | 8 ++---
 5 files changed, 30 insertions(+), 32 deletions(-)

diff --git a/sklearn/datasets/_svmlight_format.pyx b/sklearn/datasets/_svmlight_format.pyx
index 7a1ad9dc93cf7..ea45920ad70f2 100644
--- a/sklearn/datasets/_svmlight_format.pyx
+++ b/sklearn/datasets/_svmlight_format.pyx
@@ -31,7 +31,7 @@ def _load_svmlight_file(f, dtype, bint multilabel, bint zero_based,
     cdef char *line_cstr
     cdef int idx, prev_idx
     cdef Py_ssize_t i
-    cdef bytes qid_prefix = 'qid'.encode()
+    cdef bytes qid_prefix = b('qid')
     cdef Py_ssize_t n_features
     cdef long long offset_max = offset + length if length > 0 else -1

diff --git a/sklearn/datasets/svmlight_format.py b/sklearn/datasets/svmlight_format.py
index df3c6dba98e29..2c2028b53ae4b 100644
--- a/sklearn/datasets/svmlight_format.py
+++ b/sklearn/datasets/svmlight_format.py
@@ -341,14 +341,13 @@ def _dump_svmlight(X, y, f, multilabel, one_based, comment, query_id):
     line_pattern += " %s\n"

     if comment:
-        f.write(("# Generated by dump_svmlight_file from scikit-learn %s\n"
-                 % __version__).encode())
-        f.write(("# Column indices are %s-based\n"
-                 % ["zero", "one"][one_based]).encode())
+        f.write(b"# Generated by dump_svmlight_file from scikit-learn %s\n"
+                % __version__)
+        f.write(b"# Column indices are %s-based\n"
+                 % ["zero", "one"][one_based])

-        f.write("#\n".encode())
-        f.writelines((("# %s\n" % line).encode()
-                      for line in comment.splitlines()))
+        f.write(b"#\n")
+        f.writelines(b"# %s\n" % line for line in comment.splitlines())

     for i in range(X.shape[0]):
         if X_is_sp:
@@ -438,7 +437,7 @@ def dump_svmlight_file(X, y, f, zero_based=True, comment=None, query_id=None,
             comment.decode("ascii")  # just for the exception
         else:
             comment = comment.encode("utf-8")
-        if "\0".encode() in comment:
+        if b"\0" in comment:
             raise ValueError("comment string contains NUL byte")

     yval = check_array(y, accept_sparse='csr', ensure_2d=False)
diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py
index b5c6f8d7409ea..68c7f0a6d6b37 100644
--- a/sklearn/datasets/tests/test_lfw.py
+++ b/sklearn/datasets/tests/test_lfw.py
@@ -86,9 +86,8 @@ def setup_module():
             first_name, second_name = random_state.sample(FAKE_NAMES, 2)
             first_index = random_state.choice(np.arange(counts[first_name]))
             second_index = random_state.choice(np.arange(counts[second_name]))
-            f.write(('%s\t%d\t%s\t%d\n' % (first_name, first_index,
-                                           second_name, second_index)).encode()
-                    )
+            f.write(b'%s\t%d\t%s\t%d\n' % (first_name, first_index,
+                                           second_name, second_index))

     with open(os.path.join(LFW_HOME, 'pairsDevTest.txt'), 'wb') as f:
         f.write("Fake place holder that won't be tested".encode())
diff --git a/sklearn/datasets/tests/test_svmlight_format.py b/sklearn/datasets/tests/test_svmlight_format.py
index 67fd4f1321058..eac9bc01fac73 100644
--- a/sklearn/datasets/tests/test_svmlight_format.py
+++ b/sklearn/datasets/tests/test_svmlight_format.py
@@ -150,13 +150,13 @@ def test_load_invalid_order_file():


 def test_load_zero_based():
-    f = BytesIO("-1 4:1.\n1 0:1\n".encode())
+    f = BytesIO(b"-1 4:1.\n1 0:1\n")
     assert_raises(ValueError, load_svmlight_file, f, zero_based=False)


 def test_load_zero_based_auto():
-    data1 = "-1 1:1 2:2 3:3\n".encode()
-    data2 = "-1 0:0 1:1\n".encode()
+    data1 = b"-1 1:1 2:2 3:3\n"
+    data2 = b"-1 0:0 1:1\n"

     f1 = BytesIO(data1)
     X, y = load_svmlight_file(f1, zero_based="auto")
@@ -171,10 +171,10 @@ def test_load_zero_based_auto():

 def test_load_with_qid():
     # load svmfile with qid attribute
-    data = """
+    data = b"""
     3 qid:1 1:0.53 2:0.12
     2 qid:1 1:0.13 2:0.1
-    7 qid:2 1:0.87 2:0.12""".encode()
+    7 qid:2 1:0.87 2:0.12"""
     X, y = load_svmlight_file(BytesIO(data), query_id=False)
     assert_array_equal(y, [3, 2, 7])
     assert_array_equal(X.toarray(), [[.53, .12], [.13, .1], [.87, .12]])
@@ -271,9 +271,9 @@ def test_dump_multilabel():
     dump_svmlight_file(X, y, f, multilabel=True)
     f.seek(0)
     # make sure it dumps multilabel correctly
-    assert_equal(f.readline(), "1 0:1 2:3 4:5\n".encode())
-    assert_equal(f.readline(), "0,2 \n".encode())
-    assert_equal(f.readline(), "0,1 1:5 3:1\n".encode())
+    assert_equal(f.readline(), b"1 0:1 2:3 4:5\n")
+    assert_equal(f.readline(), b"0,2 \n")
+    assert_equal(f.readline(), b"0,1 1:5 3:1\n")


 def test_dump_concise():
@@ -294,11 +294,11 @@ def test_dump_concise():
     f.seek(0)
     # make sure it's using the most concise format possible
     assert_equal(f.readline(),
-                 "1 0:1 1:2.1 2:3.01 3:1.000000000000001 4:1\n".encode())
-    assert_equal(f.readline(), "2.1 0:1000000000 1:2e+18 2:3e+27\n".encode())
-    assert_equal(f.readline(), "3.01 \n".encode())
-    assert_equal(f.readline(), "1.000000000000001 \n".encode())
-    assert_equal(f.readline(), "1 \n".encode())
+                 b"1 0:1 1:2.1 2:3.01 3:1.000000000000001 4:1\n")
+    assert_equal(f.readline(), b"2.1 0:1000000000 1:2e+18 2:3e+27\n")
+    assert_equal(f.readline(), b"3.01 \n")
+    assert_equal(f.readline(), b"1.000000000000001 \n")
+    assert_equal(f.readline(), b"1 \n")
     f.seek(0)
     # make sure it's correct too :)
     X2, y2 = load_svmlight_file(f)
@@ -320,7 +320,7 @@ def test_dump_comment():
     assert_array_almost_equal(y, y2)

     # XXX we have to update this to support Python 3.x
-    utf8_comment = "It is true that\n\xc2\xbd\xc2\xb2 = \xc2\xbc".encode()
+    utf8_comment = b"It is true that\n\xc2\xbd\xc2\xb2 = \xc2\xbc"
     f = BytesIO()
     assert_raises(UnicodeDecodeError, dump_svmlight_file, X, y, f,
                   comment=utf8_comment)
@@ -367,11 +367,11 @@ def test_dump_query_id():

 def test_load_with_long_qid():
     # load svmfile with longint qid attribute
-    data = """
+    data = b"""
     1 qid:0 0:1 1:2 2:3
     0 qid:72048431380967004 0:1440446648 1:72048431380967004 2:236784985
     0 qid:-9223372036854775807 0:1440446648 1:72048431380967004 2:236784985
-    3 qid:9223372036854775807 0:1440446648 1:72048431380967004 2:236784985""".encode()
+    3 qid:9223372036854775807 0:1440446648 1:72048431380967004 2:236784985"""
     X, y, qid = load_svmlight_file(BytesIO(data), query_id=True)

     true_X = [[1, 2, 3],
diff --git a/sklearn/utils/tests/test_murmurhash.py b/sklearn/utils/tests/test_murmurhash.py
index a3d6df221bf1a..6066012fa0162 100644
--- a/sklearn/utils/tests/test_murmurhash.py
+++ b/sklearn/utils/tests/test_murmurhash.py
@@ -43,11 +43,11 @@ def test_mmhash3_int_array():


 def test_mmhash3_bytes():
-    assert_equal(murmurhash3_32('foo'.encode(), 0), -156908512)
-    assert_equal(murmurhash3_32('foo'.encode(), 42), -1322301282)
+    assert_equal(murmurhash3_32(b'foo', 0), -156908512)
+    assert_equal(murmurhash3_32(b'foo', 42), -1322301282)

-    assert_equal(murmurhash3_32('foo'.encode(), 0, positive=True), 4138058784)
-    assert_equal(murmurhash3_32('foo'.encode(), 42, positive=True), 2972666014)
+    assert_equal(murmurhash3_32(b'foo', 0, positive=True), 4138058784)
+    assert_equal(murmurhash3_32(b'foo', 42, positive=True), 2972666014)


 def test_mmhash3_unicode():

From 8c849beedfadb401cad81924ddeb6a3b300d4f50 Mon Sep 17 00:00:00 2001
From: Andreas Mueller
Date: Fri, 28 Dec 2018 13:43:21 -0500
Subject: [PATCH 24/29] fix cython b
---
 sklearn/datasets/_svmlight_format.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/datasets/_svmlight_format.pyx b/sklearn/datasets/_svmlight_format.pyx
index ea45920ad70f2..99f443ccae53f 100644
--- a/sklearn/datasets/_svmlight_format.pyx
+++ b/sklearn/datasets/_svmlight_format.pyx
@@ -31,7 +31,7 @@ def _load_svmlight_file(f, dtype, bint multilabel, bint zero_based,
     cdef char *line_cstr
     cdef int idx, prev_idx
     cdef Py_ssize_t i
-    cdef bytes qid_prefix = b('qid')
+    cdef bytes qid_prefix = b'qid'
     cdef Py_ssize_t n_features
     cdef long long offset_max = offset + length if length > 0 else -1

From eb8a3ce87eca8a21e0718f2a61fa8343903d0150 Mon Sep 17 00:00:00 2001
From: Andreas Mueller
Date: Fri, 28 Dec 2018 13:45:38 -0500
Subject: [PATCH 25/29] fix remark by roman
---
 sklearn/datasets/openml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index 13a6fcf83373e..fa195ce030298 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -555,7 +555,7 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
         target_column = []
     elif not isinstance(target_column, list):
         raise TypeError("Did not recognize type of target_column"
-                        "Should be string_type, list or None. Got: "
+                        "Should be str, list or None. Got: "
                         "{}".format(type(target_column)))

     data_columns = _valid_data_column_names(features_list, target_column)

From ac02f515af7d200a8e276ef141110b769bc9b83c Mon Sep 17 00:00:00 2001
From: Roman Yurchak
Date: Fri, 28 Dec 2018 13:49:03 -0500
Subject: [PATCH 26/29] Apply suggestions from code review

s/encode/u

Co-Authored-By: amueller
---
 sklearn/datasets/tests/test_base.py | 4 ++--
 sklearn/datasets/tests/test_lfw.py | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py
index 78c0c11663782..08a6ba29413cf 100644
--- a/sklearn/datasets/tests/test_base.py
+++ b/sklearn/datasets/tests/test_base.py
@@ -54,7 +54,7 @@ def test_category_dir_1(load_files_root):
     test_category_dir1 = tempfile.mkdtemp(dir=load_files_root)
     sample_file = tempfile.NamedTemporaryFile(dir=test_category_dir1,
                                               delete=False)
-    sample_file.write("Hello World!\n".encode())
+    sample_file.write(b"Hello World!\n")
     sample_file.close()
     yield str(test_category_dir1)
     _remove_dir(test_category_dir1)
@@ -95,7 +95,7 @@ def test_default_load_files(test_category_dir_1, test_category_dir_2,
     assert_equal(len(res.filenames), 1)
     assert_equal(len(res.target_names), 2)
     assert_equal(res.DESCR, None)
-    assert_equal(res.data, ["Hello World!\n".encode()])
+    assert_equal(res.data, [b"Hello World!\n"])


 def test_load_files_w_categories_desc_and_encoding(
diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py
index 68c7f0a6d6b37..2a0559598a7d6 100644
--- a/sklearn/datasets/tests/test_lfw.py
+++ b/sklearn/datasets/tests/test_lfw.py
@@ -70,11 +70,11 @@ def setup_module():

     # add some random file pollution to test robustness
     with open(os.path.join(LFW_HOME, 'lfw_funneled', '.test.swp'), 'wb') as f:
-        f.write('Text file to be ignored by the dataset loader.'.encode())
+        f.write(b'Text file to be ignored by the dataset loader.')

     # generate some pairing metadata files using the same format as LFW
     with open(os.path.join(LFW_HOME, 'pairsDevTrain.txt'), 'wb') as f:
-        f.write("10\n".encode())
+        f.write(b"10\n")
         more_than_two = [name for name, count in counts.items()
                          if count >= 2]
         for i in range(5):
@@ -90,10 +90,10 @@ def setup_module():
                                            second_name, second_index))

     with open(os.path.join(LFW_HOME, 'pairsDevTest.txt'), 'wb') as f:
-        f.write("Fake place holder that won't be tested".encode())
+        f.write(b"Fake place holder that won't be tested")

     with open(os.path.join(LFW_HOME, 'pairs.txt'), 'wb') as f:
-        f.write("Fake place holder that won't be tested".encode())
+        f.write(b"Fake place holder that won't be tested")


 def teardown_module():

From 3cce1c6a3de1034d31022fcdae2103e053f84f7e Mon Sep 17 00:00:00 2001
From: Andreas Mueller
Date: Fri, 28 Dec 2018 13:55:16 -0500
Subject: [PATCH 27/29] pep8
---
 .../bench_sample_without_replacement.py | 34 +++++++++----------
 sklearn/datasets/svmlight_format.py | 2 +-
 sklearn/multioutput.py | 3 +-
 3 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/benchmarks/bench_sample_without_replacement.py b/benchmarks/bench_sample_without_replacement.py
index 4fb23efb7f24f..c993f719ac245 100644
--- a/benchmarks/bench_sample_without_replacement.py
+++ b/benchmarks/bench_sample_without_replacement.py
@@ -89,49 +89,47 @@ def bench_sample(sampling, n_population, n_samples):
     # Set Python core input
     sampling_algorithm["python-core-sample"] = \
         lambda n_population, n_sample: \
-            random.sample(range(n_population), n_sample)
+        random.sample(range(n_population), n_sample)

     ###########################################################################
     # Set custom automatic method selection
     sampling_algorithm["custom-auto"] = \
         lambda n_population, n_samples, random_state=None: \
-            sample_without_replacement(n_population,
-                                       n_samples,
-                                       method="auto",
-                                       random_state=random_state)
+        sample_without_replacement(n_population, n_samples, method="auto",
+                                   random_state=random_state)

     ###########################################################################
     # Set custom tracking based method
     sampling_algorithm["custom-tracking-selection"] = \
         lambda n_population, n_samples, random_state=None: \
-            sample_without_replacement(n_population,
-                                       n_samples,
-                                       method="tracking_selection",
-                                       random_state=random_state)
+        sample_without_replacement(n_population,
+                                   n_samples,
+                                   method="tracking_selection",
+                                   random_state=random_state)

     ###########################################################################
     # Set custom reservoir based method
     sampling_algorithm["custom-reservoir-sampling"] = \
         lambda n_population, n_samples, random_state=None: \
-            sample_without_replacement(n_population,
-                                       n_samples,
-                                       method="reservoir_sampling",
-                                       random_state=random_state)
+        sample_without_replacement(n_population,
+                                   n_samples,
+                                   method="reservoir_sampling",
+                                   random_state=random_state)

     ###########################################################################
     # Set custom reservoir based method
     sampling_algorithm["custom-pool"] = \
         lambda n_population, n_samples, random_state=None: \
-            sample_without_replacement(n_population,
-                                       n_samples,
-                                       method="pool",
-                                       random_state=random_state)
+        sample_without_replacement(n_population,
+                                   n_samples,
+                                   method="pool",
+                                   random_state=random_state)

     ###########################################################################
     # Numpy permutation based
     sampling_algorithm["numpy-permutation"] = \
         lambda n_population, n_sample: \
-            np.random.permutation(n_population)[:n_sample]
+        np.random.permutation(n_population)[:n_sample]

     ###########################################################################
     # Remove unspecified algorithm
diff --git a/sklearn/datasets/svmlight_format.py b/sklearn/datasets/svmlight_format.py
index 2c2028b53ae4b..d5ad1cabb7180 100644
--- a/sklearn/datasets/svmlight_format.py
+++ b/sklearn/datasets/svmlight_format.py
@@ -344,7 +344,7 @@ def _dump_svmlight(X, y, f, multilabel, one_based, comment, query_id):
         f.write(b"# Generated by dump_svmlight_file from scikit-learn %s\n"
                 % __version__)
         f.write(b"# Column indices are %s-based\n"
-                 % ["zero", "one"][one_based])
+                % ["zero", "one"][one_based])

         f.write(b"#\n")
         f.writelines(b"# %s\n" % line for line in comment.splitlines())
diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py
index e5fc27f79b76b..a3ec122140d68 100644
--- a/sklearn/multioutput.py
+++ b/sklearn/multioutput.py
@@ -59,7 +59,8 @@ def _partial_fit_estimator(estimator, X, y, classes=None, sample_weight=None,
     return estimator


-class MultiOutputEstimator(BaseEstimator, MetaEstimatorMixin, metaclass=ABCMeta):
+class MultiOutputEstimator(BaseEstimator, MetaEstimatorMixin,
+                           metaclass=ABCMeta):
     @abstractmethod
     def __init__(self, estimator, n_jobs=None):
         self.estimator = estimator

From b849e4e3a7a528cf5fec30f449d187222ad2720f Mon Sep 17 00:00:00 2001
From: Andreas Mueller
Date: Fri, 28 Dec 2018 15:35:41 -0500
Subject: [PATCH 28/29] string formatting fun
---
 sklearn/datasets/tests/test_lfw.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py
index 2a0559598a7d6..1afd09084371c 100644
--- a/sklearn/datasets/tests/test_lfw.py
+++ b/sklearn/datasets/tests/test_lfw.py
@@ -86,8 +86,9 @@ def setup_module():
             first_name, second_name = random_state.sample(FAKE_NAMES, 2)
             first_index = random_state.choice(np.arange(counts[first_name]))
             second_index = random_state.choice(np.arange(counts[second_name]))
-            f.write(b'%s\t%d\t%s\t%d\n' % (first_name, first_index,
-                                           second_name, second_index))
+            f.write(('%s\t%d\t%s\t%d\n' % (first_name, first_index,
+                                           second_name, second_index)
+                     ).encode())

     with open(os.path.join(LFW_HOME, 'pairsDevTest.txt'), 'wb') as f:
         f.write(b"Fake place holder that won't be tested")

From 3df76787fb595b0dfbd5b87d33383bf14e1e82e9 Mon Sep 17 00:00:00 2001
From: Andreas Mueller
Date: Fri, 28 Dec 2018 15:37:54 -0500
Subject: [PATCH 29/29] more string formatting fun
---
 sklearn/datasets/svmlight_format.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/datasets/svmlight_format.py b/sklearn/datasets/svmlight_format.py
index d5ad1cabb7180..bef7c7b471a3a 100644
--- a/sklearn/datasets/svmlight_format.py
+++ b/sklearn/datasets/svmlight_format.py
@@ -341,10 +341,10 @@ def _dump_svmlight(X, y, f, multilabel, one_based, comment, query_id):
     line_pattern += " %s\n"

     if comment:
-        f.write(b"# Generated by dump_svmlight_file from scikit-learn %s\n"
-                % __version__)
-        f.write(b"# Column indices are %s-based\n"
-                % ["zero", "one"][one_based])
+        f.write(("# Generated by dump_svmlight_file from scikit-learn %s\n"
+                 % __version__).encode())
+        f.write(("# Column indices are %s-based\n"
+                 % ["zero", "one"][one_based]).encode())

         f.write(b"#\n")
         f.writelines(b"# %s\n" % line for line in comment.splitlines())
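
A note on the last two patches: they revert the comment-header writes from bytes
%-formatting back to str formatting followed by .encode(), because on Python 3 a
bytes format string only accepts bytes-like or numeric arguments (PEP 461), while
__version__ and ["zero", "one"][one_based] are str. The snippet below is purely
illustrative and not part of the patch series; the version string is a placeholder:

    # %-formatting a bytes literal with a str argument raises TypeError on
    # Python 3 (PEP 461 allows only bytes-like or numeric arguments).
    try:
        b"# Generated by dump_svmlight_file from scikit-learn %s\n" % "0.21.dev0"
    except TypeError as exc:
        print(exc)  # e.g. "%b requires a bytes-like object ... not 'str'"

    # Formatting as str first and encoding afterwards works for any argument,
    # which is why PATCH 29 restores the ("..." % value).encode() form.
    header = ("# Generated by dump_svmlight_file from scikit-learn %s\n"
              % "0.21.dev0").encode()
    print(header)  # b'# Generated by dump_svmlight_file from scikit-learn 0.21.dev0\n'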