From f53646b5103a429889ab378d9e978238411ff2e7 Mon Sep 17 00:00:00 2001
From: Anthony Sottile <asottile@umich.edu>
Date: Wed, 9 Jan 2019 21:16:44 -0800
Subject: [PATCH] burn python2.x bridges more

---
 benchmarks/bench_20newsgroups.py              |  18 +--
 benchmarks/bench_mnist.py                     |   4 +-
 benchmarks/bench_multilabel_metrics.py        |   2 +-
 benchmarks/bench_plot_neighbors.py            |  24 +--
 benchmarks/bench_plot_nmf.py                  |   2 +-
 .../bench_sample_without_replacement.py       |   6 +-
 doc/conf.py                                   |   2 +-
 doc/sphinxext/sphinx_issues.py                |   8 +-
 .../machine_learning_map/parse_path.py        |   2 +-
 .../machine_learning_map/pyparsing.py         | 140 +++++++++---------
 .../data/languages/fetch_data.py              |  30 ++--
 .../exercise_01_language_train_model.py       |   8 +-
 .../exercise_01_language_train_model.py       |   8 +-
 .../plot_out_of_core_classification.py        |   2 +-
 examples/applications/svm_gui.py              |   8 +-
 .../wikipedia_principal_eigenvector.py        |   4 +-
 .../bicluster/plot_bicluster_newsgroups.py    |   2 +-
 examples/ensemble/plot_bias_variance.py       |   4 +-
 .../plot_gradient_boosting_quantile.py        |   4 +-
 examples/exercises/plot_cv_diabetes.py        |   2 +-
 .../plot_gpr_noisy_targets.py                 |   8 +-
 examples/mixture/plot_gmm_covariances.py      |   6 +-
 .../plot_nested_cross_validation_iris.py      |   2 +-
 .../model_selection/plot_precision_recall.py  |  14 +-
 .../model_selection/plot_randomized_search.py |   6 +-
 examples/model_selection/plot_roc.py          |   6 +-
 .../neighbors/plot_digits_kde_sampling.py     |   2 +-
 examples/neighbors/plot_kde_1d.py             |   4 +-
 setup.py                                      |  20 +--
 sklearn/__init__.py                           |   2 +-
 sklearn/_build_utils/__init__.py              |   2 +-
 sklearn/base.py                               |  26 ++--
 sklearn/calibration.py                        |   2 +-
 sklearn/cluster/bicluster.py                  |  14 +-
 sklearn/cluster/birch.py                      |   4 +-
 sklearn/cluster/hierarchical.py               |   2 +-
 sklearn/cluster/k_means_.py                   |   2 +-
 sklearn/cluster/optics_.py                    |   2 +-
 sklearn/cluster/tests/test_mean_shift.py      |   4 +-
 sklearn/compose/_column_transformer.py        |   6 +-
 sklearn/compose/tests/test_target.py          |   4 +-
 sklearn/covariance/elliptic_envelope.py       |   4 +-
 sklearn/covariance/graph_lasso_.py            |   4 +-
 sklearn/covariance/shrunk_covariance_.py      |   4 +-
 sklearn/cross_decomposition/cca_.py           |   2 +-
 sklearn/cross_decomposition/pls_.py           |   4 +-
 sklearn/datasets/openml.py                    |   2 +-
 sklearn/datasets/rcv1.py                      |   2 +-
 sklearn/datasets/tests/test_openml.py         |   2 +-
 sklearn/datasets/tests/test_rcv1.py           |   2 +-
 .../datasets/tests/test_samples_generator.py  |   2 +-
 sklearn/decomposition/fastica_.py             |   2 +-
 sklearn/decomposition/pca.py                  |   2 +-
 sklearn/decomposition/sparse_pca.py           |   2 +-
 sklearn/dummy.py                              |   4 +-
 sklearn/ensemble/bagging.py                   |  22 +--
 sklearn/ensemble/base.py                      |   8 +-
 sklearn/ensemble/forest.py                    |  18 +--
 sklearn/ensemble/gradient_boosting.py         |  46 +++---
 sklearn/ensemble/iforest.py                   |   8 +-
 sklearn/ensemble/partial_dependence.py        |   2 +-
 sklearn/ensemble/tests/test_bagging.py        |  12 +-
 sklearn/ensemble/tests/test_forest.py         |  22 +--
 .../ensemble/tests/test_weight_boosting.py    |  10 +-
 sklearn/ensemble/voting_classifier.py         |   5 +-
 sklearn/ensemble/weight_boosting.py           |  14 +-
 sklearn/externals/_arff.py                    |  66 ++++-----
 sklearn/externals/joblib/_dask.py             |   2 +-
 .../externals/joblib/_memmapping_reducer.py   |   2 +-
 .../externals/joblib/_parallel_backends.py    |  12 +-
 sklearn/externals/joblib/_store_backends.py   |   8 +-
 .../externals/cloudpickle/cloudpickle.py      |   4 +-
 .../externals/joblib/externals/loky/_base.py  |  38 ++---
 .../externals/loky/backend/_win_reduction.py  |   2 +-
 .../externals/loky/backend/fork_exec.py       |   4 +-
 .../loky/backend/popen_loky_posix.py          |   4 +-
 .../joblib/externals/loky/backend/process.py  |   8 +-
 .../joblib/externals/loky/backend/queues.py   |   2 +-
 .../externals/loky/backend/reduction.py       |   2 +-
 .../loky/backend/semaphore_tracker.py         |   4 +-
 .../joblib/externals/loky/backend/semlock.py  |   2 +-
 .../externals/loky/backend/synchronize.py     |  10 +-
 .../externals/loky/cloudpickle_wrapper.py     |   2 +-
 .../joblib/externals/loky/process_executor.py |  14 +-
 .../externals/loky/reusable_executor.py       |   6 +-
 sklearn/externals/joblib/hashing.py           |   4 +-
 sklearn/externals/joblib/logger.py            |   4 +-
 sklearn/externals/joblib/memory.py            |  30 ++--
 sklearn/externals/joblib/numpy_pickle.py      |   2 +-
 .../externals/joblib/numpy_pickle_compat.py   |   2 +-
 sklearn/externals/joblib/parallel.py          |   6 +-
 sklearn/externals/joblib/pool.py              |   8 +-
 sklearn/externals/six.py                      |  10 +-
 sklearn/feature_extraction/dict_vectorizer.py |   2 +-
 .../tests/test_feature_hasher.py              |   4 +-
 sklearn/feature_extraction/tests/test_text.py |  42 +++---
 sklearn/feature_extraction/text.py            |  10 +-
 sklearn/feature_selection/tests/test_rfe.py   |   2 +-
 .../feature_selection/univariate_selection.py |  12 +-
 sklearn/gaussian_process/gpc.py               |   2 +-
 sklearn/gaussian_process/kernels.py           |  42 +++---
 .../gaussian_process/tests/test_kernels.py    |   4 +-
 sklearn/impute.py                             |  12 +-
 sklearn/isotonic.py                           |   8 +-
 sklearn/linear_model/base.py                  |   2 +-
 sklearn/linear_model/coordinate_descent.py    |  14 +-
 sklearn/linear_model/least_angle.py           |   2 +-
 sklearn/linear_model/logistic.py              |   6 +-
 sklearn/linear_model/passive_aggressive.py    |   4 +-
 sklearn/linear_model/perceptron.py            |   2 +-
 sklearn/linear_model/ridge.py                 |  10 +-
 sklearn/linear_model/stochastic_gradient.py   |  12 +-
 sklearn/linear_model/tests/test_logistic.py   |   2 +-
 sklearn/linear_model/tests/test_perceptron.py |   2 +-
 sklearn/linear_model/tests/test_sgd.py        |  10 +-
 sklearn/linear_model/theil_sen.py             |  18 +--
 sklearn/metrics/base.py                       |   4 +-
 sklearn/metrics/classification.py             |  50 +++----
 sklearn/metrics/pairwise.py                   |   4 +-
 sklearn/metrics/ranking.py                    |   8 +-
 sklearn/metrics/regression.py                 |   2 +-
 sklearn/metrics/scorer.py                     |   8 +-
 sklearn/metrics/tests/test_classification.py  |   8 +-
 sklearn/metrics/tests/test_common.py          |  14 +-
 sklearn/metrics/tests/test_score_objects.py   |  22 +--
 sklearn/mixture/bayesian_mixture.py           |   2 +-
 sklearn/mixture/gaussian_mixture.py           |   2 +-
 .../mixture/tests/test_gaussian_mixture.py    |   2 +-
 sklearn/model_selection/_search.py            |  16 +-
 sklearn/model_selection/_split.py             |  37 +++--
 sklearn/model_selection/_validation.py        |   8 +-
 sklearn/model_selection/tests/test_search.py  |  14 +-
 sklearn/model_selection/tests/test_split.py   |   6 +-
 .../model_selection/tests/test_validation.py  |   9 +-
 sklearn/multiclass.py                         |   8 +-
 sklearn/multioutput.py                        |  14 +-
 sklearn/neighbors/base.py                     |  10 +-
 sklearn/neighbors/classification.py           |   4 +-
 sklearn/neighbors/kde.py                      |  14 +-
 sklearn/neighbors/lof.py                      |   4 +-
 sklearn/neighbors/regression.py               |   4 +-
 sklearn/neighbors/unsupervised.py             |   2 +-
 .../neural_network/_stochastic_optimizers.py  |   6 +-
 .../neural_network/multilayer_perceptron.py   |   6 +-
 sklearn/pipeline.py                           |   4 +-
 sklearn/preprocessing/_encoders.py            |   8 +-
 sklearn/preprocessing/imputation.py           |   6 +-
 sklearn/preprocessing/label.py                |  24 +--
 sklearn/preprocessing/tests/test_data.py      |   4 +-
 sklearn/preprocessing/tests/test_encoders.py  |   8 +-
 sklearn/random_projection.py                  |   4 +-
 sklearn/semi_supervised/label_propagation.py  |   6 +-
 sklearn/svm/base.py                           |   4 +-
 sklearn/svm/classes.py                        |  14 +-
 sklearn/tests/test_base.py                    |   6 +-
 sklearn/tests/test_common.py                  |   8 +-
 sklearn/tests/test_docstring_parameters.py    |   6 +-
 sklearn/tests/test_impute.py                  |   4 +-
 sklearn/tests/test_metaestimators.py          |   2 +-
 sklearn/tests/test_multiclass.py              |   6 +-
 sklearn/tests/test_pipeline.py                |   6 +-
 sklearn/tree/_reingold_tilford.py             |   4 +-
 sklearn/tree/export.py                        |   8 +-
 sklearn/tree/tests/test_tree.py               | 108 +++++++-------
 sklearn/tree/tree.py                          |  12 +-
 sklearn/utils/__init__.py                     |   2 +-
 sklearn/utils/deprecation.py                  |   2 +-
 sklearn/utils/estimator_checks.py             |  10 +-
 sklearn/utils/linear_assignment_.py           |   2 +-
 sklearn/utils/metaestimators.py               |  12 +-
 sklearn/utils/mocking.py                      |   4 +-
 sklearn/utils/multiclass.py                   |   8 +-
 sklearn/utils/random.py                       |   2 +-
 sklearn/utils/sparsefuncs.py                  |   4 +-
 sklearn/utils/testing.py                      |   6 +-
 sklearn/utils/tests/test_estimator_checks.py  |  12 +-
 sklearn/utils/tests/test_metaestimators.py    |  10 +-
 sklearn/utils/tests/test_multiclass.py        |  12 +-
 sklearn/utils/tests/test_testing.py           |   8 +-
 sklearn/utils/tests/test_utils.py             |   2 +-
 sklearn/utils/tests/test_validation.py        |   4 +-
 sklearn/utils/validation.py                   |   4 +-
 182 files changed, 872 insertions(+), 877 deletions(-)

diff --git a/benchmarks/bench_20newsgroups.py b/benchmarks/bench_20newsgroups.py
index 5bd56f32d6f67..2811e20364867 100644
--- a/benchmarks/bench_20newsgroups.py
+++ b/benchmarks/bench_20newsgroups.py
@@ -49,16 +49,16 @@
 
     print("20 newsgroups")
     print("=============")
-    print("X_train.shape = {0}".format(X_train.shape))
-    print("X_train.format = {0}".format(X_train.format))
-    print("X_train.dtype = {0}".format(X_train.dtype))
-    print("X_train density = {0}"
+    print("X_train.shape = {}".format(X_train.shape))
+    print("X_train.format = {}".format(X_train.format))
+    print("X_train.dtype = {}".format(X_train.dtype))
+    print("X_train density = {}"
           "".format(X_train.nnz / np.product(X_train.shape)))
-    print("y_train {0}".format(y_train.shape))
-    print("X_test {0}".format(X_test.shape))
-    print("X_test.format = {0}".format(X_test.format))
-    print("X_test.dtype = {0}".format(X_test.dtype))
-    print("y_test {0}".format(y_test.shape))
+    print("y_train {}".format(y_train.shape))
+    print("X_test {}".format(X_test.shape))
+    print("X_test.format = {}".format(X_test.format))
+    print("X_test.dtype = {}".format(X_test.dtype))
+    print("y_test {}".format(y_test.shape))
     print()
 
     print("Classifier Training")
diff --git a/benchmarks/bench_mnist.py b/benchmarks/bench_mnist.py
index 3fd29dfb15145..f23dd90539a75 100644
--- a/benchmarks/bench_mnist.py
+++ b/benchmarks/bench_mnist.py
@@ -169,12 +169,12 @@ def load_data(dtype=np.float32, order='F'):
     print()
     print("Classification performance:")
     print("===========================")
-    print("{0: <24} {1: >10} {2: >11} {3: >12}"
+    print("{: <24} {: >10} {: >11} {: >12}"
           "".format("Classifier  ", "train-time", "test-time", "error-rate"))
     print("-" * 60)
     for name in sorted(args["classifiers"], key=error.get):
 
-        print("{0: <23} {1: >10.2f}s {2: >10.2f}s {3: >12.4f}"
+        print("{: <23} {: >10.2f}s {: >10.2f}s {: >12.4f}"
               "".format(name, train_time[name], test_time[name], error[name]))
 
     print()
diff --git a/benchmarks/bench_multilabel_metrics.py b/benchmarks/bench_multilabel_metrics.py
index a7b9374126959..9f223fa78f840 100755
--- a/benchmarks/bench_multilabel_metrics.py
+++ b/benchmarks/bench_multilabel_metrics.py
@@ -184,7 +184,7 @@ def _plot(results, metrics, formats, title, x_ticks, x_label,
     if args.plot is not None:
         print('Displaying plot', file=sys.stderr)
         title = ('Multilabel metrics with %s' %
-                 ', '.join('{0}={1}'.format(field, getattr(args, field))
+                 ', '.join('{}={}'.format(field, getattr(args, field))
                            for field in ['samples', 'classes', 'density']
                            if args.plot != field))
         _plot(results, args.metrics, args.formats, title, steps, args.plot)
diff --git a/benchmarks/bench_plot_neighbors.py b/benchmarks/bench_plot_neighbors.py
index 19bd22a7157a5..55cfc9a960d07 100644
--- a/benchmarks/bench_plot_neighbors.py
+++ b/benchmarks/bench_plot_neighbors.py
@@ -38,10 +38,10 @@ def barplot_neighbors(Nrange=2 ** np.arange(1, 11),
 
     #------------------------------------------------------------
     # varying N
-    N_results_build = dict([(alg, np.zeros(len(Nrange)))
-                            for alg in algorithms])
-    N_results_query = dict([(alg, np.zeros(len(Nrange)))
-                            for alg in algorithms])
+    N_results_build = {alg: np.zeros(len(Nrange))
+                       for alg in algorithms}
+    N_results_query = {alg: np.zeros(len(Nrange))
+                       for alg in algorithms}
 
     for i, NN in enumerate(Nrange):
         print("N = %i (%i out of %i)" % (NN, i + 1, len(Nrange)))
@@ -61,10 +61,10 @@ def barplot_neighbors(Nrange=2 ** np.arange(1, 11),
 
     #------------------------------------------------------------
     # varying D
-    D_results_build = dict([(alg, np.zeros(len(Drange)))
-                            for alg in algorithms])
-    D_results_query = dict([(alg, np.zeros(len(Drange)))
-                            for alg in algorithms])
+    D_results_build = {alg: np.zeros(len(Drange))
+                       for alg in algorithms}
+    D_results_query = {alg: np.zeros(len(Drange))
+                       for alg in algorithms}
 
     for i, DD in enumerate(Drange):
         print("D = %i (%i out of %i)" % (DD, i + 1, len(Drange)))
@@ -84,10 +84,10 @@ def barplot_neighbors(Nrange=2 ** np.arange(1, 11),
 
     #------------------------------------------------------------
     # varying k
-    k_results_build = dict([(alg, np.zeros(len(krange)))
-                            for alg in algorithms])
-    k_results_query = dict([(alg, np.zeros(len(krange)))
-                            for alg in algorithms])
+    k_results_build = {alg: np.zeros(len(krange))
+                       for alg in algorithms}
+    k_results_query = {alg: np.zeros(len(krange))
+                       for alg in algorithms}
 
     X = get_data(N, DD, dataset)
 
diff --git a/benchmarks/bench_plot_nmf.py b/benchmarks/bench_plot_nmf.py
index 9cb96aa9dc1e2..3768447bed3a9 100644
--- a/benchmarks/bench_plot_nmf.py
+++ b/benchmarks/bench_plot_nmf.py
@@ -203,7 +203,7 @@ class _PGNMF(NMF):
     def __init__(self, n_components=None, solver='pg', init=None,
                  tol=1e-4, max_iter=200, random_state=None,
                  alpha=0., l1_ratio=0., nls_max_iter=10):
-        super(_PGNMF, self).__init__(
+        super().__init__(
             n_components=n_components, init=init, solver=solver, tol=tol,
             max_iter=max_iter, random_state=random_state, alpha=alpha,
             l1_ratio=l1_ratio)
diff --git a/benchmarks/bench_sample_without_replacement.py b/benchmarks/bench_sample_without_replacement.py
index c993f719ac245..c8bea117b406e 100644
--- a/benchmarks/bench_sample_without_replacement.py
+++ b/benchmarks/bench_sample_without_replacement.py
@@ -133,9 +133,9 @@ def bench_sample(sampling, n_population, n_samples):
 
     ###########################################################################
     # Remove unspecified algorithm
-    sampling_algorithm = dict((key, value)
-                              for key, value in sampling_algorithm.items()
-                              if key in selected_algorithm)
+    sampling_algorithm = {key: value
+                          for key, value in sampling_algorithm.items()
+                          if key in selected_algorithm}
 
     ###########################################################################
     # Perform benchmark
diff --git a/doc/conf.py b/doc/conf.py
index 51bfd772a7f54..66e061837e423 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -288,6 +288,6 @@ def setup(app):
 
 # The following is used by sphinx.ext.linkcode to provide links to github
 linkcode_resolve = make_linkcode_resolve('sklearn',
-                                         u'https://github.com/scikit-learn/'
+                                         'https://github.com/scikit-learn/'
                                          'scikit-learn/blob/{revision}/'
                                          '{package}/{path}#L{lineno}')
diff --git a/doc/sphinxext/sphinx_issues.py b/doc/sphinxext/sphinx_issues.py
index c952ca0feafba..e77351f5b61fa 100644
--- a/doc/sphinxext/sphinx_issues.py
+++ b/doc/sphinxext/sphinx_issues.py
@@ -50,11 +50,11 @@ def user_role(name, rawtext, text, lineno,
     if config.issues_user_uri:
         ref = config.issues_user_uri.format(user=target)
     else:
-        ref = 'https://github.com/{0}'.format(target)
+        ref = 'https://github.com/{}'.format(target)
     if has_explicit_title:
         text = title
     else:
-        text = '@{0}'.format(target)
+        text = '@{}'.format(target)
 
     link = nodes.reference(text=text, refuri=ref, **options)
     return [link], []
@@ -66,10 +66,10 @@ def _make_issue_node(issue_no, config, options=None):
         if config.issues_uri:
             ref = config.issues_uri.format(issue=issue_no)
         elif config.issues_github_path:
-            ref = 'https://github.com/{0}/issues/{1}'.format(
+            ref = 'https://github.com/{}/issues/{}'.format(
                 config.issues_github_path, issue_no
             )
-        issue_text = '#{0}'.format(issue_no)
+        issue_text = '#{}'.format(issue_no)
         link = nodes.reference(text=issue_text, refuri=ref, **options)
     else:
         link = None
diff --git a/doc/tutorial/machine_learning_map/parse_path.py b/doc/tutorial/machine_learning_map/parse_path.py
index 1539092df64ad..61df1cf0ad2e8 100644
--- a/doc/tutorial/machine_learning_map/parse_path.py
+++ b/doc/tutorial/machine_learning_map/parse_path.py
@@ -29,7 +29,7 @@ class CaselessPreservingLiteral(CaselessLiteral):
         instead of as defined.
     """
     def __init__( self, matchString ):
-        super(CaselessPreservingLiteral,self).__init__( matchString.upper() )
+        super().__init__(matchString.upper())
         self.name = "'%s'" % matchString
         self.errmsg = "Expected " + self.name
         self.myException.msg = self.errmsg
diff --git a/doc/tutorial/machine_learning_map/pyparsing.py b/doc/tutorial/machine_learning_map/pyparsing.py
index 20690df7aec47..4d0518b2a89f9 100644
--- a/doc/tutorial/machine_learning_map/pyparsing.py
+++ b/doc/tutorial/machine_learning_map/pyparsing.py
@@ -170,7 +170,7 @@ def _xml_escape(data):
         data = data.replace(from_, to_)
     return data
 
-class _Constants(object):
+class _Constants:
     pass
 
 alphas     = string.ascii_uppercase + string.ascii_lowercase
@@ -289,7 +289,7 @@ def __init__( self, parseElementList ):
     def __str__( self ):
         return "RecursiveGrammarException: %s" % self.parseElementTrace
 
-class _ParseResultsWithOffset(object):
+class _ParseResultsWithOffset:
     def __init__(self,p1,p2):
         self.tup = (p1,p2)
     def __getitem__(self,i):
@@ -299,7 +299,7 @@ def __repr__(self):
     def setOffset(self,i):
         self.tup = (self.tup[0],i)
 
-class ParseResults(object):
+class ParseResults:
     """
     Structured parse results, to provide multiple means of access to the parsed data:
        - as a list (C{len(results)})
@@ -728,7 +728,7 @@ def toItem(obj):
             else:
                 return obj
                 
-        return dict((k,toItem(v)) for k,v in item_fn())
+        return {k:toItem(v) for k,v in item_fn()}
 
     def copy( self ):
         """
@@ -747,8 +747,8 @@ def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
         """
         nl = "\n"
         out = []
-        namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
-                                                            for v in vlist)
+        namedItems = {v[1]:k for (k,vlist) in self.__tokdict.items()
+                                                            for v in vlist}
         nextLevelIndent = indent + "  "
 
         # collapse out indents if formatting is not desired
@@ -1078,7 +1078,7 @@ def wrapper(*args):
 
     return wrapper
 
-class ParserElement(object):
+class ParserElement:
     """Abstract base level parser element class."""
     DEFAULT_WHITE_CHARS = " \n\t\r"
     verbose_stacktrace = False
@@ -1431,7 +1431,7 @@ def canParseNext(self, instring, loc):
         else:
             return True
 
-    class _UnboundedCache(object):
+    class _UnboundedCache:
         def __init__(self):
             cache = {}
             self.not_in_cache = not_in_cache = object()
@@ -1454,7 +1454,7 @@ def cache_len(self):
             self.__len__ = types.MethodType(cache_len, self)
 
     if _OrderedDict is not None:
-        class _FifoCache(object):
+        class _FifoCache:
             def __init__(self, size):
                 self.not_in_cache = not_in_cache = object()
 
@@ -1483,7 +1483,7 @@ def cache_len(self):
                 self.__len__ = types.MethodType(cache_len, self)
 
     else:
-        class _FifoCache(object):
+        class _FifoCache:
             def __init__(self, size):
                 self.not_in_cache = not_in_cache = object()
 
@@ -2175,7 +2175,7 @@ def __eq__(self,other):
         elif isinstance(other, basestring):
             return self.matches(other)
         else:
-            return super(ParserElement,self)==other
+            return super()==other
 
     def __ne__(self,other):
         return not (self == other)
@@ -2345,7 +2345,7 @@ class Token(ParserElement):
     Abstract C{ParserElement} subclass, for defining atomic matching patterns.
     """
     def __init__( self ):
-        super(Token,self).__init__( savelist=False )
+        super().__init__( savelist=False )
 
 
 class Empty(Token):
@@ -2353,7 +2353,7 @@ class Empty(Token):
     An empty token, will always match.
     """
     def __init__( self ):
-        super(Empty,self).__init__()
+        super().__init__()
         self.name = "Empty"
         self.mayReturnEmpty = True
         self.mayIndexError = False
@@ -2364,7 +2364,7 @@ class NoMatch(Token):
     A token that will never match.
     """
     def __init__( self ):
-        super(NoMatch,self).__init__()
+        super().__init__()
         self.name = "NoMatch"
         self.mayReturnEmpty = True
         self.mayIndexError = False
@@ -2389,7 +2389,7 @@ class Literal(Token):
     use L{Keyword} or L{CaselessKeyword}.
     """
     def __init__( self, matchString ):
-        super(Literal,self).__init__()
+        super().__init__()
         self.match = matchString
         self.matchLen = len(matchString)
         try:
@@ -2435,7 +2435,7 @@ class Keyword(Token):
     DEFAULT_KEYWORD_CHARS = alphanums+"_$"
 
     def __init__( self, matchString, identChars=None, caseless=False ):
-        super(Keyword,self).__init__()
+        super().__init__()
         if identChars is None:
             identChars = Keyword.DEFAULT_KEYWORD_CHARS
         self.match = matchString
@@ -2470,7 +2470,7 @@ def parseImpl( self, instring, loc, doActions=True ):
         raise ParseException(instring, loc, self.errmsg, self)
 
     def copy(self):
-        c = super(Keyword,self).copy()
+        c = super().copy()
         c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
         return c
 
@@ -2492,7 +2492,7 @@ class CaselessLiteral(Literal):
     (Contrast with example for L{CaselessKeyword}.)
     """
     def __init__( self, matchString ):
-        super(CaselessLiteral,self).__init__( matchString.upper() )
+        super().__init__( matchString.upper() )
         # Preserve the defining literal.
         self.returnString = matchString
         self.name = "'%s'" % self.returnString
@@ -2513,7 +2513,7 @@ class CaselessKeyword(Keyword):
     (Contrast with example for L{CaselessLiteral}.)
     """
     def __init__( self, matchString, identChars=None ):
-        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )
+        super().__init__( matchString, identChars, caseless=True )
 
     def parseImpl( self, instring, loc, doActions=True ):
         if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
@@ -2547,7 +2547,7 @@ class CloseMatch(Token):
         patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
     """
     def __init__(self, match_string, maxMismatches=1):
-        super(CloseMatch,self).__init__()
+        super().__init__()
         self.name = match_string
         self.match_string = match_string
         self.maxMismatches = maxMismatches
@@ -2630,7 +2630,7 @@ class Word(Token):
         csv_value = Word(printables, excludeChars=",")
     """
     def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
-        super(Word,self).__init__()
+        super().__init__()
         if excludeChars:
             initChars = ''.join(c for c in initChars if c not in excludeChars)
             if bodyChars:
@@ -2720,7 +2720,7 @@ def parseImpl( self, instring, loc, doActions=True ):
 
     def __str__( self ):
         try:
-            return super(Word,self).__str__()
+            return super().__str__()
         except Exception:
             pass
 
@@ -2757,7 +2757,7 @@ class Regex(Token):
     compiledREtype = type(re.compile("[A-Z]"))
     def __init__( self, pattern, flags=0):
         """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags."""
-        super(Regex,self).__init__()
+        super().__init__()
 
         if isinstance(pattern, basestring):
             if not pattern:
@@ -2804,7 +2804,7 @@ def parseImpl( self, instring, loc, doActions=True ):
 
     def __str__( self ):
         try:
-            return super(Regex,self).__str__()
+            return super().__str__()
         except Exception:
             pass
 
@@ -2840,7 +2840,7 @@ class QuotedString(Token):
         [['This is the quote with "embedded" quotes']]
     """
     def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
-        super(QuotedString,self).__init__()
+        super().__init__()
 
         # remove white space from quote chars - wont work anyway
         quoteChar = quoteChar.strip()
@@ -2941,7 +2941,7 @@ def parseImpl( self, instring, loc, doActions=True ):
 
     def __str__( self ):
         try:
-            return super(QuotedString,self).__str__()
+            return super().__str__()
         except Exception:
             pass
 
@@ -2968,7 +2968,7 @@ class CharsNotIn(Token):
         ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
     """
     def __init__( self, notChars, min=1, max=0, exact=0 ):
-        super(CharsNotIn,self).__init__()
+        super().__init__()
         self.skipWhitespace = False
         self.notChars = notChars
 
@@ -3010,7 +3010,7 @@ def parseImpl( self, instring, loc, doActions=True ):
 
     def __str__( self ):
         try:
-            return super(CharsNotIn, self).__str__()
+            return super().__str__()
         except Exception:
             pass
 
@@ -3038,7 +3038,7 @@ class White(Token):
         "\f": "<FF>",
         }
     def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
-        super(White,self).__init__()
+        super().__init__()
         self.matchWhite = ws
         self.setWhitespaceChars( "".join(c for c in self.whiteChars if c not in self.matchWhite) )
         #~ self.leaveWhitespace()
@@ -3075,7 +3075,7 @@ def parseImpl( self, instring, loc, doActions=True ):
 
 class _PositionToken(Token):
     def __init__( self ):
-        super(_PositionToken,self).__init__()
+        super().__init__()
         self.name=self.__class__.__name__
         self.mayReturnEmpty = True
         self.mayIndexError = False
@@ -3085,7 +3085,7 @@ class GoToColumn(_PositionToken):
     Token to advance to a specific column of input text; useful for tabular report scraping.
     """
     def __init__( self, colno ):
-        super(GoToColumn,self).__init__()
+        super().__init__()
         self.col = colno
 
     def preParse( self, instring, loc ):
@@ -3128,7 +3128,7 @@ class LineStart(_PositionToken):
 
     """
     def __init__( self ):
-        super(LineStart,self).__init__()
+        super().__init__()
         self.errmsg = "Expected start of line"
 
     def parseImpl( self, instring, loc, doActions=True ):
@@ -3141,7 +3141,7 @@ class LineEnd(_PositionToken):
     Matches if current position is at the end of a line within the parse string
     """
     def __init__( self ):
-        super(LineEnd,self).__init__()
+        super().__init__()
         self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
         self.errmsg = "Expected end of line"
 
@@ -3161,7 +3161,7 @@ class StringStart(_PositionToken):
     Matches if current position is at the beginning of the parse string
     """
     def __init__( self ):
-        super(StringStart,self).__init__()
+        super().__init__()
         self.errmsg = "Expected start of text"
 
     def parseImpl( self, instring, loc, doActions=True ):
@@ -3176,7 +3176,7 @@ class StringEnd(_PositionToken):
     Matches if current position is at the end of the parse string
     """
     def __init__( self ):
-        super(StringEnd,self).__init__()
+        super().__init__()
         self.errmsg = "Expected end of text"
 
     def parseImpl( self, instring, loc, doActions=True ):
@@ -3198,7 +3198,7 @@ class WordStart(_PositionToken):
     the string being parsed, or at the beginning of a line.
     """
     def __init__(self, wordChars = printables):
-        super(WordStart,self).__init__()
+        super().__init__()
         self.wordChars = set(wordChars)
         self.errmsg = "Not at the start of a word"
 
@@ -3218,7 +3218,7 @@ class WordEnd(_PositionToken):
     the string being parsed, or at the end of a line.
     """
     def __init__(self, wordChars = printables):
-        super(WordEnd,self).__init__()
+        super().__init__()
         self.wordChars = set(wordChars)
         self.skipWhitespace = False
         self.errmsg = "Not at the end of a word"
@@ -3237,7 +3237,7 @@ class ParseExpression(ParserElement):
     Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
     """
     def __init__( self, exprs, savelist = False ):
-        super(ParseExpression,self).__init__(savelist)
+        super().__init__(savelist)
         if isinstance( exprs, _generatorType ):
             exprs = list(exprs)
 
@@ -3276,18 +3276,18 @@ def leaveWhitespace( self ):
     def ignore( self, other ):
         if isinstance( other, Suppress ):
             if other not in self.ignoreExprs:
-                super( ParseExpression, self).ignore( other )
+                super().ignore( other )
                 for e in self.exprs:
                     e.ignore( self.ignoreExprs[-1] )
         else:
-            super( ParseExpression, self).ignore( other )
+            super().ignore( other )
             for e in self.exprs:
                 e.ignore( self.ignoreExprs[-1] )
         return self
 
     def __str__( self ):
         try:
-            return super(ParseExpression,self).__str__()
+            return super().__str__()
         except Exception:
             pass
 
@@ -3296,7 +3296,7 @@ def __str__( self ):
         return self.strRepr
 
     def streamline( self ):
-        super(ParseExpression,self).streamline()
+        super().streamline()
 
         for e in self.exprs:
             e.streamline()
@@ -3330,7 +3330,7 @@ def streamline( self ):
         return self
 
     def setResultsName( self, name, listAllMatches=False ):
-        ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
+        ret = super().setResultsName(name,listAllMatches)
         return ret
 
     def validate( self, validateTrace=[] ):
@@ -3340,7 +3340,7 @@ def validate( self, validateTrace=[] ):
         self.checkRecursion( [] )
         
     def copy(self):
-        ret = super(ParseExpression,self).copy()
+        ret = super().copy()
         ret.exprs = [e.copy() for e in self.exprs]
         return ret
 
@@ -3367,7 +3367,7 @@ def __init__(self, *args, **kwargs):
             self.leaveWhitespace()
 
     def __init__( self, exprs, savelist = True ):
-        super(And,self).__init__(exprs, savelist)
+        super().__init__(exprs, savelist)
         self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
         self.setWhitespaceChars( self.exprs[0].whiteChars )
         self.skipWhitespace = self.exprs[0].skipWhitespace
@@ -3435,7 +3435,7 @@ class Or(ParseExpression):
         [['123'], ['3.1416'], ['789']]
     """
     def __init__( self, exprs, savelist = False ):
-        super(Or,self).__init__(exprs, savelist)
+        super().__init__(exprs, savelist)
         if self.exprs:
             self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
         else:
@@ -3517,7 +3517,7 @@ class MatchFirst(ParseExpression):
         print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
     """
     def __init__( self, exprs, savelist = False ):
-        super(MatchFirst,self).__init__(exprs, savelist)
+        super().__init__(exprs, savelist)
         if self.exprs:
             self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
         else:
@@ -3622,14 +3622,14 @@ class Each(ParseExpression):
         - size: 20
     """
     def __init__( self, exprs, savelist = True ):
-        super(Each,self).__init__(exprs, savelist)
+        super().__init__(exprs, savelist)
         self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
         self.skipWhitespace = True
         self.initExprGroups = True
 
     def parseImpl( self, instring, loc, doActions=True ):
         if self.initExprGroups:
-            self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional))
+            self.opt1map = {id(e.expr):e for e in self.exprs if isinstance(e,Optional)}
             opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
             opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)]
             self.optionals = opt1 + opt2
@@ -3696,7 +3696,7 @@ class ParseElementEnhance(ParserElement):
     Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.
     """
     def __init__( self, expr, savelist=False ):
-        super(ParseElementEnhance,self).__init__(savelist)
+        super().__init__(savelist)
         if isinstance( expr, basestring ):
             if issubclass(ParserElement._literalStringClass, Token):
                 expr = ParserElement._literalStringClass(expr)
@@ -3729,17 +3729,17 @@ def leaveWhitespace( self ):
     def ignore( self, other ):
         if isinstance( other, Suppress ):
             if other not in self.ignoreExprs:
-                super( ParseElementEnhance, self).ignore( other )
+                super().ignore( other )
                 if self.expr is not None:
                     self.expr.ignore( self.ignoreExprs[-1] )
         else:
-            super( ParseElementEnhance, self).ignore( other )
+            super().ignore( other )
             if self.expr is not None:
                 self.expr.ignore( self.ignoreExprs[-1] )
         return self
 
     def streamline( self ):
-        super(ParseElementEnhance,self).streamline()
+        super().streamline()
         if self.expr is not None:
             self.expr.streamline()
         return self
@@ -3759,7 +3759,7 @@ def validate( self, validateTrace=[] ):
 
     def __str__( self ):
         try:
-            return super(ParseElementEnhance,self).__str__()
+            return super().__str__()
         except Exception:
             pass
 
@@ -3786,7 +3786,7 @@ class FollowedBy(ParseElementEnhance):
         [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
     """
     def __init__( self, expr ):
-        super(FollowedBy,self).__init__(expr)
+        super().__init__(expr)
         self.mayReturnEmpty = True
 
     def parseImpl( self, instring, loc, doActions=True ):
@@ -3806,7 +3806,7 @@ class NotAny(ParseElementEnhance):
         
     """
     def __init__( self, expr ):
-        super(NotAny,self).__init__(expr)
+        super().__init__(expr)
         #~ self.leaveWhitespace()
         self.skipWhitespace = False  # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
         self.mayReturnEmpty = True
@@ -3828,7 +3828,7 @@ def __str__( self ):
 
 class _MultipleMatch(ParseElementEnhance):
     def __init__( self, expr, stopOn=None):
-        super(_MultipleMatch, self).__init__(expr)
+        super().__init__(expr)
         self.saveAsList = True
         ender = stopOn
         if isinstance(ender, basestring):
@@ -3912,12 +3912,12 @@ class ZeroOrMore(_MultipleMatch):
     Example: similar to L{OneOrMore}
     """
     def __init__( self, expr, stopOn=None):
-        super(ZeroOrMore,self).__init__(expr, stopOn=stopOn)
+        super().__init__(expr, stopOn=stopOn)
         self.mayReturnEmpty = True
         
     def parseImpl( self, instring, loc, doActions=True ):
         try:
-            return super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
+            return super().parseImpl(instring, loc, doActions)
         except (ParseException,IndexError):
             return loc, []
 
@@ -3930,7 +3930,7 @@ def __str__( self ):
 
         return self.strRepr
 
-class _NullToken(object):
+class _NullToken:
     def __bool__(self):
         return False
     __nonzero__ = __bool__
@@ -3974,7 +3974,7 @@ class Optional(ParseElementEnhance):
         FAIL: Expected end of text (at char 5), (line:1, col:6)
     """
     def __init__( self, expr, default=_optionalNotMatched ):
-        super(Optional,self).__init__( expr, savelist=False )
+        super().__init__( expr, savelist=False )
         self.saveAsList = self.expr.saveAsList
         self.defaultValue = default
         self.mayReturnEmpty = True
@@ -4058,7 +4058,7 @@ class SkipTo(ParseElementEnhance):
         - sev: Minor
     """
     def __init__( self, other, include=False, ignore=None, failOn=None ):
-        super( SkipTo, self ).__init__( other )
+        super().__init__( other )
         self.ignoreExpr = ignore
         self.mayReturnEmpty = True
         self.mayIndexError = False
@@ -4137,7 +4137,7 @@ class Forward(ParseElementEnhance):
     C{Forward}.
     """
     def __init__( self, other=None ):
-        super(Forward,self).__init__( other, savelist=False )
+        super().__init__( other, savelist=False )
 
     def __lshift__( self, other ):
         if isinstance( other, basestring ):
@@ -4192,7 +4192,7 @@ def __str__( self ):
 
     def copy(self):
         if self.expr is not None:
-            return super(Forward,self).copy()
+            return super().copy()
         else:
             ret = Forward()
             ret <<= self
@@ -4207,7 +4207,7 @@ class TokenConverter(ParseElementEnhance):
     Abstract subclass of C{ParseExpression}, for converting parsed results.
     """
     def __init__( self, expr, savelist=False ):
-        super(TokenConverter,self).__init__( expr )#, savelist )
+        super().__init__( expr )#, savelist )
         self.saveAsList = False
 
 class Combine(TokenConverter):
@@ -4228,7 +4228,7 @@ class Combine(TokenConverter):
         print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
     """
     def __init__( self, expr, joinString="", adjacent=True ):
-        super(Combine,self).__init__( expr )
+        super().__init__( expr )
         # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
         if adjacent:
             self.leaveWhitespace()
@@ -4241,7 +4241,7 @@ def ignore( self, other ):
         if self.adjacent:
             ParserElement.ignore(self, other)
         else:
-            super( Combine, self).ignore( other )
+            super().ignore( other )
         return self
 
     def postParse( self, instring, loc, tokenlist ):
@@ -4269,7 +4269,7 @@ class Group(TokenConverter):
         print(func.parseString("fn a,b,100"))  # -> ['fn', ['a', 'b', '100']]
     """
     def __init__( self, expr ):
-        super(Group,self).__init__( expr )
+        super().__init__( expr )
         self.saveAsList = True
 
     def postParse( self, instring, loc, tokenlist ):
@@ -4312,7 +4312,7 @@ class Dict(TokenConverter):
     See more examples at L{ParseResults} of accessing fields by results name.
     """
     def __init__( self, expr ):
-        super(Dict,self).__init__( expr )
+        super().__init__( expr )
         self.saveAsList = True
 
     def postParse( self, instring, loc, tokenlist ):
@@ -4366,7 +4366,7 @@ def suppress( self ):
         return self
 
 
-class OnlyOnce(object):
+class OnlyOnce:
     """
     Wrapper for parse actions, to ensure they are only called once.
     """
diff --git a/doc/tutorial/text_analytics/data/languages/fetch_data.py b/doc/tutorial/text_analytics/data/languages/fetch_data.py
index 5c5c36a322caf..0bee5e1f4586f 100644
--- a/doc/tutorial/text_analytics/data/languages/fetch_data.py
+++ b/doc/tutorial/text_analytics/data/languages/fetch_data.py
@@ -17,23 +17,23 @@
 import codecs
 
 pages = {
-    u'ar': u'http://ar.wikipedia.org/wiki/%D9%88%D9%8A%D9%83%D9%8A%D8%A8%D9%8A%D8%AF%D9%8A%D8%A7',
-    u'de': u'http://de.wikipedia.org/wiki/Wikipedia',
-    u'en': u'https://en.wikipedia.org/wiki/Wikipedia',
-    u'es': u'http://es.wikipedia.org/wiki/Wikipedia',
-    u'fr': u'http://fr.wikipedia.org/wiki/Wikip%C3%A9dia',
-    u'it': u'http://it.wikipedia.org/wiki/Wikipedia',
-    u'ja': u'http://ja.wikipedia.org/wiki/Wikipedia',
-    u'nl': u'http://nl.wikipedia.org/wiki/Wikipedia',
-    u'pl': u'http://pl.wikipedia.org/wiki/Wikipedia',
-    u'pt': u'http://pt.wikipedia.org/wiki/Wikip%C3%A9dia',
-    u'ru': u'http://ru.wikipedia.org/wiki/%D0%92%D0%B8%D0%BA%D0%B8%D0%BF%D0%B5%D0%B4%D0%B8%D1%8F',
+    'ar': 'http://ar.wikipedia.org/wiki/%D9%88%D9%8A%D9%83%D9%8A%D8%A8%D9%8A%D8%AF%D9%8A%D8%A7',   # noqa: E501
+    'de': 'http://de.wikipedia.org/wiki/Wikipedia',
+    'en': 'https://en.wikipedia.org/wiki/Wikipedia',
+    'es': 'http://es.wikipedia.org/wiki/Wikipedia',
+    'fr': 'http://fr.wikipedia.org/wiki/Wikip%C3%A9dia',
+    'it': 'http://it.wikipedia.org/wiki/Wikipedia',
+    'ja': 'http://ja.wikipedia.org/wiki/Wikipedia',
+    'nl': 'http://nl.wikipedia.org/wiki/Wikipedia',
+    'pl': 'http://pl.wikipedia.org/wiki/Wikipedia',
+    'pt': 'http://pt.wikipedia.org/wiki/Wikip%C3%A9dia',
+    'ru': 'http://ru.wikipedia.org/wiki/%D0%92%D0%B8%D0%BA%D0%B8%D0%BF%D0%B5%D0%B4%D0%B8%D1%8F',  # noqa: E501
 #    u'zh': u'http://zh.wikipedia.org/wiki/Wikipedia',
 }
 
-html_folder = u'html'
-text_folder = u'paragraphs'
-short_text_folder = u'short_paragraphs'
+html_folder = 'html'
+text_folder = 'paragraphs'
+short_text_folder = 'short_paragraphs'
 n_words_per_short_text = 5
 
 
@@ -93,7 +93,7 @@
         groups = np.array_split(words, n_groups)
 
         for group in groups:
-            small_content = u" ".join(group)
+            small_content = " ".join(group)
 
             short_text_filename = os.path.join(short_text_lang_folder,
                                                '%s_%04d.txt' % (lang, j))
diff --git a/doc/tutorial/text_analytics/skeletons/exercise_01_language_train_model.py b/doc/tutorial/text_analytics/skeletons/exercise_01_language_train_model.py
index 0b56cfc10d837..438481120d126 100644
--- a/doc/tutorial/text_analytics/skeletons/exercise_01_language_train_model.py
+++ b/doc/tutorial/text_analytics/skeletons/exercise_01_language_train_model.py
@@ -52,11 +52,11 @@
 
 # Predict the result on some short new sentences:
 sentences = [
-    u'This is a language detection test.',
-    u'Ceci est un test de d\xe9tection de la langue.',
-    u'Dies ist ein Test, um die Sprache zu erkennen.',
+    'This is a language detection test.',
+    'Ceci est un test de d\xe9tection de la langue.',
+    'Dies ist ein Test, um die Sprache zu erkennen.',
 ]
 predicted = clf.predict(sentences)
 
 for s, p in zip(sentences, predicted):
-    print(u'The language of "%s" is "%s"' % (s, dataset.target_names[p]))
+    print('The language of "%s" is "%s"' % (s, dataset.target_names[p]))
diff --git a/doc/tutorial/text_analytics/solutions/exercise_01_language_train_model.py b/doc/tutorial/text_analytics/solutions/exercise_01_language_train_model.py
index 910b4dc50427d..defb7dc11630a 100644
--- a/doc/tutorial/text_analytics/solutions/exercise_01_language_train_model.py
+++ b/doc/tutorial/text_analytics/solutions/exercise_01_language_train_model.py
@@ -60,11 +60,11 @@
 
 # Predict the result on some short new sentences:
 sentences = [
-    u'This is a language detection test.',
-    u'Ceci est un test de d\xe9tection de la langue.',
-    u'Dies ist ein Test, um die Sprache zu erkennen.',
+    'This is a language detection test.',
+    'Ceci est un test de d\xe9tection de la langue.',
+    'Dies ist ein Test, um die Sprache zu erkennen.',
 ]
 predicted = clf.predict(sentences)
 
 for s, p in zip(sentences, predicted):
-    print(u'The language of "%s" is "%s"' % (s, dataset.target_names[p]))
+    print('The language of "%s" is "%s"' % (s, dataset.target_names[p]))
diff --git a/examples/applications/plot_out_of_core_classification.py b/examples/applications/plot_out_of_core_classification.py
index 8dafc4fae120f..cf53190b45869 100644
--- a/examples/applications/plot_out_of_core_classification.py
+++ b/examples/applications/plot_out_of_core_classification.py
@@ -221,7 +221,7 @@ def get_minibatch(doc_iter, size, pos_class=positive_class):
     Note: size is before excluding invalid docs with no topics assigned.
 
     """
-    data = [(u'{title}\n\n{body}'.format(**doc), pos_class in doc['topics'])
+    data = [('{title}\n\n{body}'.format(**doc), pos_class in doc['topics'])
             for doc in itertools.islice(doc_iter, size)
             if doc['topics']]
     if not len(data):
diff --git a/examples/applications/svm_gui.py b/examples/applications/svm_gui.py
index 7c5c9bbf305b8..a44b996b079c6 100644
--- a/examples/applications/svm_gui.py
+++ b/examples/applications/svm_gui.py
@@ -44,7 +44,7 @@
 x_min, x_max = -50, 50
 
 
-class Model(object):
+class Model:
     """The Model which hold the data. It implements the
     observable in the observer pattern and notifies the
     registered observers on change event.
@@ -76,7 +76,7 @@ def dump_svmlight_file(self, file):
         dump_svmlight_file(X, y, file)
 
 
-class Controller(object):
+class Controller:
     def __init__(self, model):
         self.model = model
         self.kernel = Tk.IntVar()
@@ -139,7 +139,7 @@ def refit(self):
             self.fit()
 
 
-class View(object):
+class View:
     """Test docstring. """
     def __init__(self, root, controller):
         f = Figure()
@@ -245,7 +245,7 @@ def plot_decision_surface(self, surface, type):
             raise ValueError("surface type unknown")
 
 
-class ControllBar(object):
+class ControllBar:
     def __init__(self, root, controller):
         fm = Tk.Frame(root)
         kernel_group = Tk.Frame(fm)
diff --git a/examples/applications/wikipedia_principal_eigenvector.py b/examples/applications/wikipedia_principal_eigenvector.py
index 27f3844a23094..d0e66b60ff436 100644
--- a/examples/applications/wikipedia_principal_eigenvector.py
+++ b/examples/applications/wikipedia_principal_eigenvector.py
@@ -112,7 +112,7 @@ def get_redirects(redirects_filename):
     for l, source in enumerate(redirects.keys()):
         transitive_target = None
         target = redirects[source]
-        seen = set([source])
+        seen = {source}
         while True:
             transitive_target = target
             target = redirects.get(target)
@@ -172,7 +172,7 @@ def get_adjacency_matrix(redirects_filename, page_links_filename, limit=None):
 # stop after 5M links to make it possible to work in RAM
 X, redirects, index_map = get_adjacency_matrix(
     redirects_filename, page_links_filename, limit=5000000)
-names = dict((i, name) for name, i in index_map.items())
+names = {i: name for name, i in index_map.items()}
 
 print("Computing the principal singular vectors using randomized_svd")
 t0 = time()
diff --git a/examples/bicluster/plot_bicluster_newsgroups.py b/examples/bicluster/plot_bicluster_newsgroups.py
index 430c37a651197..5298d3fa35449 100644
--- a/examples/bicluster/plot_bicluster_newsgroups.py
+++ b/examples/bicluster/plot_bicluster_newsgroups.py
@@ -51,7 +51,7 @@ def number_normalizer(tokens):
 
 class NumberNormalizingVectorizer(TfidfVectorizer):
     def build_tokenizer(self):
-        tokenize = super(NumberNormalizingVectorizer, self).build_tokenizer()
+        tokenize = super().build_tokenizer()
         return lambda doc: list(number_normalizer(tokenize(doc)))
 
 
diff --git a/examples/ensemble/plot_bias_variance.py b/examples/ensemble/plot_bias_variance.py
index 0af239e197cf0..1c11dd138e1ff 100644
--- a/examples/ensemble/plot_bias_variance.py
+++ b/examples/ensemble/plot_bias_variance.py
@@ -147,8 +147,8 @@ def generate(n_samples, noise, n_repeat=1):
     y_bias = (f(X_test) - np.mean(y_predict, axis=1)) ** 2
     y_var = np.var(y_predict, axis=1)
 
-    print("{0}: {1:.4f} (error) = {2:.4f} (bias^2) "
-          " + {3:.4f} (var) + {4:.4f} (noise)".format(name,
+    print("{}: {:.4f} (error) = {:.4f} (bias^2) "
+          " + {:.4f} (var) + {:.4f} (noise)".format(name,
                                                       np.mean(y_error),
                                                       np.mean(y_bias),
                                                       np.mean(y_var),
diff --git a/examples/ensemble/plot_gradient_boosting_quantile.py b/examples/ensemble/plot_gradient_boosting_quantile.py
index 343bae08ef4a6..924639e24e5b4 100644
--- a/examples/ensemble/plot_gradient_boosting_quantile.py
+++ b/examples/ensemble/plot_gradient_boosting_quantile.py
@@ -65,8 +65,8 @@ def f(x):
 # the MSE
 fig = plt.figure()
 plt.plot(xx, f(xx), 'g:', label=r'$f(x) = x\,\sin(x)$')
-plt.plot(X, y, 'b.', markersize=10, label=u'Observations')
-plt.plot(xx, y_pred, 'r-', label=u'Prediction')
+plt.plot(X, y, 'b.', markersize=10, label='Observations')
+plt.plot(xx, y_pred, 'r-', label='Prediction')
 plt.plot(xx, y_upper, 'k-')
 plt.plot(xx, y_lower, 'k-')
 plt.fill(np.concatenate([xx, xx[::-1]]),
diff --git a/examples/exercises/plot_cv_diabetes.py b/examples/exercises/plot_cv_diabetes.py
index d68fd21bd70a3..a6011902e6c3c 100644
--- a/examples/exercises/plot_cv_diabetes.py
+++ b/examples/exercises/plot_cv_diabetes.py
@@ -70,7 +70,7 @@
 print("subsets of the data:")
 for k, (train, test) in enumerate(k_fold.split(X, y)):
     lasso_cv.fit(X[train], y[train])
-    print("[fold {0}] alpha: {1:.5f}, score: {2:.5f}".
+    print("[fold {}] alpha: {:.5f}, score: {:.5f}".
           format(k, lasso_cv.alpha_, lasso_cv.score(X[test], y[test])))
 print()
 print("Answer: Not very much since we obtained different alphas for different")
diff --git a/examples/gaussian_process/plot_gpr_noisy_targets.py b/examples/gaussian_process/plot_gpr_noisy_targets.py
index 455b26ceef6a5..50992c19337b3 100644
--- a/examples/gaussian_process/plot_gpr_noisy_targets.py
+++ b/examples/gaussian_process/plot_gpr_noisy_targets.py
@@ -63,8 +63,8 @@ def f(x):
 # the MSE
 plt.figure()
 plt.plot(x, f(x), 'r:', label=r'$f(x) = x\,\sin(x)$')
-plt.plot(X, y, 'r.', markersize=10, label=u'Observations')
-plt.plot(x, y_pred, 'b-', label=u'Prediction')
+plt.plot(X, y, 'r.', markersize=10, label='Observations')
+plt.plot(x, y_pred, 'b-', label='Prediction')
 plt.fill(np.concatenate([x, x[::-1]]),
          np.concatenate([y_pred - 1.9600 * sigma,
                         (y_pred + 1.9600 * sigma)[::-1]]),
@@ -99,8 +99,8 @@ def f(x):
 # the MSE
 plt.figure()
 plt.plot(x, f(x), 'r:', label=r'$f(x) = x\,\sin(x)$')
-plt.errorbar(X.ravel(), y, dy, fmt='r.', markersize=10, label=u'Observations')
-plt.plot(x, y_pred, 'b-', label=u'Prediction')
+plt.errorbar(X.ravel(), y, dy, fmt='r.', markersize=10, label='Observations')
+plt.plot(x, y_pred, 'b-', label='Prediction')
 plt.fill(np.concatenate([x, x[::-1]]),
          np.concatenate([y_pred - 1.9600 * sigma,
                         (y_pred + 1.9600 * sigma)[::-1]]),
diff --git a/examples/mixture/plot_gmm_covariances.py b/examples/mixture/plot_gmm_covariances.py
index 14a0efccc01da..b9704ee435db2 100644
--- a/examples/mixture/plot_gmm_covariances.py
+++ b/examples/mixture/plot_gmm_covariances.py
@@ -83,9 +83,9 @@ def make_ellipses(gmm, ax):
 n_classes = len(np.unique(y_train))
 
 # Try GMMs using different types of covariances.
-estimators = dict((cov_type, GaussianMixture(n_components=n_classes,
-                   covariance_type=cov_type, max_iter=20, random_state=0))
-                  for cov_type in ['spherical', 'diag', 'tied', 'full'])
+estimators = {cov_type: GaussianMixture(n_components=n_classes,
+              covariance_type=cov_type, max_iter=20, random_state=0)
+              for cov_type in ['spherical', 'diag', 'tied', 'full']}
 
 n_estimators = len(estimators)
 
diff --git a/examples/model_selection/plot_nested_cross_validation_iris.py b/examples/model_selection/plot_nested_cross_validation_iris.py
index b40dc91fc4d8f..d6aea44e6c546 100644
--- a/examples/model_selection/plot_nested_cross_validation_iris.py
+++ b/examples/model_selection/plot_nested_cross_validation_iris.py
@@ -90,7 +90,7 @@
 
 score_difference = non_nested_scores - nested_scores
 
-print("Average difference of {0:6f} with std. dev. of {1:6f}."
+print("Average difference of {:6f} with std. dev. of {:6f}."
       .format(score_difference.mean(), score_difference.std()))
 
 # Plot scores on each trial for nested and non-nested CV
diff --git a/examples/model_selection/plot_precision_recall.py b/examples/model_selection/plot_precision_recall.py
index 936e56921d27c..6dcf0de514b7e 100644
--- a/examples/model_selection/plot_precision_recall.py
+++ b/examples/model_selection/plot_precision_recall.py
@@ -129,7 +129,7 @@
 from sklearn.metrics import average_precision_score
 average_precision = average_precision_score(y_test, y_score)
 
-print('Average precision-recall score: {0:0.2f}'.format(
+print('Average precision-recall score: {:0.2f}'.format(
       average_precision))
 
 ###############################################################################
@@ -153,7 +153,7 @@
 plt.ylabel('Precision')
 plt.ylim([0.0, 1.05])
 plt.xlim([0.0, 1.0])
-plt.title('2-class Precision-Recall curve: AP={0:0.2f}'.format(
+plt.title('2-class Precision-Recall curve: AP={:0.2f}'.format(
           average_precision))
 
 ###############################################################################
@@ -205,7 +205,7 @@
     y_score.ravel())
 average_precision["micro"] = average_precision_score(Y_test, y_score,
                                                      average="micro")
-print('Average precision score, micro-averaged over all classes: {0:0.2f}'
+print('Average precision score, micro-averaged over all classes: {:0.2f}'
       .format(average_precision["micro"]))
 
 ###############################################################################
@@ -224,7 +224,7 @@
 plt.ylim([0.0, 1.05])
 plt.xlim([0.0, 1.0])
 plt.title(
-    'Average precision score, micro-averaged over all classes: AP={0:0.2f}'
+    'Average precision score, micro-averaged over all classes: AP={:0.2f}'
     .format(average_precision["micro"]))
 
 ###############################################################################
@@ -243,19 +243,19 @@
     x = np.linspace(0.01, 1)
     y = f_score * x / (2 * x - f_score)
     l, = plt.plot(x[y >= 0], y[y >= 0], color='gray', alpha=0.2)
-    plt.annotate('f1={0:0.1f}'.format(f_score), xy=(0.9, y[45] + 0.02))
+    plt.annotate('f1={:0.1f}'.format(f_score), xy=(0.9, y[45] + 0.02))
 
 lines.append(l)
 labels.append('iso-f1 curves')
 l, = plt.plot(recall["micro"], precision["micro"], color='gold', lw=2)
 lines.append(l)
-labels.append('micro-average Precision-recall (area = {0:0.2f})'
+labels.append('micro-average Precision-recall (area = {:0.2f})'
               ''.format(average_precision["micro"]))
 
 for i, color in zip(range(n_classes), colors):
     l, = plt.plot(recall[i], precision[i], color=color, lw=2)
     lines.append(l)
-    labels.append('Precision-recall for class {0} (area = {1:0.2f})'
+    labels.append('Precision-recall for class {} (area = {:0.2f})'
                   ''.format(i, average_precision[i]))
 
 fig = plt.gcf()
diff --git a/examples/model_selection/plot_randomized_search.py b/examples/model_selection/plot_randomized_search.py
index 2429c92e2642b..3f95af1692e63 100644
--- a/examples/model_selection/plot_randomized_search.py
+++ b/examples/model_selection/plot_randomized_search.py
@@ -43,11 +43,11 @@ def report(results, n_top=3):
     for i in range(1, n_top + 1):
         candidates = np.flatnonzero(results['rank_test_score'] == i)
         for candidate in candidates:
-            print("Model with rank: {0}".format(i))
-            print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
+            print("Model with rank: {}".format(i))
+            print("Mean validation score: {:.3f} (std: {:.3f})".format(
                   results['mean_test_score'][candidate],
                   results['std_test_score'][candidate]))
-            print("Parameters: {0}".format(results['params'][candidate]))
+            print("Parameters: {}".format(results['params'][candidate]))
             print("")
 
 
diff --git a/examples/model_selection/plot_roc.py b/examples/model_selection/plot_roc.py
index 475d7b4aba7a6..3503349a2d993 100644
--- a/examples/model_selection/plot_roc.py
+++ b/examples/model_selection/plot_roc.py
@@ -123,19 +123,19 @@
 # Plot all ROC curves
 plt.figure()
 plt.plot(fpr["micro"], tpr["micro"],
-         label='micro-average ROC curve (area = {0:0.2f})'
+         label='micro-average ROC curve (area = {:0.2f})'
                ''.format(roc_auc["micro"]),
          color='deeppink', linestyle=':', linewidth=4)
 
 plt.plot(fpr["macro"], tpr["macro"],
-         label='macro-average ROC curve (area = {0:0.2f})'
+         label='macro-average ROC curve (area = {:0.2f})'
                ''.format(roc_auc["macro"]),
          color='navy', linestyle=':', linewidth=4)
 
 colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])
 for i, color in zip(range(n_classes), colors):
     plt.plot(fpr[i], tpr[i], color=color, lw=lw,
-             label='ROC curve of class {0} (area = {1:0.2f})'
+             label='ROC curve of class {} (area = {:0.2f})'
              ''.format(i, roc_auc[i]))
 
 plt.plot([0, 1], [0, 1], 'k--', lw=lw)
diff --git a/examples/neighbors/plot_digits_kde_sampling.py b/examples/neighbors/plot_digits_kde_sampling.py
index ca44c96f13021..3707ec3bae332 100644
--- a/examples/neighbors/plot_digits_kde_sampling.py
+++ b/examples/neighbors/plot_digits_kde_sampling.py
@@ -30,7 +30,7 @@
 grid = GridSearchCV(KernelDensity(), params, cv=5)
 grid.fit(data)
 
-print("best bandwidth: {0}".format(grid.best_estimator_.bandwidth))
+print("best bandwidth: {}".format(grid.best_estimator_.bandwidth))
 
 # use the best estimator to compute the kernel density estimate
 kde = grid.best_estimator_
diff --git a/examples/neighbors/plot_kde_1d.py b/examples/neighbors/plot_kde_1d.py
index 8e1673dd48944..8070c2a792819 100644
--- a/examples/neighbors/plot_kde_1d.py
+++ b/examples/neighbors/plot_kde_1d.py
@@ -139,9 +139,9 @@ def format_func(x, loc):
     kde = KernelDensity(kernel=kernel, bandwidth=0.5).fit(X)
     log_dens = kde.score_samples(X_plot)
     ax.plot(X_plot[:, 0], np.exp(log_dens), '-',
-            label="kernel = '{0}'".format(kernel))
+            label="kernel = '{}'".format(kernel))
 
-ax.text(6, 0.38, "N={0} points".format(N))
+ax.text(6, 0.38, "N={} points".format(N))
 
 ax.legend(loc='upper left')
 ax.plot(X[:, 0], -0.005 - 0.01 * np.random.random(X.shape[0]), '+k')
diff --git a/setup.py b/setup.py
index cce21f5883c5a..dd6ae2fc7913e 100755
--- a/setup.py
+++ b/setup.py
@@ -49,12 +49,12 @@
 # We need to import setuptools early, if we want setuptools features,
 # as it monkey-patches the 'setup' function
 # For some commands, use setuptools
-SETUPTOOLS_COMMANDS = set([
+SETUPTOOLS_COMMANDS = {
     'develop', 'release', 'bdist_egg', 'bdist_rpm',
     'bdist_wininst', 'install_egg_info', 'build_sphinx',
     'egg_info', 'easy_install', 'upload', 'bdist_wheel',
     '--single-version-externally-managed',
-])
+}
 if SETUPTOOLS_COMMANDS.intersection(sys.argv):
     import setuptools
 
@@ -63,8 +63,8 @@
         include_package_data=True,
         extras_require={
             'alldeps': (
-                'numpy >= {0}'.format(NUMPY_MIN_VERSION),
-                'scipy >= {0}'.format(SCIPY_MIN_VERSION),
+                'numpy >= {}'.format(NUMPY_MIN_VERSION),
+                'scipy >= {}'.format(SCIPY_MIN_VERSION),
             ),
         },
     )
@@ -111,7 +111,7 @@ def run(self):
 # to PyPI at release time.
 # The URL of the artifact repositories are configured in the setup.cfg file.
 
-WHEELHOUSE_UPLOADER_COMMANDS = set(['fetch_artifacts', 'upload_all'])
+WHEELHOUSE_UPLOADER_COMMANDS = {'fetch_artifacts', 'upload_all'}
 if WHEELHOUSE_UPLOADER_COMMANDS.intersection(sys.argv):
     import wheelhouse_uploader.cmd
 
@@ -190,8 +190,8 @@ def setup_package():
                                  ],
                     cmdclass=cmdclass,
                     install_requires=[
-                        'numpy>={0}'.format(NUMPY_MIN_VERSION),
-                        'scipy>={0}'.format(SCIPY_MIN_VERSION)
+                        'numpy>={}'.format(NUMPY_MIN_VERSION),
+                        'scipy>={}'.format(SCIPY_MIN_VERSION)
                     ],
                     **extra_setuptools_args)
 
@@ -214,7 +214,7 @@ def setup_package():
         metadata['version'] = VERSION
     else:
         numpy_status = get_numpy_status()
-        numpy_req_str = "scikit-learn requires NumPy >= {0}.\n".format(
+        numpy_req_str = "scikit-learn requires NumPy >= {}.\n".format(
             NUMPY_MIN_VERSION)
 
         instructions = ("Installation instructions are available on the "
@@ -224,12 +224,12 @@ def setup_package():
         if numpy_status['up_to_date'] is False:
             if numpy_status['version']:
                 raise ImportError("Your installation of Numerical Python "
-                                  "(NumPy) {0} is out-of-date.\n{1}{2}"
+                                  "(NumPy) {} is out-of-date.\n{}{}"
                                   .format(numpy_status['version'],
                                           numpy_req_str, instructions))
             else:
                 raise ImportError("Numerical Python (NumPy) is not "
-                                  "installed.\n{0}{1}"
+                                  "installed.\n{}{}"
                                   .format(numpy_req_str, instructions))
 
         from numpy.distutils.core import setup
diff --git a/sklearn/__init__.py b/sklearn/__init__.py
index aafc8a34b2a13..cb15c9bc1d650 100644
--- a/sklearn/__init__.py
+++ b/sklearn/__init__.py
@@ -26,7 +26,7 @@
 
 # Make sure that DeprecationWarning within this package always gets printed
 warnings.filterwarnings('always', category=DeprecationWarning,
-                        module=r'^{0}\.'.format(re.escape(__name__)))
+                        module=r'^{}\.'.format(re.escape(__name__)))
 
 # PEP0440 compatible formatted version, see:
 # https://www.python.org/dev/peps/pep-0440/
diff --git a/sklearn/_build_utils/__init__.py b/sklearn/_build_utils/__init__.py
index 8629da1531c8a..8d250498989ff 100644
--- a/sklearn/_build_utils/__init__.py
+++ b/sklearn/_build_utils/__init__.py
@@ -74,7 +74,7 @@ def maybe_cythonize_extensions(top_path, config):
         try:
             import Cython
             if LooseVersion(Cython.__version__) < CYTHON_MIN_VERSION:
-                message += ' Your version of Cython was {0}.'.format(
+                message += ' Your version of Cython was {}.'.format(
                     Cython.__version__)
                 raise ValueError(message)
             from Cython.Build import cythonize
diff --git a/sklearn/base.py b/sklearn/base.py
index e7ea9428c29b9..e07068f502250 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -126,7 +126,7 @@ def _pprint(params, offset=0, printer=repr):
 
 
 ###############################################################################
-class BaseEstimator(object):
+class BaseEstimator:
     """Base class for all estimators in scikit-learn
 
     Notes
@@ -244,7 +244,7 @@ def __repr__(self):
 
     def __getstate__(self):
         try:
-            state = super(BaseEstimator, self).__getstate__()
+            state = super().__getstate__()
         except AttributeError:
             state = self.__dict__.copy()
 
@@ -258,19 +258,19 @@ def __setstate__(self, state):
             pickle_version = state.pop("_sklearn_version", "pre-0.18")
             if pickle_version != __version__:
                 warnings.warn(
-                    "Trying to unpickle estimator {0} from version {1} when "
-                    "using version {2}. This might lead to breaking code or "
+                    "Trying to unpickle estimator {} from version {} when "
+                    "using version {}. This might lead to breaking code or "
                     "invalid results. Use at your own risk.".format(
                         self.__class__.__name__, pickle_version, __version__),
                     UserWarning)
         try:
-            super(BaseEstimator, self).__setstate__(state)
+            super().__setstate__(state)
         except AttributeError:
             self.__dict__.update(state)
 
 
 ###############################################################################
-class ClassifierMixin(object):
+class ClassifierMixin:
     """Mixin class for all classifiers in scikit-learn."""
     _estimator_type = "classifier"
 
@@ -303,7 +303,7 @@ def score(self, X, y, sample_weight=None):
 
 
 ###############################################################################
-class RegressorMixin(object):
+class RegressorMixin:
     """Mixin class for all regression estimators in scikit-learn."""
     _estimator_type = "regressor"
 
@@ -344,7 +344,7 @@ def score(self, X, y, sample_weight=None):
 
 
 ###############################################################################
-class ClusterMixin(object):
+class ClusterMixin:
     """Mixin class for all cluster estimators in scikit-learn."""
     _estimator_type = "clusterer"
 
@@ -370,7 +370,7 @@ def fit_predict(self, X, y=None):
         return self.labels_
 
 
-class BiclusterMixin(object):
+class BiclusterMixin:
     """Mixin class for all bicluster estimators in scikit-learn"""
 
     @property
@@ -446,7 +446,7 @@ def get_submatrix(self, i, data):
 
 
 ###############################################################################
-class TransformerMixin(object):
+class TransformerMixin:
     """Mixin class for all transformers in scikit-learn."""
 
     def fit_transform(self, X, y=None, **fit_params):
@@ -479,7 +479,7 @@ def fit_transform(self, X, y=None, **fit_params):
             return self.fit(X, y, **fit_params).transform(X)
 
 
-class DensityMixin(object):
+class DensityMixin:
     """Mixin class for all density estimators in scikit-learn."""
     _estimator_type = "DensityEstimator"
 
@@ -497,7 +497,7 @@ def score(self, X, y=None):
         pass
 
 
-class OutlierMixin(object):
+class OutlierMixin:
     """Mixin class for all outlier detection estimators in scikit-learn."""
     _estimator_type = "outlier_detector"
 
@@ -524,7 +524,7 @@ def fit_predict(self, X, y=None):
 
 
 ###############################################################################
-class MetaEstimatorMixin(object):
+class MetaEstimatorMixin:
     """Mixin class for all meta estimators in scikit-learn."""
     # this is just a tag for the moment
 
diff --git a/sklearn/calibration.py b/sklearn/calibration.py
index b563cdee143cb..43948346da709 100644
--- a/sklearn/calibration.py
+++ b/sklearn/calibration.py
@@ -248,7 +248,7 @@ def predict(self, X):
         return self.classes_[np.argmax(self.predict_proba(X), axis=1)]
 
 
-class _CalibratedClassifier(object):
+class _CalibratedClassifier:
     """Probability calibration with isotonic regression or sigmoid.
 
     It assumes that base_estimator has already been fit, and trains the
diff --git a/sklearn/cluster/bicluster.py b/sklearn/cluster/bicluster.py
index 567b9c063f189..455abde6e1b78 100644
--- a/sklearn/cluster/bicluster.py
+++ b/sklearn/cluster/bicluster.py
@@ -103,8 +103,8 @@ def __init__(self, n_clusters=3, svd_method="randomized",
     def _check_parameters(self):
         legal_svd_methods = ('randomized', 'arpack')
         if self.svd_method not in legal_svd_methods:
-            raise ValueError("Unknown SVD method: '{0}'. svd_method must be"
-                             " one of {1}.".format(self.svd_method,
+            raise ValueError("Unknown SVD method: '{}'. svd_method must be"
+                             " one of {}.".format(self.svd_method,
                                                    legal_svd_methods))
 
     def fit(self, X, y=None):
@@ -281,7 +281,7 @@ class SpectralCoclustering(BaseSpectral):
     def __init__(self, n_clusters=3, svd_method='randomized',
                  n_svd_vecs=None, mini_batch=False, init='k-means++',
                  n_init=10, n_jobs=None, random_state=None):
-        super(SpectralCoclustering, self).__init__(n_clusters,
+        super().__init__(n_clusters,
                                                    svd_method,
                                                    n_svd_vecs,
                                                    mini_batch,
@@ -429,7 +429,7 @@ def __init__(self, n_clusters=3, method='bistochastic',
                  n_components=6, n_best=3, svd_method='randomized',
                  n_svd_vecs=None, mini_batch=False, init='k-means++',
                  n_init=10, n_jobs=None, random_state=None):
-        super(SpectralBiclustering, self).__init__(n_clusters,
+        super().__init__(n_clusters,
                                                    svd_method,
                                                    n_svd_vecs,
                                                    mini_batch,
@@ -442,11 +442,11 @@ def __init__(self, n_clusters=3, method='bistochastic',
         self.n_best = n_best
 
     def _check_parameters(self):
-        super(SpectralBiclustering, self)._check_parameters()
+        super()._check_parameters()
         legal_methods = ('bistochastic', 'scale', 'log')
         if self.method not in legal_methods:
-            raise ValueError("Unknown method: '{0}'. method must be"
-                             " one of {1}.".format(self.method, legal_methods))
+            raise ValueError("Unknown method: '{}'. method must be"
+                             " one of {}.".format(self.method, legal_methods))
         try:
             int(self.n_clusters)
         except TypeError:
diff --git a/sklearn/cluster/birch.py b/sklearn/cluster/birch.py
index 4b5f72ada9ad3..6ddccc16c1467 100644
--- a/sklearn/cluster/birch.py
+++ b/sklearn/cluster/birch.py
@@ -86,7 +86,7 @@ def _split_node(node, threshold, branching_factor):
     return new_subcluster1, new_subcluster2
 
 
-class _CFNode(object):
+class _CFNode:
     """Each node in a CFTree is called a CFNode.
 
     The CFNode can have a maximum of branching_factor
@@ -239,7 +239,7 @@ def insert_cf_subcluster(self, subcluster):
                 return True
 
 
-class _CFSubcluster(object):
+class _CFSubcluster:
     """Each subcluster in a CFNode is called a CFSubcluster.
 
     A CFSubcluster can have a CFNode has its child.
diff --git a/sklearn/cluster/hierarchical.py b/sklearn/cluster/hierarchical.py
index ebed5f9b34db2..b0cb1f68e6e44 100644
--- a/sklearn/cluster/hierarchical.py
+++ b/sklearn/cluster/hierarchical.py
@@ -929,7 +929,7 @@ def __init__(self, n_clusters=2, affinity="euclidean",
                  memory=None,
                  connectivity=None, compute_full_tree='auto',
                  linkage='ward', pooling_func=np.mean):
-        super(FeatureAgglomeration, self).__init__(
+        super().__init__(
             n_clusters=n_clusters, memory=memory, connectivity=connectivity,
             compute_full_tree=compute_full_tree, linkage=linkage,
             affinity=affinity)
diff --git a/sklearn/cluster/k_means_.py b/sklearn/cluster/k_means_.py
index ea31f042c0419..97e95f21c6035 100644
--- a/sklearn/cluster/k_means_.py
+++ b/sklearn/cluster/k_means_.py
@@ -1458,7 +1458,7 @@ def __init__(self, n_clusters=8, init='k-means++', max_iter=100,
                  random_state=None, tol=0.0, max_no_improvement=10,
                  init_size=None, n_init=3, reassignment_ratio=0.01):
 
-        super(MiniBatchKMeans, self).__init__(
+        super().__init__(
             n_clusters=n_clusters, init=init, max_iter=max_iter,
             verbose=verbose, random_state=random_state, tol=tol, n_init=n_init)
 
diff --git a/sklearn/cluster/optics_.py b/sklearn/cluster/optics_.py
index 9f47bae36aed5..5c5287e50bc73 100755
--- a/sklearn/cluster/optics_.py
+++ b/sklearn/cluster/optics_.py
@@ -725,7 +725,7 @@ def _automatic_cluster(reachability_plot, ordering,
     return root_node
 
 
-class _TreeNode(object):
+class _TreeNode:
     # automatic cluster helper classes and functions
     def __init__(self, points, start, end, parent_node):
         self.points = points
diff --git a/sklearn/cluster/tests/test_mean_shift.py b/sklearn/cluster/tests/test_mean_shift.py
index 94176c16372dd..651f4d5bece49 100644
--- a/sklearn/cluster/tests/test_mean_shift.py
+++ b/sklearn/cluster/tests/test_mean_shift.py
@@ -122,14 +122,14 @@ def test_bin_seeds():
     # found
     ground_truth = {(1., 1.), (2., 1.), (0., 0.)}
     test_bins = get_bin_seeds(X, 1, 1)
-    test_result = set(tuple(p) for p in test_bins)
+    test_result = {tuple(p) for p in test_bins}
     assert len(ground_truth.symmetric_difference(test_result)) == 0
 
     # With a bin coarseness of 1.0 and min_bin_freq of 2, 2 bins should be
     # found
     ground_truth = {(1., 1.), (2., 1.)}
     test_bins = get_bin_seeds(X, 1, 2)
-    test_result = set(tuple(p) for p in test_bins)
+    test_result = {tuple(p) for p in test_bins}
     assert len(ground_truth.symmetric_difference(test_result)) == 0
 
     # With a bin size of 0.01 and min_bin_freq of 1, 6 bins should be found
diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py
index ae1cf0cde3a57..03e6b5ddaa567 100644
--- a/sklearn/compose/_column_transformer.py
+++ b/sklearn/compose/_column_transformer.py
@@ -313,8 +313,8 @@ def named_transformers_(self):
 
         """
         # Use Bunch object to improve autocomplete
-        return Bunch(**dict([(name, trans) for name, trans, _
-                             in self.transformers_]))
+        return Bunch(**{name: trans for name, trans, _
+                        in self.transformers_})
 
     def get_feature_names(self):
         """Get feature names from all transformers.
@@ -374,7 +374,7 @@ def _validate_output(self, result):
         for Xs, name in zip(result, names):
             if not getattr(Xs, 'ndim', 0) == 2:
                 raise ValueError(
-                    "The output of the '{0}' transformer should be 2D (scipy "
+                    "The output of the '{}' transformer should be 2D (scipy "
                     "matrix, array, or pandas DataFrame).".format(name))
 
     def _fit_transform(self, X, y, func, fitted=False):
diff --git a/sklearn/compose/tests/test_target.py b/sklearn/compose/tests/test_target.py
index edfbd8ec108a9..9b0a4366cc1b6 100644
--- a/sklearn/compose/tests/test_target.py
+++ b/sklearn/compose/tests/test_target.py
@@ -246,11 +246,11 @@ class DummyCheckerListRegressor(DummyRegressor):
 
     def fit(self, X, y, sample_weight=None):
         assert isinstance(X, list)
-        return super(DummyCheckerListRegressor, self).fit(X, y, sample_weight)
+        return super().fit(X, y, sample_weight)
 
     def predict(self, X):
         assert isinstance(X, list)
-        return super(DummyCheckerListRegressor, self).predict(X)
+        return super().predict(X)
 
 
 def test_transform_target_regressor_ensure_y_array():
diff --git a/sklearn/covariance/elliptic_envelope.py b/sklearn/covariance/elliptic_envelope.py
index 8f1936aeb2f72..86ef3c7dcf921 100644
--- a/sklearn/covariance/elliptic_envelope.py
+++ b/sklearn/covariance/elliptic_envelope.py
@@ -88,7 +88,7 @@ class EllipticEnvelope(MinCovDet, OutlierMixin):
     def __init__(self, store_precision=True, assume_centered=False,
                  support_fraction=None, contamination=0.1,
                  random_state=None):
-        super(EllipticEnvelope, self).__init__(
+        super().__init__(
             store_precision=store_precision,
             assume_centered=assume_centered,
             support_fraction=support_fraction,
@@ -107,7 +107,7 @@ def fit(self, X, y=None):
             not used, present for API consistency by convention.
 
         """
-        super(EllipticEnvelope, self).fit(X)
+        super().fit(X)
         self.offset_ = np.percentile(-self.dist_, 100. * self.contamination)
         return self
 
diff --git a/sklearn/covariance/graph_lasso_.py b/sklearn/covariance/graph_lasso_.py
index 0e3ec397d4a5b..a59764c90bd04 100644
--- a/sklearn/covariance/graph_lasso_.py
+++ b/sklearn/covariance/graph_lasso_.py
@@ -337,7 +337,7 @@ class GraphicalLasso(EmpiricalCovariance):
 
     def __init__(self, alpha=.01, mode='cd', tol=1e-4, enet_tol=1e-4,
                  max_iter=100, verbose=False, assume_centered=False):
-        super(GraphicalLasso, self).__init__(assume_centered=assume_centered)
+        super().__init__(assume_centered=assume_centered)
         self.alpha = alpha
         self.mode = mode
         self.tol = tol
@@ -581,7 +581,7 @@ class GraphicalLassoCV(GraphicalLasso):
     def __init__(self, alphas=4, n_refinements=4, cv='warn', tol=1e-4,
                  enet_tol=1e-4, max_iter=100, mode='cd', n_jobs=None,
                  verbose=False, assume_centered=False):
-        super(GraphicalLassoCV, self).__init__(
+        super().__init__(
             mode=mode, tol=tol, verbose=verbose, enet_tol=enet_tol,
             max_iter=max_iter, assume_centered=assume_centered)
         self.alphas = alphas
diff --git a/sklearn/covariance/shrunk_covariance_.py b/sklearn/covariance/shrunk_covariance_.py
index 94804ccac60d6..e7ec2a84fff18 100644
--- a/sklearn/covariance/shrunk_covariance_.py
+++ b/sklearn/covariance/shrunk_covariance_.py
@@ -126,7 +126,7 @@ class ShrunkCovariance(EmpiricalCovariance):
     """
     def __init__(self, store_precision=True, assume_centered=False,
                  shrinkage=0.1):
-        super(ShrunkCovariance, self).__init__(store_precision=store_precision,
+        super().__init__(store_precision=store_precision,
                                                assume_centered=assume_centered)
         self.shrinkage = shrinkage
 
@@ -401,7 +401,7 @@ class LedoitWolf(EmpiricalCovariance):
     """
     def __init__(self, store_precision=True, assume_centered=False,
                  block_size=1000):
-        super(LedoitWolf, self).__init__(store_precision=store_precision,
+        super().__init__(store_precision=store_precision,
                                          assume_centered=assume_centered)
         self.block_size = block_size
 
diff --git a/sklearn/cross_decomposition/cca_.py b/sklearn/cross_decomposition/cca_.py
index 47ff08e27631c..7ff305ad7222c 100644
--- a/sklearn/cross_decomposition/cca_.py
+++ b/sklearn/cross_decomposition/cca_.py
@@ -101,7 +101,7 @@ class CCA(_PLS):
 
     def __init__(self, n_components=2, scale=True,
                  max_iter=500, tol=1e-06, copy=True):
-        super(CCA, self).__init__(n_components=n_components, scale=scale,
+        super().__init__(n_components=n_components, scale=scale,
                                   deflation_mode="canonical", mode="B",
                                   norm_y_weights=True, algorithm="nipals",
                                   max_iter=max_iter, tol=tol, copy=copy)
diff --git a/sklearn/cross_decomposition/pls_.py b/sklearn/cross_decomposition/pls_.py
index ea35089de6637..459f297e4be2b 100644
--- a/sklearn/cross_decomposition/pls_.py
+++ b/sklearn/cross_decomposition/pls_.py
@@ -588,7 +588,7 @@ class PLSRegression(_PLS):
 
     def __init__(self, n_components=2, scale=True,
                  max_iter=500, tol=1e-06, copy=True):
-        super(PLSRegression, self).__init__(
+        super().__init__(
             n_components=n_components, scale=scale,
             deflation_mode="regression", mode="A",
             norm_y_weights=False, max_iter=max_iter, tol=tol,
@@ -734,7 +734,7 @@ class PLSCanonical(_PLS):
 
     def __init__(self, n_components=2, scale=True, algorithm="nipals",
                  max_iter=500, tol=1e-06, copy=True):
-        super(PLSCanonical, self).__init__(
+        super().__init__(
             n_components=n_components, scale=scale,
             deflation_mode="canonical", mode="A",
             norm_y_weights=True, algorithm=algorithm,
diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index fa195ce030298..be1dec6ac1c13 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -611,7 +611,7 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
         raise ValueError('Mix of nominal and non-nominal targets is not '
                          'currently supported')
 
-    description = u"{}\n\nDownloaded from openml.org.".format(
+    description = "{}\n\nDownloaded from openml.org.".format(
         data_description.pop('description'))
 
     # reshape y back to 1-D array, if there is only 1 target column; back
diff --git a/sklearn/datasets/rcv1.py b/sklearn/datasets/rcv1.py
index 66566c389baf3..965ff1fbcb063 100644
--- a/sklearn/datasets/rcv1.py
+++ b/sklearn/datasets/rcv1.py
@@ -208,7 +208,7 @@ def fetch_rcv1(data_home=None, subset='all', download_if_missing=True,
         category_names = {}
         with GzipFile(filename=topics_archive_path, mode='rb') as f:
             for line in f:
-                line_components = line.decode("ascii").split(u" ")
+                line_components = line.decode("ascii").split(" ")
                 if len(line_components) == 3:
                     cat, doc, _ = line_components
                     if cat not in category_names:
diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py
index 4cda24c7398b2..2f58d746f7c6c 100644
--- a/sklearn/datasets/tests/test_openml.py
+++ b/sklearn/datasets/tests/test_openml.py
@@ -152,7 +152,7 @@ def _monkey_patch_webbased_functions(context,
     path_suffix = '.gz'
     read_fn = gzip.open
 
-    class MockHTTPResponse(object):
+    class MockHTTPResponse:
         def __init__(self, data, is_gzip):
             self.data = data
             self.is_gzip = is_gzip
diff --git a/sklearn/datasets/tests/test_rcv1.py b/sklearn/datasets/tests/test_rcv1.py
index 57627cc834c95..ea12c9f8e3a12 100644
--- a/sklearn/datasets/tests/test_rcv1.py
+++ b/sklearn/datasets/tests/test_rcv1.py
@@ -38,7 +38,7 @@ def test_fetch_rcv1():
     assert_equal(103, len(cat_list))
 
     # test ordering of categories
-    first_categories = [u'C11', u'C12', u'C13', u'C14', u'C15', u'C151']
+    first_categories = ['C11', 'C12', 'C13', 'C14', 'C15', 'C151']
     assert_array_equal(first_categories, cat_list[:6])
 
     # test number of sample for some categories
diff --git a/sklearn/datasets/tests/test_samples_generator.py b/sklearn/datasets/tests/test_samples_generator.py
index c8b0fbd571145..39c73aff2e41c 100644
--- a/sklearn/datasets/tests/test_samples_generator.py
+++ b/sklearn/datasets/tests/test_samples_generator.py
@@ -101,7 +101,7 @@ def test_make_classification_informative_features():
 
             # Cluster by sign, viewed as strings to allow uniquing
             signs = np.sign(X)
-            signs = signs.view(dtype='|S{0}'.format(signs.strides[0]))
+            signs = signs.view(dtype='|S{}'.format(signs.strides[0]))
             unique_signs, cluster_index = np.unique(signs,
                                                     return_inverse=True)
 
diff --git a/sklearn/decomposition/fastica_.py b/sklearn/decomposition/fastica_.py
index f9ee5e42fbbc8..df2786e3a1c92 100644
--- a/sklearn/decomposition/fastica_.py
+++ b/sklearn/decomposition/fastica_.py
@@ -463,7 +463,7 @@ def my_g(x):
     def __init__(self, n_components=None, algorithm='parallel', whiten=True,
                  fun='logcosh', fun_args=None, max_iter=200, tol=1e-4,
                  w_init=None, random_state=None):
-        super(FastICA, self).__init__()
+        super().__init__()
         if max_iter < 1:
             raise ValueError("max_iter should be greater than 1, got "
                              "(max_iter={})".format(max_iter))
diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py
index 31e99c024ae9a..ebb237ebd4904 100644
--- a/sklearn/decomposition/pca.py
+++ b/sklearn/decomposition/pca.py
@@ -405,7 +405,7 @@ def _fit(self, X):
         elif self._fit_svd_solver in ['arpack', 'randomized']:
             return self._fit_truncated(X, n_components, self._fit_svd_solver)
         else:
-            raise ValueError("Unrecognized svd_solver='{0}'"
+            raise ValueError("Unrecognized svd_solver='{}'"
                              "".format(self._fit_svd_solver))
 
     def _fit_full(self, X, n_components):
diff --git a/sklearn/decomposition/sparse_pca.py b/sklearn/decomposition/sparse_pca.py
index 5bc2107f7f31c..499442e52886b 100644
--- a/sklearn/decomposition/sparse_pca.py
+++ b/sklearn/decomposition/sparse_pca.py
@@ -348,7 +348,7 @@ def __init__(self, n_components=None, alpha=1, ridge_alpha=0.01,
                  n_iter=100, callback=None, batch_size=3, verbose=False,
                  shuffle=True, n_jobs=None, method='lars', random_state=None,
                  normalize_components=False):
-        super(MiniBatchSparsePCA, self).__init__(
+        super().__init__(
             n_components=n_components, alpha=alpha, verbose=verbose,
             ridge_alpha=ridge_alpha, n_jobs=n_jobs, method=method,
             random_state=random_state,
diff --git a/sklearn/dummy.py b/sklearn/dummy.py
index fc9001258eb44..afc553bed3ee8 100644
--- a/sklearn/dummy.py
+++ b/sklearn/dummy.py
@@ -349,7 +349,7 @@ def score(self, X, y, sample_weight=None):
         """
         if X is None:
             X = np.zeros(shape=(len(y), 1))
-        return super(DummyClassifier, self).score(X, y, sample_weight)
+        return super().score(X, y, sample_weight)
 
 
 class DummyRegressor(BaseEstimator, RegressorMixin):
@@ -548,4 +548,4 @@ def score(self, X, y, sample_weight=None):
         """
         if X is None:
             X = np.zeros(shape=(len(y), 1))
-        return super(DummyRegressor, self).score(X, y, sample_weight)
+        return super().score(X, y, sample_weight)
diff --git a/sklearn/ensemble/bagging.py b/sklearn/ensemble/bagging.py
index 754193cfe2913..aece56b4fd049 100644
--- a/sklearn/ensemble/bagging.py
+++ b/sklearn/ensemble/bagging.py
@@ -202,7 +202,7 @@ def __init__(self,
                  n_jobs=None,
                  random_state=None,
                  verbose=0):
-        super(BaseBagging, self).__init__(
+        super().__init__(
             base_estimator=base_estimator,
             n_estimators=n_estimators)
 
@@ -565,7 +565,7 @@ def __init__(self,
                  random_state=None,
                  verbose=0):
 
-        super(BaggingClassifier, self).__init__(
+        super().__init__(
             base_estimator,
             n_estimators=n_estimators,
             max_samples=max_samples,
@@ -580,7 +580,7 @@ def __init__(self,
 
     def _validate_estimator(self):
         """Check the estimator and set the base_estimator_ attribute."""
-        super(BaggingClassifier, self)._validate_estimator(
+        super()._validate_estimator(
             default=DecisionTreeClassifier())
 
     def _set_oob_score(self, X, y):
@@ -681,8 +681,8 @@ def predict_proba(self, X):
 
         if self.n_features_ != X.shape[1]:
             raise ValueError("Number of features of the model must "
-                             "match the input. Model n_features is {0} and "
-                             "input n_features is {1}."
+                             "match the input. Model n_features is {} and "
+                             "input n_features is {}."
                              "".format(self.n_features_, X.shape[1]))
 
         # Parallel loop
@@ -732,8 +732,8 @@ def predict_log_proba(self, X):
 
             if self.n_features_ != X.shape[1]:
                 raise ValueError("Number of features of the model must "
-                                 "match the input. Model n_features is {0} "
-                                 "and input n_features is {1} "
+                                 "match the input. Model n_features is {} "
+                                 "and input n_features is {} "
                                  "".format(self.n_features_, X.shape[1]))
 
             # Parallel loop
@@ -790,8 +790,8 @@ def decision_function(self, X):
 
         if self.n_features_ != X.shape[1]:
             raise ValueError("Number of features of the model must "
-                             "match the input. Model n_features is {0} and "
-                             "input n_features is {1} "
+                             "match the input. Model n_features is {} and "
+                             "input n_features is {} "
                              "".format(self.n_features_, X.shape[1]))
 
         # Parallel loop
@@ -935,7 +935,7 @@ def __init__(self,
                  n_jobs=None,
                  random_state=None,
                  verbose=0):
-        super(BaggingRegressor, self).__init__(
+        super().__init__(
             base_estimator,
             n_estimators=n_estimators,
             max_samples=max_samples,
@@ -990,7 +990,7 @@ def predict(self, X):
 
     def _validate_estimator(self):
         """Check the estimator and set the base_estimator_ attribute."""
-        super(BaggingRegressor, self)._validate_estimator(
+        super()._validate_estimator(
             default=DecisionTreeRegressor())
 
     def _set_oob_score(self, X, y):
diff --git a/sklearn/ensemble/base.py b/sklearn/ensemble/base.py
index 1ca2ef8b20492..7cdb3f09755b8 100644
--- a/sklearn/ensemble/base.py
+++ b/sklearn/ensemble/base.py
@@ -101,11 +101,11 @@ def _validate_estimator(self, default=None):
         `base_estimator_` attribute."""
         if not isinstance(self.n_estimators, (numbers.Integral, np.integer)):
             raise ValueError("n_estimators must be an integer, "
-                             "got {0}.".format(type(self.n_estimators)))
+                             "got {}.".format(type(self.n_estimators)))
 
         if self.n_estimators <= 0:
             raise ValueError("n_estimators must be greater than zero, "
-                             "got {0}.".format(self.n_estimators))
+                             "got {}.".format(self.n_estimators))
 
         if self.base_estimator is not None:
             self.base_estimator_ = self.base_estimator
@@ -122,8 +122,8 @@ def _make_estimator(self, append=True, random_state=None):
         sub-estimators.
         """
         estimator = clone(self.base_estimator_)
-        estimator.set_params(**dict((p, getattr(self, p))
-                                    for p in self.estimator_params))
+        estimator.set_params(**{p: getattr(self, p)
+                                for p in self.estimator_params})
 
         if random_state is not None:
             _set_random_states(estimator, random_state)
diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py
index 6f6a9fa8e0cd8..6d6091d869bad 100644
--- a/sklearn/ensemble/forest.py
+++ b/sklearn/ensemble/forest.py
@@ -141,7 +141,7 @@ def __init__(self,
                  verbose=0,
                  warm_start=False,
                  class_weight=None):
-        super(BaseForest, self).__init__(
+        super().__init__(
             base_estimator=base_estimator,
             n_estimators=n_estimators,
             estimator_params=estimator_params)
@@ -411,7 +411,7 @@ def __init__(self,
                  verbose=0,
                  warm_start=False,
                  class_weight=None):
-        super(ForestClassifier, self).__init__(
+        super().__init__(
             base_estimator,
             n_estimators=n_estimators,
             estimator_params=estimator_params,
@@ -651,7 +651,7 @@ def __init__(self,
                  random_state=None,
                  verbose=0,
                  warm_start=False):
-        super(ForestRegressor, self).__init__(
+        super().__init__(
             base_estimator,
             n_estimators=n_estimators,
             estimator_params=estimator_params,
@@ -1008,7 +1008,7 @@ def __init__(self,
                  verbose=0,
                  warm_start=False,
                  class_weight=None):
-        super(RandomForestClassifier, self).__init__(
+        super().__init__(
             base_estimator=DecisionTreeClassifier(),
             n_estimators=n_estimators,
             estimator_params=("criterion", "max_depth", "min_samples_split",
@@ -1267,7 +1267,7 @@ def __init__(self,
                  random_state=None,
                  verbose=0,
                  warm_start=False):
-        super(RandomForestRegressor, self).__init__(
+        super().__init__(
             base_estimator=DecisionTreeRegressor(),
             n_estimators=n_estimators,
             estimator_params=("criterion", "max_depth", "min_samples_split",
@@ -1523,7 +1523,7 @@ def __init__(self,
                  verbose=0,
                  warm_start=False,
                  class_weight=None):
-        super(ExtraTreesClassifier, self).__init__(
+        super().__init__(
             base_estimator=ExtraTreeClassifier(),
             n_estimators=n_estimators,
             estimator_params=("criterion", "max_depth", "min_samples_split",
@@ -1746,7 +1746,7 @@ def __init__(self,
                  random_state=None,
                  verbose=0,
                  warm_start=False):
-        super(ExtraTreesRegressor, self).__init__(
+        super().__init__(
             base_estimator=ExtraTreeRegressor(),
             n_estimators=n_estimators,
             estimator_params=("criterion", "max_depth", "min_samples_split",
@@ -1921,7 +1921,7 @@ def __init__(self,
                  random_state=None,
                  verbose=0,
                  warm_start=False):
-        super(RandomTreesEmbedding, self).__init__(
+        super().__init__(
             base_estimator=ExtraTreeRegressor(),
             n_estimators=n_estimators,
             estimator_params=("criterion", "max_depth", "min_samples_split",
@@ -2002,7 +2002,7 @@ def fit_transform(self, X, y=None, sample_weight=None):
 
         rnd = check_random_state(self.random_state)
         y = rnd.uniform(size=X.shape[0])
-        super(RandomTreesEmbedding, self).fit(X, y,
+        super().fit(X, y,
                                               sample_weight=sample_weight)
 
         self.one_hot_encoder_ = OneHotEncoder(sparse=self.sparse_output,
diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py
index 7d0b3e451e34f..ac6f949bf3899 100644
--- a/sklearn/ensemble/gradient_boosting.py
+++ b/sklearn/ensemble/gradient_boosting.py
@@ -61,7 +61,7 @@
 from ..exceptions import NotFittedError
 
 
-class QuantileEstimator(object):
+class QuantileEstimator:
     """An estimator predicting the alpha-quantile of the training targets.
 
     Parameters
@@ -114,7 +114,7 @@ def predict(self, X):
         return y
 
 
-class MeanEstimator(object):
+class MeanEstimator:
     """An estimator predicting the mean of the training targets."""
     def fit(self, X, y, sample_weight=None):
         """Fit the estimator.
@@ -155,7 +155,7 @@ def predict(self, X):
         return y
 
 
-class LogOddsEstimator(object):
+class LogOddsEstimator:
     """An estimator predicting the log odds ratio."""
     scale = 1.0
 
@@ -210,7 +210,7 @@ class ScaledLogOddsEstimator(LogOddsEstimator):
     scale = 0.5
 
 
-class PriorProbabilityEstimator(object):
+class PriorProbabilityEstimator:
     """An estimator predicting the probability of each
     class in the training data.
     """
@@ -253,7 +253,7 @@ def predict(self, X):
         return y
 
 
-class ZeroEstimator(object):
+class ZeroEstimator:
     """An estimator that simply predicts zero. """
 
     def fit(self, X, y, sample_weight=None):
@@ -299,7 +299,7 @@ def predict(self, X):
         return y
 
 
-class LossFunction(object, metaclass=ABCMeta):
+class LossFunction(metaclass=ABCMeta):
     """Abstract base class for various loss functions.
 
     Parameters
@@ -418,7 +418,7 @@ def __init__(self, n_classes):
         if n_classes != 1:
             raise ValueError("``n_classes`` must be 1 for regression but "
                              "was %r" % n_classes)
-        super(RegressionLossFunction, self).__init__(n_classes)
+        super().__init__(n_classes)
 
 
 class LeastSquaresError(RegressionLossFunction):
@@ -580,7 +580,7 @@ class HuberLossFunction(RegressionLossFunction):
     """
 
     def __init__(self, n_classes, alpha=0.9):
-        super(HuberLossFunction, self).__init__(n_classes)
+        super().__init__(n_classes)
         self.alpha = alpha
         self.gamma = None
 
@@ -678,7 +678,7 @@ class QuantileLossFunction(RegressionLossFunction):
         The percentile
     """
     def __init__(self, n_classes, alpha=0.9):
-        super(QuantileLossFunction, self).__init__(n_classes)
+        super().__init__(n_classes)
         self.alpha = alpha
         self.percentile = alpha * 100.0
 
@@ -771,10 +771,10 @@ class BinomialDeviance(ClassificationLossFunction):
     """
     def __init__(self, n_classes):
         if n_classes != 2:
-            raise ValueError("{0:s} requires 2 classes; got {1:d} class(es)"
+            raise ValueError("{:s} requires 2 classes; got {:d} class(es)"
                              .format(self.__class__.__name__, n_classes))
         # we only need to fit one tree for binary clf.
-        super(BinomialDeviance, self).__init__(1)
+        super().__init__(1)
 
     def init_estimator(self):
         return LogOddsEstimator()
@@ -865,9 +865,9 @@ class MultinomialDeviance(ClassificationLossFunction):
 
     def __init__(self, n_classes):
         if n_classes < 3:
-            raise ValueError("{0:s} requires more than 2 classes.".format(
+            raise ValueError("{:s} requires more than 2 classes.".format(
                 self.__class__.__name__))
-        super(MultinomialDeviance, self).__init__(n_classes)
+        super().__init__(n_classes)
 
     def init_estimator(self):
         return PriorProbabilityEstimator()
@@ -960,10 +960,10 @@ class ExponentialLoss(ClassificationLossFunction):
     """
     def __init__(self, n_classes):
         if n_classes != 2:
-            raise ValueError("{0:s} requires 2 classes; got {1:d} class(es)"
+            raise ValueError("{:s} requires 2 classes; got {:d} class(es)"
                              .format(self.__class__.__name__, n_classes))
         # we only need to fit one tree for binary clf.
-        super(ExponentialLoss, self).__init__(1)
+        super().__init__(1)
 
     def init_estimator(self):
         return ScaledLogOddsEstimator()
@@ -1043,7 +1043,7 @@ def _score_to_decision(self, score):
 INIT_ESTIMATORS = {'zero': ZeroEstimator}
 
 
-class VerboseReporter(object):
+class VerboseReporter:
     """Reports verbose output to stdout.
 
     Parameters
@@ -1106,9 +1106,9 @@ def update(self, j, est):
             remaining_time = ((est.n_estimators - (j + 1)) *
                               (time() - self.start_time) / float(i + 1))
             if remaining_time > 60:
-                remaining_time = '{0:.2f}m'.format(remaining_time / 60.0)
+                remaining_time = '{:.2f}m'.format(remaining_time / 60.0)
             else:
-                remaining_time = '{0:.2f}s'.format(remaining_time)
+                remaining_time = '{:.2f}s'.format(remaining_time)
             print(self.verbose_fmt.format(iter=j + 1,
                                           train_score=est.train_score_[j],
                                           oob_impr=oob_impr,
@@ -1220,7 +1220,7 @@ def _check_params(self):
 
         if (self.loss not in self._SUPPORTED_LOSS
                 or self.loss not in LOSS_FUNCTIONS):
-            raise ValueError("Loss '{0:s}' not supported. ".format(self.loss))
+            raise ValueError("Loss '{:s}' not supported. ".format(self.loss))
 
         if self.loss == 'deviance':
             loss_class = (MultinomialDeviance
@@ -1580,7 +1580,7 @@ def _init_decision_function(self, X):
         self._check_initialized()
         X = self.estimators_[0, 0]._validate_X_predict(X, check_input=True)
         if X.shape[1] != self.n_features_:
-            raise ValueError("X.shape[1] should be {0:d}, not {1:d}.".format(
+            raise ValueError("X.shape[1] should be {:d}, not {:d}.".format(
                 self.n_features_, X.shape[1]))
         score = self.init_.predict(X).astype(np.float64)
         return score
@@ -1948,7 +1948,7 @@ def __init__(self, loss='deviance', learning_rate=0.1, n_estimators=100,
                  presort='auto', validation_fraction=0.1,
                  n_iter_no_change=None, tol=1e-4):
 
-        super(GradientBoostingClassifier, self).__init__(
+        super().__init__(
             loss=loss, learning_rate=learning_rate, n_estimators=n_estimators,
             criterion=criterion, min_samples_split=min_samples_split,
             min_samples_leaf=min_samples_leaf,
@@ -2402,7 +2402,7 @@ def __init__(self, loss='ls', learning_rate=0.1, n_estimators=100,
                  warm_start=False, presort='auto', validation_fraction=0.1,
                  n_iter_no_change=None, tol=1e-4):
 
-        super(GradientBoostingRegressor, self).__init__(
+        super().__init__(
             loss=loss, learning_rate=learning_rate, n_estimators=n_estimators,
             criterion=criterion, min_samples_split=min_samples_split,
             min_samples_leaf=min_samples_leaf,
@@ -2474,6 +2474,6 @@ def apply(self, X):
             return the index of the leaf x ends up in each estimator.
         """
 
-        leaves = super(GradientBoostingRegressor, self).apply(X)
+        leaves = super().apply(X)
         leaves = leaves.reshape(X.shape[0], self.estimators_.shape[0])
         return leaves
diff --git a/sklearn/ensemble/iforest.py b/sklearn/ensemble/iforest.py
index 9ca7af6ab9eaf..8490f046cf540 100644
--- a/sklearn/ensemble/iforest.py
+++ b/sklearn/ensemble/iforest.py
@@ -165,7 +165,7 @@ def __init__(self,
                  behaviour='old',
                  random_state=None,
                  verbose=0):
-        super(IsolationForest, self).__init__(
+        super().__init__(
             base_estimator=ExtraTreeRegressor(
                 max_features=1,
                 splitter='random',
@@ -266,7 +266,7 @@ def fit(self, X, y=None, sample_weight=None):
 
         self.max_samples_ = max_samples
         max_depth = int(np.ceil(np.log2(max(max_samples, 2))))
-        super(IsolationForest, self)._fit(X, y, max_samples,
+        super()._fit(X, y, max_samples,
                                           max_depth=max_depth,
                                           sample_weight=sample_weight)
 
@@ -381,8 +381,8 @@ def score_samples(self, X):
         X = check_array(X, accept_sparse='csr')
         if self.n_features_ != X.shape[1]:
             raise ValueError("Number of features of the model must "
-                             "match the input. Model n_features is {0} and "
-                             "input n_features is {1}."
+                             "match the input. Model n_features is {} and "
+                             "input n_features is {}."
                              "".format(self.n_features_, X.shape[1]))
         n_samples = X.shape[0]
 
diff --git a/sklearn/ensemble/partial_dependence.py b/sklearn/ensemble/partial_dependence.py
index 9460b29184df7..7e0e978590402 100644
--- a/sklearn/ensemble/partial_dependence.py
+++ b/sklearn/ensemble/partial_dependence.py
@@ -307,7 +307,7 @@ def convert_feature(fx):
             names.append(l)
     except IndexError:
         raise ValueError('All entries of features must be less than '
-                         'len(feature_names) = {0}, got {1}.'
+                         'len(feature_names) = {}, got {}.'
                          .format(len(feature_names), i))
 
     # compute PD functions
diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py
index a31c75c7524f6..67d09a449c35b 100644
--- a/sklearn/ensemble/tests/test_bagging.py
+++ b/sklearn/ensemble/tests/test_bagging.py
@@ -85,7 +85,7 @@ class CustomSVC(SVC):
         """SVC variant that records the nature of the training set"""
 
         def fit(self, X, y):
-            super(CustomSVC, self).fit(X, y)
+            super().fit(X, y)
             self.data_type_ = type(X)
             return self
 
@@ -173,7 +173,7 @@ class CustomSVR(SVR):
         """SVC variant that records the nature of the training set"""
 
         def fit(self, X, y):
-            super(CustomSVR, self).fit(X, y)
+            super().fit(X, y)
             self.data_type_ = type(X)
             return self
 
@@ -477,8 +477,8 @@ def test_parallel_classification():
 
     X_err = np.hstack((X_test, np.zeros((X_test.shape[0], 1))))
     assert_raise_message(ValueError, "Number of features of the model "
-                         "must match the input. Model n_features is {0} "
-                         "and input n_features is {1} "
+                         "must match the input. Model n_features is {} "
+                         "and input n_features is {} "
                          "".format(X_test.shape[1], X_err.shape[1]),
                          ensemble.decision_function, X_err)
 
@@ -631,8 +631,8 @@ def test_warm_start(random_state=42):
                                   warm_start=False)
     clf_no_ws.fit(X, y)
 
-    assert_equal(set([tree.random_state for tree in clf_ws]),
-                 set([tree.random_state for tree in clf_no_ws]))
+    assert_equal({tree.random_state for tree in clf_ws},
+                 {tree.random_state for tree in clf_no_ws})
 
 
 def test_warm_start_smaller_n_estimators():
diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py
index 22b11be2a0b8a..60bc1fa25184b 100644
--- a/sklearn/ensemble/tests/test_forest.py
+++ b/sklearn/ensemble/tests/test_forest.py
@@ -776,7 +776,7 @@ def check_min_samples_split(name):
     node_samples = est.estimators_[0].tree_.n_node_samples[node_idx]
 
     assert_greater(np.min(node_samples), len(X) * 0.5 - 1,
-                   "Failed with {0}".format(name))
+                   "Failed with {}".format(name))
 
     est = ForestEstimator(min_samples_split=0.5, n_estimators=1,
                           random_state=0)
@@ -785,7 +785,7 @@ def check_min_samples_split(name):
     node_samples = est.estimators_[0].tree_.n_node_samples[node_idx]
 
     assert_greater(np.min(node_samples), len(X) * 0.5 - 1,
-                   "Failed with {0}".format(name))
+                   "Failed with {}".format(name))
 
 
 @pytest.mark.filterwarnings('ignore:The default value of n_estimators')
@@ -813,7 +813,7 @@ def check_min_samples_leaf(name):
     # drop inner nodes
     leaf_count = node_counts[node_counts != 0]
     assert_greater(np.min(leaf_count), 4,
-                   "Failed with {0}".format(name))
+                   "Failed with {}".format(name))
 
     est = ForestEstimator(min_samples_leaf=0.25, n_estimators=1,
                           random_state=0)
@@ -823,7 +823,7 @@ def check_min_samples_leaf(name):
     # drop inner nodes
     leaf_count = node_counts[node_counts != 0]
     assert_greater(np.min(leaf_count), len(X) * 0.25 - 1,
-                   "Failed with {0}".format(name))
+                   "Failed with {}".format(name))
 
 
 @pytest.mark.filterwarnings('ignore:The default value of n_estimators')
@@ -858,8 +858,8 @@ def check_min_weight_fraction_leaf(name):
         assert_greater_equal(
             np.min(leaf_weights),
             total_weight * est.min_weight_fraction_leaf,
-            "Failed with {0} "
-            "min_weight_fraction_leaf={1}".format(
+            "Failed with {} "
+            "min_weight_fraction_leaf={}".format(
                 name, est.min_weight_fraction_leaf))
 
 
@@ -1108,11 +1108,11 @@ def check_warm_start(name, random_state=42):
                                 warm_start=False)
     clf_no_ws.fit(X, y)
 
-    assert_equal(set([tree.random_state for tree in clf_ws]),
-                 set([tree.random_state for tree in clf_no_ws]))
+    assert_equal({tree.random_state for tree in clf_ws},
+                 {tree.random_state for tree in clf_no_ws})
 
     assert_array_equal(clf_ws.apply(X), clf_no_ws.apply(X),
-                       err_msg="Failed with {0}".format(name))
+                       err_msg="Failed with {}".format(name))
 
 
 @pytest.mark.parametrize('name', FOREST_ESTIMATORS)
@@ -1311,11 +1311,11 @@ def test_nestimators_future_warning(forest):
 class MyBackend(DEFAULT_JOBLIB_BACKEND):
     def __init__(self, *args, **kwargs):
         self.count = 0
-        super(MyBackend, self).__init__(*args, **kwargs)
+        super().__init__(*args, **kwargs)
 
     def start_call(self):
         self.count += 1
-        return super(MyBackend, self).start_call()
+        return super().start_call()
 
 
 register_parallel_backend('testing', MyBackend)
diff --git a/sklearn/ensemble/tests/test_weight_boosting.py b/sklearn/ensemble/tests/test_weight_boosting.py
index d61e55262a3bb..053ba7450cbb1 100755
--- a/sklearn/ensemble/tests/test_weight_boosting.py
+++ b/sklearn/ensemble/tests/test_weight_boosting.py
@@ -59,7 +59,7 @@ def test_samme_proba():
 
     # _samme_proba calls estimator.predict_proba.
     # Make a mock object so I can control what gets returned.
-    class MockEstimator(object):
+    class MockEstimator:
         def predict_proba(self, X):
             assert_array_equal(X.shape, probs.shape)
             return probs
@@ -127,7 +127,7 @@ def test_iris():
         # Check we used multiple estimators
         assert_greater(len(clf.estimators_), 1)
         # Check for distinct random states (see issue #7408)
-        assert_equal(len(set(est.random_state for est in clf.estimators_)),
+        assert_equal(len({est.random_state for est in clf.estimators_}),
                      len(clf.estimators_))
 
     # Somewhat hacky regression test: prior to
@@ -148,7 +148,7 @@ def test_boston():
     # Check we used multiple estimators
     assert len(reg.estimators_) > 1
     # Check for distinct random states (see issue #7408)
-    assert_equal(len(set(est.random_state for est in reg.estimators_)),
+    assert_equal(len({est.random_state for est in reg.estimators_}),
                  len(reg.estimators_))
 
 
@@ -328,7 +328,7 @@ class CustomSVC(SVC):
 
         def fit(self, X, y, sample_weight=None):
             """Modification on fit caries data type for later verification."""
-            super(CustomSVC, self).fit(X, y, sample_weight=sample_weight)
+            super().fit(X, y, sample_weight=sample_weight)
             self.data_type_ = type(X)
             return self
 
@@ -425,7 +425,7 @@ class CustomSVR(SVR):
 
         def fit(self, X, y, sample_weight=None):
             """Modification on fit caries data type for later verification."""
-            super(CustomSVR, self).fit(X, y, sample_weight=sample_weight)
+            super().fit(X, y, sample_weight=sample_weight)
             self.data_type_ = type(X)
             return self
 
diff --git a/sklearn/ensemble/voting_classifier.py b/sklearn/ensemble/voting_classifier.py
index 39df07bb27468..6c7dd84547df2 100644
--- a/sklearn/ensemble/voting_classifier.py
+++ b/sklearn/ensemble/voting_classifier.py
@@ -327,7 +327,7 @@ def set_params(self, **params):
         eclf.set_params(rf=None)
 
         """
-        super(VotingClassifier, self)._set_params('estimators', **params)
+        super()._set_params('estimators', **params)
         return self
 
     def get_params(self, deep=True):
@@ -339,8 +339,7 @@ def get_params(self, deep=True):
             Setting it to True gets the various classifiers and the parameters
             of the classifiers as well
         """
-        return super(VotingClassifier,
-                     self)._get_params('estimators', deep=deep)
+        return super()._get_params('estimators', deep=deep)
 
     def _predict(self, X):
         """Collect results from clf.predict calls. """
diff --git a/sklearn/ensemble/weight_boosting.py b/sklearn/ensemble/weight_boosting.py
index 2145724b08b5b..f82a0d882b873 100644
--- a/sklearn/ensemble/weight_boosting.py
+++ b/sklearn/ensemble/weight_boosting.py
@@ -62,7 +62,7 @@ def __init__(self,
                  learning_rate=1.,
                  random_state=None):
 
-        super(BaseWeightBoosting, self).__init__(
+        super().__init__(
             base_estimator=base_estimator,
             n_estimators=n_estimators,
             estimator_params=estimator_params)
@@ -376,7 +376,7 @@ def __init__(self,
                  algorithm='SAMME.R',
                  random_state=None):
 
-        super(AdaBoostClassifier, self).__init__(
+        super().__init__(
             base_estimator=base_estimator,
             n_estimators=n_estimators,
             learning_rate=learning_rate,
@@ -409,11 +409,11 @@ def fit(self, X, y, sample_weight=None):
             raise ValueError("algorithm %s is not supported" % self.algorithm)
 
         # Fit
-        return super(AdaBoostClassifier, self).fit(X, y, sample_weight)
+        return super().fit(X, y, sample_weight)
 
     def _validate_estimator(self):
         """Check the estimator and set the base_estimator_ attribute."""
-        super(AdaBoostClassifier, self)._validate_estimator(
+        super()._validate_estimator(
             default=DecisionTreeClassifier(max_depth=1))
 
         #  SAMME-R requires predict_proba-enabled base estimators
@@ -921,7 +921,7 @@ def __init__(self,
                  loss='linear',
                  random_state=None):
 
-        super(AdaBoostRegressor, self).__init__(
+        super().__init__(
             base_estimator=base_estimator,
             n_estimators=n_estimators,
             learning_rate=learning_rate,
@@ -956,11 +956,11 @@ def fit(self, X, y, sample_weight=None):
                 "loss must be 'linear', 'square', or 'exponential'")
 
         # Fit
-        return super(AdaBoostRegressor, self).fit(X, y, sample_weight)
+        return super().fit(X, y, sample_weight)
 
     def _validate_estimator(self):
         """Check the estimator and set the base_estimator_ attribute."""
-        super(AdaBoostRegressor, self)._validate_estimator(
+        super()._validate_estimator(
             default=DecisionTreeRegressor(max_depth=3))
 
     def _boost(self, iboost, X, y, sample_weight, random_state):
diff --git a/sklearn/externals/_arff.py b/sklearn/externals/_arff.py
index 82f504542f9a9..07354e70cd78f 100644
--- a/sklearn/externals/_arff.py
+++ b/sklearn/externals/_arff.py
@@ -303,7 +303,7 @@ class BadAttributeFormat(ArffException):
 class BadDataFormat(ArffException):
     '''Error raised when some data instance is in an invalid format.'''
     def __init__(self, value):
-        super(BadDataFormat, self).__init__()
+        super().__init__()
         self.message = (
             'Bad @DATA instance format in line %d: ' +
             ('%s' % value)
@@ -319,7 +319,7 @@ class BadAttributeName(ArffException):
     declaration.'''
 
     def __init__(self, value, value2):
-        super(BadAttributeName, self).__init__()
+        super().__init__()
         self.message = (
             ('Bad @ATTRIBUTE name %s at line' % value) +
             ' %d, this name is already in use in line' +
@@ -331,7 +331,7 @@ class BadNominalValue(ArffException):
     declared into it respective attribute declaration.'''
 
     def __init__(self, value):
-        super(BadNominalValue, self).__init__()
+        super().__init__()
         self.message = (
             ('Data value %s not found in nominal declaration, ' % value)
             + 'at line %d.'
@@ -340,7 +340,7 @@ def __init__(self, value):
 class BadNominalFormatting(ArffException):
     '''Error raised when a nominal value with space is not properly quoted.'''
     def __init__(self, value):
-        super(BadNominalFormatting, self).__init__()
+        super().__init__()
         self.message = (
             ('Nominal data value "%s" not properly quoted in line ' % value) +
             '%d.'
@@ -360,7 +360,7 @@ class BadLayout(ArffException):
     message = 'Invalid layout of the ARFF file, at line %d.'
 
     def __init__(self, msg=''):
-        super(BadLayout, self).__init__()
+        super().__init__()
         if msg:
             self.message = BadLayout.message + ' ' + msg.replace('%', '%%')
 
@@ -384,11 +384,11 @@ def __str__(self):
 # INTERNAL ====================================================================
 def encode_string(s):
     if _RE_QUOTE_CHARS.search(s):
-        return u"'%s'" % _RE_ESCAPE_CHARS.sub(r'\\', s)
+        return "'%s'" % _RE_ESCAPE_CHARS.sub(r'\\', s)
     return s
 
 
-class EncodedNominalConversor(object):
+class EncodedNominalConversor:
     def __init__(self, values):
         self.values = {v: i for i, v in enumerate(values)}
         self.values[0] = 0
@@ -400,7 +400,7 @@ def __call__(self, value):
             raise BadNominalValue(value)
 
 
-class NominalConversor(object):
+class NominalConversor:
     def __init__(self, values):
         self.values = set(values)
         self.zero_value = values[0]
@@ -417,7 +417,7 @@ def __call__(self, value):
         return unicode(value)
 
 
-class Data(object):
+class Data:
     '''Internal helper class to allow for different matrix types without
     making the code a huge collection of if statements.'''
     def __init__(self):
@@ -475,14 +475,14 @@ def encode_data(self, data, attributes):
 
             new_data = []
             for value in inst:
-                if value is None or value == u'' or value != value:
+                if value is None or value == '' or value != value:
                     s = '?'
                 else:
                     s = encode_string(unicode(value))
                 new_data.append(s)
 
             current_row += 1
-            yield u','.join(new_data)
+            yield ','.join(new_data)
 
 class COOData(Data):
     def __init__(self):
@@ -532,7 +532,7 @@ def encode_data(self, data, attributes):
             if row > current_row:
                 # Add empty rows if necessary
                 while current_row < row:
-                    yield " ".join([u"{", u','.join(new_data), u"}"])
+                    yield " ".join(["{", ','.join(new_data), "}"])
                     new_data = []
                     current_row += 1
 
@@ -542,13 +542,13 @@ def encode_data(self, data, attributes):
                     (current_row, col + 1, num_attributes)
                 )
 
-            if v is None or v == u'' or v != v:
+            if v is None or v == '' or v != v:
                 s = '?'
             else:
                 s = encode_string(unicode(v))
             new_data.append("%d %s" % (col, s))
 
-        yield " ".join([u"{", u','.join(new_data), u"}"])
+        yield " ".join(["{", ','.join(new_data), "}"])
 
 class LODData(Data):
     def __init__(self):
@@ -586,14 +586,14 @@ def encode_data(self, data, attributes):
 
             for col in sorted(row):
                 v = row[col]
-                if v is None or v == u'' or v != v:
+                if v is None or v == '' or v != v:
                     s = '?'
                 else:
                     s = encode_string(unicode(v))
                 new_data.append("%d %s" % (col, s))
 
             current_row += 1
-            yield " ".join([u"{", u','.join(new_data), u"}"])
+            yield " ".join(["{", ','.join(new_data), "}"])
 
 def _get_data_object_for_decoding(matrix_type):
     if matrix_type == DENSE:
@@ -620,7 +620,7 @@ def _get_data_object_for_encoding(matrix):
 # =============================================================================
 
 # ADVANCED INTERFACE ==========================================================
-class ArffDecoder(object):
+class ArffDecoder:
     '''An ARFF decoder.'''
 
     def __init__(self):
@@ -737,10 +737,10 @@ def _decode(self, s, encode_nominal=False, matrix_type=DENSE):
 
         # Create the return object
         obj = {
-            u'description': u'',
-            u'relation': u'',
-            u'attributes': [],
-            u'data': []
+            'description': '',
+            'relation': '',
+            'attributes': [],
+            'data': []
         }
         attribute_names = {}
 
@@ -851,7 +851,7 @@ def decode(self, s, encode_nominal=False, return_type=DENSE):
             raise e
 
 
-class ArffEncoder(object):
+class ArffEncoder:
     '''An ARFF encoder.'''
 
     def _encode_comment(self, s=''):
@@ -867,9 +867,9 @@ def _encode_comment(self, s=''):
         :return: a string with the encoded comment line.
         '''
         if s:
-            return u'%s %s'%(_TK_COMMENT, s)
+            return '%s %s'%(_TK_COMMENT, s)
         else:
-            return u'%s' % _TK_COMMENT
+            return '%s' % _TK_COMMENT
 
     def _encode_relation(self, name):
         '''(INTERNAL) Decodes a relation line.
@@ -885,7 +885,7 @@ def _encode_relation(self, name):
                 name = '"%s"'%name
                 break
 
-        return u'%s %s'%(_TK_RELATION, name)
+        return '%s %s'%(_TK_RELATION, name)
 
     def _encode_attribute(self, name, type_):
         '''(INTERNAL) Encodes an attribute line.
@@ -918,10 +918,10 @@ def _encode_attribute(self, name, type_):
         if isinstance(type_, (tuple, list)):
             type_tmp = []
             for i in range(len(type_)):
-                type_tmp.append(u'%s' % encode_string(type_[i]))
-            type_ = u'{%s}'%(u', '.join(type_tmp))
+                type_tmp.append('%s' % encode_string(type_[i]))
+            type_ = '{%s}'%(', '.join(type_tmp))
 
-        return u'%s %s %s'%(_TK_ATTRIBUTE, name, type_)
+        return '%s %s %s'%(_TK_ATTRIBUTE, name, type_)
 
     def encode(self, obj):
         '''Encodes a given object to an ARFF file.
@@ -931,7 +931,7 @@ def encode(self, obj):
         '''
         data = [row for row in self.iter_encode(obj)]
 
-        return u'\n'.join(data)
+        return '\n'.join(data)
 
     def iter_encode(self, obj):
         '''The iterative version of `arff.ArffEncoder.encode`.
@@ -952,7 +952,7 @@ def iter_encode(self, obj):
             raise BadObject('Relation name not found or with invalid value.')
 
         yield self._encode_relation(obj['relation'])
-        yield u''
+        yield ''
 
         # ATTRIBUTES
         if not obj.get('attributes'):
@@ -983,7 +983,7 @@ def iter_encode(self, obj):
                 attribute_names.add(attr[0])
 
             yield self._encode_attribute(attr[0], attr[1])
-        yield u''
+        yield ''
         attributes = obj['attributes']
 
         # DATA
@@ -993,7 +993,7 @@ def iter_encode(self, obj):
             for line in data.encode_data(obj.get('data'), attributes):
                 yield line
 
-        yield u''
+        yield ''
 
 # =============================================================================
 
@@ -1042,7 +1042,7 @@ def dump(obj, fp):
 
     last_row = next(generator)
     for row in generator:
-        fp.write(last_row + u'\n')
+        fp.write(last_row + '\n')
         last_row = row
     fp.write(last_row)
 
diff --git a/sklearn/externals/joblib/_dask.py b/sklearn/externals/joblib/_dask.py
index 98f8a65db3263..0145a2a589204 100644
--- a/sklearn/externals/joblib/_dask.py
+++ b/sklearn/externals/joblib/_dask.py
@@ -83,7 +83,7 @@ def _funcname(x):
     return funcname(x)
 
 
-class Batch(object):
+class Batch:
     def __init__(self, tasks):
         self.tasks = tasks
 
diff --git a/sklearn/externals/joblib/_memmapping_reducer.py b/sklearn/externals/joblib/_memmapping_reducer.py
index 5ba78195b22cd..d3340a7945efb 100644
--- a/sklearn/externals/joblib/_memmapping_reducer.py
+++ b/sklearn/externals/joblib/_memmapping_reducer.py
@@ -252,7 +252,7 @@ def reduce_memmap(a):
         return (loads, (dumps(np.asarray(a), protocol=HIGHEST_PROTOCOL),))
 
 
-class ArrayMemmapReducer(object):
+class ArrayMemmapReducer:
     """Reducer callable to dump large arrays to memmap files.
 
     Parameters
diff --git a/sklearn/externals/joblib/_parallel_backends.py b/sklearn/externals/joblib/_parallel_backends.py
index 0f0bcf0ab4213..712e488a8e071 100644
--- a/sklearn/externals/joblib/_parallel_backends.py
+++ b/sklearn/externals/joblib/_parallel_backends.py
@@ -193,7 +193,7 @@ def get_nested_backend(self):
         return get_active_backend()
 
 
-class PoolManagerMixin(object):
+class PoolManagerMixin:
     """A helper class for managing pool of workers."""
 
     _pool = None
@@ -234,7 +234,7 @@ def abort_everything(self, ensure_ready=True):
                            **self.parallel._backend_args)
 
 
-class AutoBatchingMixin(object):
+class AutoBatchingMixin:
     """A helper class for automagically batching jobs."""
 
     # In seconds, should be big enough to hide multiprocessing dispatching
@@ -418,7 +418,7 @@ def effective_n_jobs(self, n_jobs):
                     stacklevel=3)
             return 1
 
-        return super(MultiprocessingBackend, self).effective_n_jobs(n_jobs)
+        return super().effective_n_jobs(n_jobs)
 
     def configure(self, n_jobs=1, parallel=None, prefer=None, require=None,
                   **memmappingpool_args):
@@ -450,7 +450,7 @@ def configure(self, n_jobs=1, parallel=None, prefer=None, require=None,
 
     def terminate(self):
         """Shutdown the process or thread pool"""
-        super(MultiprocessingBackend, self).terminate()
+        super().terminate()
         if self.JOBLIB_SPAWNED_PROCESS in os.environ:
             del os.environ[self.JOBLIB_SPAWNED_PROCESS]
 
@@ -542,7 +542,7 @@ def abort_everything(self, ensure_ready=True):
             self.configure(n_jobs=self.parallel.n_jobs, parallel=self.parallel)
 
 
-class ImmediateResult(object):
+class ImmediateResult:
     def __init__(self, batch):
         # Don't delay the application, to avoid keeping the input
         # arguments in memory
@@ -552,7 +552,7 @@ def get(self):
         return self.results
 
 
-class SafeFunction(object):
+class SafeFunction:
     """Wrapper that handles the serialization of exception tracebacks.
 
     If an exception is triggered when calling the inner function, a copy of
diff --git a/sklearn/externals/joblib/_store_backends.py b/sklearn/externals/joblib/_store_backends.py
index 9196f0a7746a1..4a7e4cb2ac05b 100644
--- a/sklearn/externals/joblib/_store_backends.py
+++ b/sklearn/externals/joblib/_store_backends.py
@@ -136,7 +136,7 @@ def configure(self, location, verbose=0, backend_options=dict()):
         """
 
 
-class StoreBackendMixin(object):
+class StoreBackendMixin:
     """Class providing all logic for managing the store in a generic way.
 
     The StoreBackend subclass has to implement 3 methods: create_location,
@@ -153,9 +153,9 @@ def load_item(self, path, verbose=1, msg=None):
 
         if verbose > 1:
             if verbose < 10:
-                print('{0}...'.format(msg))
+                print('{}...'.format(msg))
             else:
-                print('{0} from {1}'.format(msg, full_path))
+                print('{} from {}'.format(msg, full_path))
 
         mmap_mode = (None if not hasattr(self, 'mmap_mode')
                      else self.mmap_mode)
@@ -283,7 +283,7 @@ def reduce_store_size(self, bytes_limit):
 
         for item in items_to_delete:
             if self.verbose > 10:
-                print('Deleting item {0}'.format(item))
+                print('Deleting item {}'.format(item))
             try:
                 self.clear_location(item.path)
             except OSError:
diff --git a/sklearn/externals/joblib/externals/cloudpickle/cloudpickle.py b/sklearn/externals/joblib/externals/cloudpickle/cloudpickle.py
index bf92569c1e8c0..586fba53ca87f 100644
--- a/sklearn/externals/joblib/externals/cloudpickle/cloudpickle.py
+++ b/sklearn/externals/joblib/externals/cloudpickle/cloudpickle.py
@@ -783,7 +783,7 @@ def __getitem__(self, item):
 
     def save_attrgetter(self, obj):
         """attrgetter serializer"""
-        class Dummy(object):
+        class Dummy:
             def __init__(self, attrs, index=None):
                 self.attrs = attrs
                 self.index = index
@@ -1028,7 +1028,7 @@ def instance(cls):
 
 
 @instance
-class _empty_cell_value(object):
+class _empty_cell_value:
     """sentinel for empty closures
     """
     @classmethod
diff --git a/sklearn/externals/joblib/externals/loky/_base.py b/sklearn/externals/joblib/externals/loky/_base.py
index 92422bbf3f2a4..3c16f2648adab 100644
--- a/sklearn/externals/joblib/externals/loky/_base.py
+++ b/sklearn/externals/joblib/externals/loky/_base.py
@@ -76,7 +76,7 @@ class TimeoutError(Error):
         """The operation exceeded the given deadline."""
         pass
 
-    class _Waiter(object):
+    class _Waiter:
         """Provides the event that wait() and as_completed() block on."""
         def __init__(self):
             self.event = threading.Event()
@@ -95,37 +95,37 @@ class _AsCompletedWaiter(_Waiter):
         """Used by as_completed()."""
 
         def __init__(self):
-            super(_AsCompletedWaiter, self).__init__()
+            super().__init__()
             self.lock = threading.Lock()
 
         def add_result(self, future):
             with self.lock:
-                super(_AsCompletedWaiter, self).add_result(future)
+                super().add_result(future)
                 self.event.set()
 
         def add_exception(self, future):
             with self.lock:
-                super(_AsCompletedWaiter, self).add_exception(future)
+                super().add_exception(future)
                 self.event.set()
 
         def add_cancelled(self, future):
             with self.lock:
-                super(_AsCompletedWaiter, self).add_cancelled(future)
+                super().add_cancelled(future)
                 self.event.set()
 
     class _FirstCompletedWaiter(_Waiter):
         """Used by wait(return_when=FIRST_COMPLETED)."""
 
         def add_result(self, future):
-            super(_FirstCompletedWaiter, self).add_result(future)
+            super().add_result(future)
             self.event.set()
 
         def add_exception(self, future):
-            super(_FirstCompletedWaiter, self).add_exception(future)
+            super().add_exception(future)
             self.event.set()
 
         def add_cancelled(self, future):
-            super(_FirstCompletedWaiter, self).add_cancelled(future)
+            super().add_cancelled(future)
             self.event.set()
 
     class _AllCompletedWaiter(_Waiter):
@@ -135,7 +135,7 @@ def __init__(self, num_pending_calls, stop_on_exception):
             self.num_pending_calls = num_pending_calls
             self.stop_on_exception = stop_on_exception
             self.lock = threading.Lock()
-            super(_AllCompletedWaiter, self).__init__()
+            super().__init__()
 
         def _decrement_pending_calls(self):
             with self.lock:
@@ -144,21 +144,21 @@ def _decrement_pending_calls(self):
                     self.event.set()
 
         def add_result(self, future):
-            super(_AllCompletedWaiter, self).add_result(future)
+            super().add_result(future)
             self._decrement_pending_calls()
 
         def add_exception(self, future):
-            super(_AllCompletedWaiter, self).add_exception(future)
+            super().add_exception(future)
             if self.stop_on_exception:
                 self.event.set()
             else:
                 self._decrement_pending_calls()
 
         def add_cancelled(self, future):
-            super(_AllCompletedWaiter, self).add_cancelled(future)
+            super().add_cancelled(future)
             self._decrement_pending_calls()
 
-    class _AcquireFutures(object):
+    class _AcquireFutures:
         """A context manager that does an ordered acquire of Future conditions.
         """
 
@@ -220,9 +220,9 @@ def as_completed(fs, timeout=None):
 
         fs = set(fs)
         with _AcquireFutures(fs):
-            finished = set(
+            finished = {
                     f for f in fs
-                    if f._state in [CANCELLED_AND_NOTIFIED, FINISHED])
+                    if f._state in [CANCELLED_AND_NOTIFIED, FINISHED]}
             pending = fs - finished
             waiter = _create_and_install_waiters(fs, _AS_COMPLETED)
 
@@ -284,8 +284,8 @@ def wait(fs, timeout=None, return_when=ALL_COMPLETED):
             futures.
         """
         with _AcquireFutures(fs):
-            done = set(f for f in fs
-                       if f._state in [CANCELLED_AND_NOTIFIED, FINISHED])
+            done = {f for f in fs
+                       if f._state in [CANCELLED_AND_NOTIFIED, FINISHED]}
             not_done = set(fs) - done
 
             if (return_when == FIRST_COMPLETED) and done:
@@ -308,7 +308,7 @@ def wait(fs, timeout=None, return_when=ALL_COMPLETED):
         done.update(waiter.finished_futures)
         return DoneAndNotDoneFutures(done, set(fs) - done)
 
-    class _BaseFuture(object):
+    class _BaseFuture:
         """Represents the result of an asynchronous computation."""
 
         def __init__(self):
@@ -536,7 +536,7 @@ def set_exception(self, exception):
                 self._condition.notify_all()
             self._invoke_callbacks()
 
-    class Executor(object):
+    class Executor:
         """This is an abstract base class for concrete asynchronous executors.
         """
 
diff --git a/sklearn/externals/joblib/externals/loky/backend/_win_reduction.py b/sklearn/externals/joblib/externals/loky/backend/_win_reduction.py
index 142e6e7c80ddc..72fd0f079933c 100644
--- a/sklearn/externals/joblib/externals/loky/backend/_win_reduction.py
+++ b/sklearn/externals/joblib/externals/loky/backend/_win_reduction.py
@@ -21,7 +21,7 @@
 
 
 if sys.version_info[:2] >= (3, 4) and sys.platform == 'win32':
-    class DupHandle(object):
+    class DupHandle:
         def __init__(self, handle, access, pid=None):
             # duplicate handle for process with given pid
             if pid is None:
diff --git a/sklearn/externals/joblib/externals/loky/backend/fork_exec.py b/sklearn/externals/joblib/externals/loky/backend/fork_exec.py
index eee2a1c80a231..acd3dcfae596b 100644
--- a/sklearn/externals/joblib/externals/loky/backend/fork_exec.py
+++ b/sklearn/externals/joblib/externals/loky/backend/fork_exec.py
@@ -19,11 +19,11 @@ def close_fds(keep_fds):  # pragma: no cover
 
     # We try to retrieve all the open fds
     try:
-        open_fds = set(int(fd) for fd in os.listdir('/proc/self/fd'))
+        open_fds = {int(fd) for fd in os.listdir('/proc/self/fd')}
     except FileNotFoundError:
         import resource
         max_nfds = resource.getrlimit(resource.RLIMIT_NOFILE)[0]
-        open_fds = set(fd for fd in range(3, max_nfds))
+        open_fds = {fd for fd in range(3, max_nfds)}
         open_fds.add(0)
 
     for i in open_fds - keep_fds:
diff --git a/sklearn/externals/joblib/externals/loky/backend/popen_loky_posix.py b/sklearn/externals/joblib/externals/loky/backend/popen_loky_posix.py
index 35a5907d21559..e953a47ff59a9 100644
--- a/sklearn/externals/joblib/externals/loky/backend/popen_loky_posix.py
+++ b/sklearn/externals/joblib/externals/loky/backend/popen_loky_posix.py
@@ -27,7 +27,7 @@
     # Wrapper for an fd used while launching a process
     #
 
-    class _DupFd(object):
+    class _DupFd:
         def __init__(self, fd):
             self.fd = reduction._mk_inheritable(fd)
 
@@ -40,7 +40,7 @@ def detach(self):
 
     __all__.append('Popen')
 
-    class Popen(object):
+    class Popen:
         method = 'loky'
         DupFd = _DupFd
 
diff --git a/sklearn/externals/joblib/externals/loky/backend/process.py b/sklearn/externals/joblib/externals/loky/backend/process.py
index f6a00c90e363c..bf9886e23af11 100644
--- a/sklearn/externals/joblib/externals/loky/backend/process.py
+++ b/sklearn/externals/joblib/externals/loky/backend/process.py
@@ -17,12 +17,12 @@ class LokyProcess(BaseProcess):
     def __init__(self, group=None, target=None, name=None, args=(),
                  kwargs={}, daemon=None, init_main_module=False):
         if sys.version_info < (3, 3):
-            super(LokyProcess, self).__init__(
+            super().__init__(
                 group=group, target=target, name=name, args=args,
                 kwargs=kwargs)
             self.daemon = daemon
         else:
-            super(LokyProcess, self).__init__(
+            super().__init__(
                 group=group, target=target, name=name, args=args,
                 kwargs=kwargs, daemon=daemon)
         self.authkey = self.authkey
@@ -76,7 +76,7 @@ def authkey(self, authkey):
         def _bootstrap(self):
             from .context import set_start_method
             set_start_method(self._start_method)
-            super(LokyProcess, self)._bootstrap()
+            super()._bootstrap()
 
 
 class LokyInitMainProcess(LokyProcess):
@@ -84,7 +84,7 @@ class LokyInitMainProcess(LokyProcess):
 
     def __init__(self, group=None, target=None, name=None, args=(),
                  kwargs={}, daemon=None):
-        super(LokyInitMainProcess, self).__init__(
+        super().__init__(
             group=group, target=target, name=name, args=args, kwargs=kwargs,
             daemon=daemon, init_main_module=True)
 
diff --git a/sklearn/externals/joblib/externals/loky/backend/queues.py b/sklearn/externals/joblib/externals/loky/backend/queues.py
index 0f9dfeae63877..b9bbb10bfa941 100644
--- a/sklearn/externals/joblib/externals/loky/backend/queues.py
+++ b/sklearn/externals/joblib/externals/loky/backend/queues.py
@@ -185,7 +185,7 @@ def _on_queue_feeder_error(self, e, obj):
     if sys.version_info[:2] < (3, 4):
         # Compat for python2.7/3.3 that use _send instead of _send_bytes
         def _after_fork(self):
-            super(Queue, self)._after_fork()
+            super()._after_fork()
             self._send_bytes = self._writer.send_bytes
 
 
diff --git a/sklearn/externals/joblib/externals/loky/backend/reduction.py b/sklearn/externals/joblib/externals/loky/backend/reduction.py
index 2a8347590a67e..91f89f384943e 100644
--- a/sklearn/externals/joblib/externals/loky/backend/reduction.py
+++ b/sklearn/externals/joblib/externals/loky/backend/reduction.py
@@ -37,7 +37,7 @@
 # permits to use instance base custom reducers. Only CustomizablePickler
 # should be used.
 
-class _ReducerRegistry(object):
+class _ReducerRegistry:
     """Registry for custom reducers.
 
     HIGHEST_PROTOCOL is selected by default as this pickler is used
diff --git a/sklearn/externals/joblib/externals/loky/backend/semaphore_tracker.py b/sklearn/externals/joblib/externals/loky/backend/semaphore_tracker.py
index 7d3f23e5f8e4f..d78b4e10c241d 100644
--- a/sklearn/externals/joblib/externals/loky/backend/semaphore_tracker.py
+++ b/sklearn/externals/joblib/externals/loky/backend/semaphore_tracker.py
@@ -44,7 +44,7 @@
 VERBOSE = False
 
 
-class SemaphoreTracker(object):
+class SemaphoreTracker:
 
     def __init__(self):
         self._lock = threading.Lock()
@@ -126,7 +126,7 @@ def unregister(self, name):
         self._send('UNREGISTER', name)
 
     def _send(self, cmd, name):
-        msg = '{0}:{1}\n'.format(cmd, name).encode('ascii')
+        msg = '{}:{}\n'.format(cmd, name).encode('ascii')
         if len(name) > 512:
             # posix guarantees that writes to a pipe of less than PIPE_BUF
             # bytes are atomic, and that PIPE_BUF >= 512
diff --git a/sklearn/externals/joblib/externals/loky/backend/semlock.py b/sklearn/externals/joblib/externals/loky/backend/semlock.py
index 2d35f6a2715a5..d83378164274f 100644
--- a/sklearn/externals/joblib/externals/loky/backend/semlock.py
+++ b/sklearn/externals/joblib/externals/loky/backend/semlock.py
@@ -143,7 +143,7 @@ def _sem_timedwait(handle, timeout):
         delay += 1e-3
 
 
-class SemLock(object):
+class SemLock:
     """ctypes wrapper to the unix semaphore"""
 
     _rand = tempfile._RandomNameSequence()
diff --git a/sklearn/externals/joblib/externals/loky/backend/synchronize.py b/sklearn/externals/joblib/externals/loky/backend/synchronize.py
index 4773b9dc87c5e..e8778349b1b20 100644
--- a/sklearn/externals/joblib/externals/loky/backend/synchronize.py
+++ b/sklearn/externals/joblib/externals/loky/backend/synchronize.py
@@ -56,7 +56,7 @@
 # Base class for semaphores and mutexes; wraps `_multiprocessing.SemLock`
 #
 
-class SemLock(object):
+class SemLock:
 
     _rand = tempfile._RandomNameSequence()
 
@@ -171,7 +171,7 @@ def __repr__(self):
 class Lock(SemLock):
 
     def __init__(self):
-        super(Lock, self).__init__(SEMAPHORE, 1, 1)
+        super().__init__(SEMAPHORE, 1, 1)
 
     def __repr__(self):
         try:
@@ -197,7 +197,7 @@ def __repr__(self):
 class RLock(SemLock):
 
     def __init__(self):
-        super(RLock, self).__init__(RECURSIVE_MUTEX, 1, 1)
+        super().__init__(RECURSIVE_MUTEX, 1, 1)
 
     def __repr__(self):
         try:
@@ -221,7 +221,7 @@ def __repr__(self):
 # Condition variable
 #
 
-class Condition(object):
+class Condition:
 
     def __init__(self, lock=None):
         self._lock = lock or RLock()
@@ -345,7 +345,7 @@ def wait_for(self, predicate, timeout=None):
 # Event
 #
 
-class Event(object):
+class Event:
 
     def __init__(self):
         self._cond = Condition(Lock())
diff --git a/sklearn/externals/joblib/externals/loky/cloudpickle_wrapper.py b/sklearn/externals/joblib/externals/loky/cloudpickle_wrapper.py
index 9edf9240f21f4..e6dbf2b4ff850 100644
--- a/sklearn/externals/joblib/externals/loky/cloudpickle_wrapper.py
+++ b/sklearn/externals/joblib/externals/loky/cloudpickle_wrapper.py
@@ -11,7 +11,7 @@
 WRAP_CACHE = dict()
 
 
-class CloudpickledObjectWrapper(object):
+class CloudpickledObjectWrapper:
     def __init__(self, obj, keep_wrapper=False):
         self._obj = obj
         self._keep_wrapper = keep_wrapper
diff --git a/sklearn/externals/joblib/externals/loky/process_executor.py b/sklearn/externals/joblib/externals/loky/process_executor.py
index 73672a8aa850f..a3fa66f5911f9 100644
--- a/sklearn/externals/joblib/externals/loky/process_executor.py
+++ b/sklearn/externals/joblib/externals/loky/process_executor.py
@@ -159,7 +159,7 @@ def clear(self):
             self._reader.recv_bytes()
 
 
-class _ExecutorFlags(object):
+class _ExecutorFlags:
     """necessary references to maintain executor states without preventing gc
 
     It permits to keep the information needed by queue_management_thread
@@ -237,7 +237,7 @@ def _rebuild_exc(exc, tb):
     return exc
 
 
-class _WorkItem(object):
+class _WorkItem:
 
     __slots__ = ["future", "fn", "args", "kwargs"]
 
@@ -248,7 +248,7 @@ def __init__(self, future, fn, args, kwargs):
         self.kwargs = kwargs
 
 
-class _ResultItem(object):
+class _ResultItem:
 
     def __init__(self, work_id, exception=None, result=None):
         self.work_id = work_id
@@ -256,7 +256,7 @@ def __init__(self, work_id, exception=None, result=None):
         self.result = result
 
 
-class _CallItem(object):
+class _CallItem:
 
     def __init__(self, work_id, fn, args, kwargs):
         self.work_id = work_id
@@ -283,7 +283,7 @@ def __init__(self, max_size=0, ctx=None, pending_work_items=None,
         self.thread_wakeup = thread_wakeup
         self.pending_work_items = pending_work_items
         self.running_work_items = running_work_items
-        super(_SafeQueue, self).__init__(max_size, reducers=reducers, ctx=ctx)
+        super().__init__(max_size, reducers=reducers, ctx=ctx)
 
     def _on_queue_feeder_error(self, e, obj):
         if isinstance(obj, _CallItem):
@@ -309,7 +309,7 @@ def _on_queue_feeder_error(self, e, obj):
                 del work_item
             self.thread_wakeup.wakeup()
         else:
-            super(_SafeQueue, self)._on_queue_feeder_error(e, obj)
+            super()._on_queue_feeder_error(e, obj)
 
 
 def _get_chunks(chunksize, *iterables):
@@ -1070,7 +1070,7 @@ def map(self, fn, *iterables, **kwargs):
         if chunksize < 1:
             raise ValueError("chunksize must be >= 1.")
 
-        results = super(ProcessPoolExecutor, self).map(
+        results = super().map(
             partial(_process_chunk, fn), _get_chunks(chunksize, *iterables),
             timeout=timeout)
         return _chain_from_iterable_of_lists(results)
diff --git a/sklearn/externals/joblib/externals/loky/reusable_executor.py b/sklearn/externals/joblib/externals/loky/reusable_executor.py
index 30b217fd4113c..7857d769d2cb9 100644
--- a/sklearn/externals/joblib/externals/loky/reusable_executor.py
+++ b/sklearn/externals/joblib/externals/loky/reusable_executor.py
@@ -138,7 +138,7 @@ class _ReusablePoolExecutor(ProcessPoolExecutor):
     def __init__(self, submit_resize_lock, max_workers=None, context=None,
                  timeout=None, executor_id=0, job_reducers=None,
                  result_reducers=None, initializer=None, initargs=()):
-        super(_ReusablePoolExecutor, self).__init__(
+        super().__init__(
             max_workers=max_workers, context=context, timeout=timeout,
             job_reducers=job_reducers, result_reducers=result_reducers,
             initializer=initializer, initargs=initargs)
@@ -147,7 +147,7 @@ def __init__(self, submit_resize_lock, max_workers=None, context=None,
 
     def submit(self, fn, *args, **kwargs):
         with self._submit_resize_lock:
-            return super(_ReusablePoolExecutor, self).submit(
+            return super().submit(
                 fn, *args, **kwargs)
 
     def _resize(self, max_workers):
@@ -201,5 +201,5 @@ def _setup_queues(self, job_reducers, result_reducers):
         # As this executor can be resized, use a large queue size to avoid
         # underestimating capacity and introducing overhead
         queue_size = 2 * cpu_count() + EXTRA_QUEUED_CALLS
-        super(_ReusablePoolExecutor, self)._setup_queues(
+        super()._setup_queues(
             job_reducers, result_reducers, queue_size=queue_size)
diff --git a/sklearn/externals/joblib/hashing.py b/sklearn/externals/joblib/hashing.py
index 88bd6cfdefeab..6519ae46076ab 100644
--- a/sklearn/externals/joblib/hashing.py
+++ b/sklearn/externals/joblib/hashing.py
@@ -24,7 +24,7 @@
     Pickler = pickle.Pickler
 
 
-class _ConsistentSet(object):
+class _ConsistentSet:
     """ Class used to ensure the hash of Sets is preserved
         whatever the order of its items.
     """
@@ -42,7 +42,7 @@ def __init__(self, set_sequence):
             self._sequence = sorted((hash(e) for e in set_sequence))
 
 
-class _MyHash(object):
+class _MyHash:
     """ Class used to hash objects that won't normally pickle """
 
     def __init__(self, *args):
diff --git a/sklearn/externals/joblib/logger.py b/sklearn/externals/joblib/logger.py
index f30efef8535d2..645015556283d 100644
--- a/sklearn/externals/joblib/logger.py
+++ b/sklearn/externals/joblib/logger.py
@@ -60,7 +60,7 @@ def pformat(obj, indent=0, depth=3):
 ###############################################################################
 # class `Logger`
 ###############################################################################
-class Logger(object):
+class Logger:
     """ Base class for logging messages.
     """
 
@@ -88,7 +88,7 @@ def format(self, obj, indent=0):
 ###############################################################################
 # class `PrintTime`
 ###############################################################################
-class PrintTime(object):
+class PrintTime:
     """ Print and log messages while keeping track of time.
     """
 
diff --git a/sklearn/externals/joblib/memory.py b/sklearn/externals/joblib/memory.py
index f744aaae2196a..89532e2ccb0db 100644
--- a/sklearn/externals/joblib/memory.py
+++ b/sklearn/externals/joblib/memory.py
@@ -87,11 +87,11 @@ def register_store_backend(backend_name, backend):
     """
     if not isinstance(backend_name, _basestring):
         raise ValueError("Store backend name should be a string, "
-                         "'{0}' given.".format(backend_name))
+                         "'{}' given.".format(backend_name))
     if backend is None or not issubclass(backend, StoreBackendBase):
         raise ValueError("Store backend should inherit "
                          "StoreBackendBase, "
-                         "'{0}' given.".format(backend))
+                         "'{}' given.".format(backend))
 
     _STORE_BACKENDS[backend_name] = backend
 
@@ -116,7 +116,7 @@ def _store_backend_factory(backend, location, verbose=0, backend_options=None):
         # By default, we assume the FileSystemStoreBackend can be used if no
         # matching backend could be found.
         if obj is None:
-            raise TypeError('Unknown location {0} or backend {1}'.format(
+            raise TypeError('Unknown location {} or backend {}'.format(
                             location, backend))
 
         # The store backend is configured with the extra named parameters,
@@ -163,10 +163,10 @@ def _format_load_msg(func_id, args_id, timestamp=None, metadata=None):
         pass
 
     if timestamp is not None:
-        ts_string = "{0: <16}".format(format_time(time.time() - timestamp))
+        ts_string = "{: <16}".format(format_time(time.time() - timestamp))
     else:
         ts_string = ""
-    return '[Memory]{0}: Loading {1}'.format(ts_string, str(signature))
+    return '[Memory]{}: Loading {}'.format(ts_string, str(signature))
 
 
 # An in-memory store to avoid looking at the disk-based function
@@ -282,7 +282,7 @@ def __getstate__(self):
         return state
 
 
-class NotMemorizedResult(object):
+class NotMemorizedResult:
     """Class representing an arbitrary value.
 
     This class is a replacement for MemorizedResult when there is no cache.
@@ -323,7 +323,7 @@ def __setstate__(self, state):
 ###############################################################################
 # class `NotMemorizedFunc`
 ###############################################################################
-class NotMemorizedFunc(object):
+class NotMemorizedFunc:
     """No-op object decorating a function.
 
     This class replaces MemorizedFunc when there is no cache. It provides an
@@ -345,7 +345,7 @@ def call_and_shelve(self, *args, **kwargs):
         return NotMemorizedResult(self.func(*args, **kwargs))
 
     def __repr__(self):
-        return '{0}(func={1})'.format(self.__class__.__name__, self.func)
+        return '{}(func={})'.format(self.__class__.__name__, self.func)
 
     def clear(self, warn=True):
         # Argument "warn" is for compatibility with MemorizedFunc.clear
@@ -481,8 +481,8 @@ def _cached_call(self, args, kwargs, shelving=False):
                 self.store_backend.contains_item([func_id, args_id])):
             if self._verbose > 10:
                 _, name = get_func_name(self.func)
-                self.warn('Computing func {0}, argument hash {1} '
-                          'in location {2}'
+                self.warn('Computing func {}, argument hash {} '
+                          'in location {}'
                           .format(name, args_id,
                                   self.store_backend.
                                   get_cached_func_info([func_id])['location']))
@@ -590,7 +590,7 @@ def _write_func_code(self, func_code, first_line):
         # file. This is bad practice, but joblib should be robust to bad
         # practice.
         func_id = _build_func_identifier(self.func)
-        func_code = u'%s %i\n%s' % (FIRST_LINE_TEXT, first_line, func_code)
+        func_code = '%s %i\n%s' % (FIRST_LINE_TEXT, first_line, func_code)
         self.store_backend.store_cached_func_code([func_id], func_code)
 
         # Also store in the in-memory store of function hashes
@@ -656,13 +656,13 @@ def _check_previous_func_code(self, stacklevel=2):
                                      win_characters=False)
         if old_first_line == first_line == -1 or func_name == '<lambda>':
             if not first_line == -1:
-                func_description = ("{0} ({1}:{2})"
+                func_description = ("{} ({}:{})"
                                     .format(func_name, source_file,
                                             first_line))
             else:
                 func_description = func_name
             warnings.warn(JobLibCollisionWarning(
-                "Cannot detect name collisions for function '{0}'"
+                "Cannot detect name collisions for function '{}'"
                 .format(func_description)), stacklevel=stacklevel)
 
         # Fetch the code at the old location and compare it. If it is the
@@ -694,7 +694,7 @@ def _check_previous_func_code(self, stacklevel=2):
         # XXX: Should be using warnings, and giving stacklevel
         if self._verbose > 10:
             _, func_name = get_func_name(self.func, resolv_alias=False)
-            self.warn("Function {0} (identified by {1}) has changed"
+            self.warn("Function {} (identified by {}) has changed"
                       ".".format(func_name, func_id))
         self.clear(warn=True)
         return False
@@ -752,7 +752,7 @@ def _persist_input(self, duration, args, kwargs, this_duration_limit=0.5):
         argument_dict = filter_args(self.func, self.ignore,
                                     args, kwargs)
 
-        input_repr = dict((k, repr(v)) for k, v in argument_dict.items())
+        input_repr = {k: repr(v) for k, v in argument_dict.items()}
         # This can fail due to race-conditions with multiple
         # concurrent joblibs removing the file or the directory
         metadata = {"duration": duration, "input_args": input_repr}
diff --git a/sklearn/externals/joblib/numpy_pickle.py b/sklearn/externals/joblib/numpy_pickle.py
index bae0df31fa9c2..f4a7a410f7a73 100644
--- a/sklearn/externals/joblib/numpy_pickle.py
+++ b/sklearn/externals/joblib/numpy_pickle.py
@@ -43,7 +43,7 @@
 # Utility objects for persistence.
 
 
-class NumpyArrayWrapper(object):
+class NumpyArrayWrapper:
     """An object to be persisted instead of numpy arrays.
 
     This object is used to hack into the pickle machinery and read numpy
diff --git a/sklearn/externals/joblib/numpy_pickle_compat.py b/sklearn/externals/joblib/numpy_pickle_compat.py
index ba8ab827914e0..8cedf6010aac1 100644
--- a/sklearn/externals/joblib/numpy_pickle_compat.py
+++ b/sklearn/externals/joblib/numpy_pickle_compat.py
@@ -76,7 +76,7 @@ def write_zfile(file_handle, data, compress=1):
 # Utility objects for persistence.
 
 
-class NDArrayWrapper(object):
+class NDArrayWrapper:
     """An object to be persisted instead of numpy arrays.
 
     The only thing this object does, is to carry the filename in which
diff --git a/sklearn/externals/joblib/parallel.py b/sklearn/externals/joblib/parallel.py
index df28678ad95fb..598f4fc45b05a 100644
--- a/sklearn/externals/joblib/parallel.py
+++ b/sklearn/externals/joblib/parallel.py
@@ -119,7 +119,7 @@ def get_active_backend(prefer=None, require=None, verbose=0):
     return backend, DEFAULT_N_JOBS
 
 
-class parallel_backend(object):
+class parallel_backend:
     """Change the default backend used by Parallel inside a with block.
 
     If ``backend`` is a string it must match a previously registered
@@ -203,7 +203,7 @@ def unregister(self):
         DEFAULT_MP_CONTEXT = mp.get_context(method=method)
 
 
-class BatchedCalls(object):
+class BatchedCalls:
     """Wrap a sequence of (func, args, kwargs) tuples as a single callable"""
 
     def __init__(self, iterator_slice, backend_and_jobs, pickle_cache=None):
@@ -281,7 +281,7 @@ def delayed_function(*args, **kwargs):
 
 
 ###############################################################################
-class BatchCompletionCallBack(object):
+class BatchCompletionCallBack:
     """Callback used by joblib.Parallel's multiprocessing backend.
 
     This callable is executed by the parent process whenever a worker process
diff --git a/sklearn/externals/joblib/pool.py b/sklearn/externals/joblib/pool.py
index 396a3dfb4efcc..bd0f66add7961 100644
--- a/sklearn/externals/joblib/pool.py
+++ b/sklearn/externals/joblib/pool.py
@@ -99,7 +99,7 @@ def dispatcher(self, obj):
             self.dispatch_table[type] = reduce_func
 
 
-class CustomizablePicklingQueue(object):
+class CustomizablePicklingQueue:
     """Locked Pipe implementation that uses a customizable pickler.
 
     This class is an alternative to the multiprocessing implementation
@@ -204,7 +204,7 @@ def __init__(self, processes=None, forward_reducers=None,
         self._backward_reducers = backward_reducers
         poolargs = dict(processes=processes)
         poolargs.update(kwargs)
-        super(PicklingPool, self).__init__(**poolargs)
+        super().__init__(**poolargs)
 
     def _setup_queues(self):
         context = getattr(self, '_ctx', mp)
@@ -310,13 +310,13 @@ def __init__(self, processes=None, temp_folder=None, max_nbytes=1e6,
             forward_reducers=forward_reducers,
             backward_reducers=backward_reducers)
         poolargs.update(kwargs)
-        super(MemmappingPool, self).__init__(**poolargs)
+        super().__init__(**poolargs)
 
     def terminate(self):
         n_retries = 10
         for i in range(n_retries):
             try:
-                super(MemmappingPool, self).terminate()
+                super().terminate()
                 break
             except OSError as e:
                 if isinstance(e, WindowsError):
diff --git a/sklearn/externals/six.py b/sklearn/externals/six.py
index 85898ec71275f..f3e04c03e897b 100644
--- a/sklearn/externals/six.py
+++ b/sklearn/externals/six.py
@@ -52,7 +52,7 @@
         MAXSIZE = int((1 << 31) - 1)
     else:
         # It's possible to have sizeof(long) != sizeof(Py_ssize_t).
-        class X(object):
+        class X:
             def __len__(self):
                 return 1 << 31
         try:
@@ -77,7 +77,7 @@ def _import_module(name):
     return sys.modules[name]
 
 
-class _LazyDescr(object):
+class _LazyDescr:
 
     def __init__(self, name):
         self.name = name
@@ -93,7 +93,7 @@ def __get__(self, obj, tp):
 class MovedModule(_LazyDescr):
 
     def __init__(self, name, old, new=None):
-        super(MovedModule, self).__init__(name)
+        super().__init__(name)
         if PY3:
             if new is None:
                 new = name
@@ -108,7 +108,7 @@ def _resolve(self):
 class MovedAttribute(_LazyDescr):
 
     def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None):
-        super(MovedAttribute, self).__init__(name)
+        super().__init__(name)
         if PY3:
             if new_mod is None:
                 new_mod = name
@@ -408,7 +408,7 @@ def get_unbound_function(unbound):
     def create_bound_method(func, obj):
         return types.MethodType(func, obj, obj.__class__)
 
-    class Iterator(object):
+    class Iterator:
 
         def next(self):
             return type(self).__next__(self)
diff --git a/sklearn/feature_extraction/dict_vectorizer.py b/sklearn/feature_extraction/dict_vectorizer.py
index a37819b6da64b..29d4ae58e57c5 100644
--- a/sklearn/feature_extraction/dict_vectorizer.py
+++ b/sklearn/feature_extraction/dict_vectorizer.py
@@ -125,7 +125,7 @@ def fit(self, X, y=None):
 
         if self.sort:
             feature_names.sort()
-            vocab = dict((f, i) for i, f in enumerate(feature_names))
+            vocab = {f: i for i, f in enumerate(feature_names)}
 
         self.feature_names_ = feature_names
         self.vocabulary_ = vocab
diff --git a/sklearn/feature_extraction/tests/test_feature_hasher.py b/sklearn/feature_extraction/tests/test_feature_hasher.py
index e3472682da7b2..919f5856ebb86 100644
--- a/sklearn/feature_extraction/tests/test_feature_hasher.py
+++ b/sklearn/feature_extraction/tests/test_feature_hasher.py
@@ -15,7 +15,7 @@ def test_feature_hasher_dicts():
     assert_equal("dict", h.input_type)
 
     raw_X = [{"foo": "bar", "dada": 42, "tzara": 37},
-             {"foo": "baz", "gaga": u"string1"}]
+             {"foo": "baz", "gaga": "string1"}]
     X1 = FeatureHasher(n_features=16).transform(raw_X)
     gen = (iter(d.items()) for d in raw_X)
     X2 = FeatureHasher(n_features=16, input_type="pair").transform(gen)
@@ -59,7 +59,7 @@ def test_feature_hasher_pairs():
 
 def test_feature_hasher_pairs_with_string_values():
     raw_X = (iter(d.items()) for d in [{"foo": 1, "bar": "a"},
-                                       {"baz": u"abc", "quux": 4, "foo": -1}])
+                                       {"baz": "abc", "quux": 4, "foo": -1}])
     h = FeatureHasher(n_features=16, input_type="pair")
     x1, x2 = h.transform(raw_X).toarray()
     x1_nz = sorted(np.abs(x1[x1 != 0]))
diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py
index 004f771126724..2a3d0dcbaafaf 100644
--- a/sklearn/feature_extraction/tests/test_text.py
+++ b/sklearn/feature_extraction/tests/test_text.py
@@ -67,7 +67,7 @@ def uppercase(s):
 
 
 def strip_eacute(s):
-    return s.replace(u'é', 'e')
+    return s.replace('é', 'e')
 
 
 def split_tokenize(s):
@@ -80,11 +80,11 @@ def lazy_analyze(s):
 
 def test_strip_accents():
     # check some classical latin accentuated symbols
-    a = u'àáâãäåçèéêë'
+    a = 'àáâãäåçèéêë'
     expected = 'aaaaaaceeee'
     assert_equal(strip_accents_unicode(a), expected)
 
-    a = u'ìíîïñòóôõöùúûüý'
+    a = 'ìíîïñòóôõöùúûüý'
     expected = 'iiiinooooouuuuy'
     assert_equal(strip_accents_unicode(a), expected)
 
@@ -94,18 +94,18 @@ def test_strip_accents():
     assert_equal(strip_accents_unicode(a), expected)
 
     # mix letters accentuated and not
-    a = u"this is à test"
+    a = "this is à test"
     expected = 'this is a test'
     assert_equal(strip_accents_unicode(a), expected)
 
 
 def test_to_ascii():
     # check some classical latin accentuated symbols
-    a = u'àáâãäåçèéêë'
+    a = 'àáâãäåçèéêë'
     expected = 'aaaaaaceeee'
     assert_equal(strip_accents_ascii(a), expected)
 
-    a = u"ìíîïñòóôõöùúûüý"
+    a = "ìíîïñòóôõöùúûüý"
     expected = 'iiiinooooouuuuy'
     assert_equal(strip_accents_ascii(a), expected)
 
@@ -115,7 +115,7 @@ def test_to_ascii():
     assert_equal(strip_accents_ascii(a), expected)
 
     # mix letters accentuated and not
-    a = u"this is à test"
+    a = "this is à test"
     expected = 'this is a test'
     assert_equal(strip_accents_ascii(a), expected)
 
@@ -123,8 +123,8 @@ def test_to_ascii():
 @pytest.mark.parametrize('Vectorizer', (CountVectorizer, HashingVectorizer))
 def test_word_analyzer_unigrams(Vectorizer):
     wa = Vectorizer(strip_accents='ascii').build_analyzer()
-    text = (u"J'ai mangé du kangourou  ce midi, "
-            u"c'était pas très bon.")
+    text = ("J'ai mangé du kangourou  ce midi, "
+            "c'était pas très bon.")
     expected = ['ai', 'mange', 'du', 'kangourou', 'ce', 'midi',
                 'etait', 'pas', 'tres', 'bon']
     assert_equal(wa(text), expected)
@@ -142,8 +142,8 @@ def test_word_analyzer_unigrams(Vectorizer):
 
     # with custom preprocessor
     wa = Vectorizer(preprocessor=uppercase).build_analyzer()
-    text = (u"J'ai mangé du kangourou  ce midi, "
-            u" c'était pas très bon.")
+    text = ("J'ai mangé du kangourou  ce midi, "
+            " c'était pas très bon.")
     expected = ['AI', 'MANGE', 'DU', 'KANGOUROU', 'CE', 'MIDI',
                 'ETAIT', 'PAS', 'TRES', 'BON']
     assert_equal(wa(text), expected)
@@ -151,8 +151,8 @@ def test_word_analyzer_unigrams(Vectorizer):
     # with custom tokenizer
     wa = Vectorizer(tokenizer=split_tokenize,
                     strip_accents='ascii').build_analyzer()
-    text = (u"J'ai mangé du kangourou  ce midi, "
-            u"c'était pas très bon.")
+    text = ("J'ai mangé du kangourou  ce midi, "
+            "c'était pas très bon.")
     expected = ["j'ai", 'mange', 'du', 'kangourou', 'ce', 'midi,',
                 "c'etait", 'pas', 'tres', 'bon.']
     assert_equal(wa(text), expected)
@@ -162,7 +162,7 @@ def test_word_analyzer_unigrams_and_bigrams():
     wa = CountVectorizer(analyzer="word", strip_accents='unicode',
                          ngram_range=(1, 2)).build_analyzer()
 
-    text = u"J'ai mangé du kangourou  ce midi, c'était pas très bon."
+    text = "J'ai mangé du kangourou  ce midi, c'était pas très bon."
     expected = ['ai', 'mange', 'du', 'kangourou', 'ce', 'midi',
                 'etait', 'pas', 'tres', 'bon', 'ai mange', 'mange du',
                 'du kangourou', 'kangourou ce', 'ce midi', 'midi etait',
@@ -173,7 +173,7 @@ def test_word_analyzer_unigrams_and_bigrams():
 def test_unicode_decode_error():
     # decode_error default to strict, so this should fail
     # First, encode (as bytes) a unicode string.
-    text = u"J'ai mangé du kangourou  ce midi, c'était pas très bon."
+    text = "J'ai mangé du kangourou  ce midi, c'était pas très bon."
     text_bytes = text.encode('utf-8')
 
     # Then let the Analyzer try to decode it as ascii. It should fail,
@@ -190,7 +190,7 @@ def test_char_ngram_analyzer():
     cnga = CountVectorizer(analyzer='char', strip_accents='unicode',
                            ngram_range=(3, 6)).build_analyzer()
 
-    text = u"J'ai mangé du kangourou  ce midi, c'était pas très bon"
+    text = "J'ai mangé du kangourou  ce midi, c'était pas très bon"
     expected = ["j'a", "'ai", 'ai ', 'i m', ' ma']
     assert_equal(cnga(text)[:5], expected)
     expected = ['s tres', ' tres ', 'tres b', 'res bo', 'es bon']
@@ -583,8 +583,8 @@ def test_feature_names():
 @pytest.mark.parametrize('Vectorizer', (CountVectorizer, TfidfVectorizer))
 def test_vectorizer_max_features(Vectorizer):
     expected_vocabulary = {'burger', 'beer', 'salad', 'pizza'}
-    expected_stop_words = {u'celeri', u'tomato', u'copyright', u'coke',
-                           u'sparkling', u'water', u'the'}
+    expected_stop_words = {'celeri', 'tomato', 'copyright', 'coke',
+                           'sparkling', 'water', 'the'}
 
     # test bounded number of extracted features
     vectorizer = Vectorizer(max_df=0.6, max_features=4)
@@ -833,9 +833,9 @@ def test_vectorizer_pipeline_cross_validation():
 def test_vectorizer_unicode():
     # tests that the count vectorizer works with cyrillic.
     document = (
-        u"Машинное обучение — обширный подраздел искусственного "
-        u"интеллекта, изучающий методы построения алгоритмов, "
-        u"способных обучаться."
+        "Машинное обучение — обширный подраздел искусственного "
+        "интеллекта, изучающий методы построения алгоритмов, "
+        "способных обучаться."
         )
 
     vect = CountVectorizer()
diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py
index 20a5ee14248e7..e9a963f07c952 100644
--- a/sklearn/feature_extraction/text.py
+++ b/sklearn/feature_extraction/text.py
@@ -113,7 +113,7 @@ def _check_stop_list(stop):
         return frozenset(stop)
 
 
-class VectorizerMixin(object):
+class VectorizerMixin:
     """Provides common code for text vectorizers (tokenization logic)."""
 
     _white_spaces = re.compile(r"\s\s+")
@@ -1498,7 +1498,7 @@ def __init__(self, input='content', encoding='utf-8',
                  dtype=np.float64, norm='l2', use_idf=True, smooth_idf=True,
                  sublinear_tf=False):
 
-        super(TfidfVectorizer, self).__init__(
+        super().__init__(
             input=input, encoding=encoding, decode_error=decode_error,
             strip_accents=strip_accents, lowercase=lowercase,
             preprocessor=preprocessor, tokenizer=tokenizer, analyzer=analyzer,
@@ -1580,7 +1580,7 @@ def fit(self, raw_documents, y=None):
         self : TfidfVectorizer
         """
         self._check_params()
-        X = super(TfidfVectorizer, self).fit_transform(raw_documents)
+        X = super().fit_transform(raw_documents)
         self._tfidf.fit(X)
         return self
 
@@ -1601,7 +1601,7 @@ def fit_transform(self, raw_documents, y=None):
             Tf-idf-weighted document-term matrix.
         """
         self._check_params()
-        X = super(TfidfVectorizer, self).fit_transform(raw_documents)
+        X = super().fit_transform(raw_documents)
         self._tfidf.fit(X)
         # X is already a transformed view of raw_documents so
         # we set copy to False
@@ -1629,5 +1629,5 @@ def transform(self, raw_documents, copy=True):
         """
         check_is_fitted(self, '_tfidf', 'The tfidf vector is not fitted')
 
-        X = super(TfidfVectorizer, self).transform(raw_documents)
+        X = super().transform(raw_documents)
         return self._tfidf.transform(X, copy=False)
diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py
index c7d360f728b1a..1f912d7c8e6a7 100644
--- a/sklearn/feature_selection/tests/test_rfe.py
+++ b/sklearn/feature_selection/tests/test_rfe.py
@@ -24,7 +24,7 @@
 from sklearn.metrics import get_scorer
 
 
-class MockClassifier(object):
+class MockClassifier:
     """
     Dummy classifier to test recursive feature elimination
     """
diff --git a/sklearn/feature_selection/univariate_selection.py b/sklearn/feature_selection/univariate_selection.py
index 653466733921a..554cb3d392b29 100644
--- a/sklearn/feature_selection/univariate_selection.py
+++ b/sklearn/feature_selection/univariate_selection.py
@@ -420,7 +420,7 @@ class SelectPercentile(_BaseFilter):
     """
 
     def __init__(self, score_func=f_classif, percentile=10):
-        super(SelectPercentile, self).__init__(score_func)
+        super().__init__(score_func)
         self.percentile = percentile
 
     def _check_params(self, X, y):
@@ -504,7 +504,7 @@ class SelectKBest(_BaseFilter):
     """
 
     def __init__(self, score_func=f_classif, k=10):
-        super(SelectKBest, self).__init__(score_func)
+        super().__init__(score_func)
         self.k = k
 
     def _check_params(self, X, y):
@@ -583,7 +583,7 @@ class SelectFpr(_BaseFilter):
     """
 
     def __init__(self, score_func=f_classif, alpha=5e-2):
-        super(SelectFpr, self).__init__(score_func)
+        super().__init__(score_func)
         self.alpha = alpha
 
     def _get_support_mask(self):
@@ -649,7 +649,7 @@ class SelectFdr(_BaseFilter):
     """
 
     def __init__(self, score_func=f_classif, alpha=5e-2):
-        super(SelectFdr, self).__init__(score_func)
+        super().__init__(score_func)
         self.alpha = alpha
 
     def _get_support_mask(self):
@@ -712,7 +712,7 @@ class SelectFwe(_BaseFilter):
     """
 
     def __init__(self, score_func=f_classif, alpha=5e-2):
-        super(SelectFwe, self).__init__(score_func)
+        super().__init__(score_func)
         self.alpha = alpha
 
     def _get_support_mask(self):
@@ -786,7 +786,7 @@ class GenericUnivariateSelect(_BaseFilter):
                         'fwe': SelectFwe}
 
     def __init__(self, score_func=f_classif, mode='percentile', param=1e-5):
-        super(GenericUnivariateSelect, self).__init__(score_func)
+        super().__init__(score_func)
         self.mode = mode
         self.param = param
 
diff --git a/sklearn/gaussian_process/gpc.py b/sklearn/gaussian_process/gpc.py
index bca6bc506de32..79bae04b4da7f 100644
--- a/sklearn/gaussian_process/gpc.py
+++ b/sklearn/gaussian_process/gpc.py
@@ -191,7 +191,7 @@ def fit(self, X, y):
                              "y contains classes %s"
                              % (self.__class__.__name__, self.classes_))
         elif self.classes_.size == 1:
-            raise ValueError("{0:s} requires 2 classes; got {1:d} class"
+            raise ValueError("{:s} requires 2 classes; got {:d} class"
                              .format(self.__class__.__name__,
                                      self.classes_.size))
 
diff --git a/sklearn/gaussian_process/kernels.py b/sklearn/gaussian_process/kernels.py
index b929b2f2f1b2e..2833966cea1b2 100644
--- a/sklearn/gaussian_process/kernels.py
+++ b/sklearn/gaussian_process/kernels.py
@@ -102,7 +102,7 @@ def __new__(cls, name, value_type, bounds, n_elements=1, fixed=None):
 
         if fixed is None:
             fixed = isinstance(bounds, str) and bounds == "fixed"
-        return super(Hyperparameter, cls).__new__(
+        return super().__new__(
             cls, name, value_type, bounds, n_elements, fixed)
 
     # This is mainly a testing utility to check that two hyperparameters
@@ -325,8 +325,8 @@ def __eq__(self, b):
         return True
 
     def __repr__(self):
-        return "{0}({1})".format(self.__class__.__name__,
-                                 ", ".join(map("{0:.3g}".format, self.theta)))
+        return "{}({})".format(self.__class__.__name__,
+                               ", ".join(map("{:.3g}".format, self.theta)))
 
     @abstractmethod
     def __call__(self, X, Y=None, eval_gradient=False):
@@ -356,7 +356,7 @@ def is_stationary(self):
         """Returns whether the kernel is stationary. """
 
 
-class NormalizedKernelMixin(object):
+class NormalizedKernelMixin:
     """Mixin for kernels which are normalized: k(X, X)=1.
 
     .. versionadded:: 0.18
@@ -382,7 +382,7 @@ def diag(self, X):
         return np.ones(X.shape[0])
 
 
-class StationaryKernelMixin(object):
+class StationaryKernelMixin:
     """Mixin for kernels which are stationary: k(X, Y)= f(X-Y).
 
     .. versionadded:: 0.18
@@ -708,7 +708,7 @@ def diag(self, X):
         return self.k1.diag(X) + self.k2.diag(X)
 
     def __repr__(self):
-        return "{0} + {1}".format(self.k1, self.k2)
+        return "{} + {}".format(self.k1, self.k2)
 
 
 class Product(KernelOperator):
@@ -783,7 +783,7 @@ def diag(self, X):
         return self.k1.diag(X) * self.k2.diag(X)
 
     def __repr__(self):
-        return "{0} * {1}".format(self.k1, self.k2)
+        return "{} * {}".format(self.k1, self.k2)
 
 
 class Exponentiation(Kernel):
@@ -936,7 +936,7 @@ def diag(self, X):
         return self.kernel.diag(X) ** self.exponent
 
     def __repr__(self):
-        return "{0} ** {1}".format(self.kernel, self.exponent)
+        return "{} ** {}".format(self.kernel, self.exponent)
 
     def is_stationary(self):
         """Returns whether the kernel is stationary. """
@@ -1038,7 +1038,7 @@ def diag(self, X):
                        dtype=np.array(self.constant_value).dtype)
 
     def __repr__(self):
-        return "{0:.3g}**2".format(np.sqrt(self.constant_value))
+        return "{:.3g}**2".format(np.sqrt(self.constant_value))
 
 
 class WhiteKernel(StationaryKernelMixin, Kernel):
@@ -1134,7 +1134,7 @@ def diag(self, X):
                        dtype=np.array(self.noise_level).dtype)
 
     def __repr__(self):
-        return "{0}(noise_level={1:.3g})".format(self.__class__.__name__,
+        return "{}(noise_level={:.3g})".format(self.__class__.__name__,
                                                  self.noise_level)
 
 
@@ -1244,11 +1244,11 @@ def __call__(self, X, Y=None, eval_gradient=False):
 
     def __repr__(self):
         if self.anisotropic:
-            return "{0}(length_scale=[{1}])".format(
-                self.__class__.__name__, ", ".join(map("{0:.3g}".format,
+            return "{}(length_scale=[{}])".format(
+                self.__class__.__name__, ", ".join(map("{:.3g}".format,
                                                    self.length_scale)))
         else:  # isotropic
-            return "{0}(length_scale={1:.3g})".format(
+            return "{}(length_scale={:.3g})".format(
                 self.__class__.__name__, np.ravel(self.length_scale)[0])
 
 
@@ -1293,7 +1293,7 @@ class Matern(RBF):
     """
     def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5),
                  nu=1.5):
-        super(Matern, self).__init__(length_scale, length_scale_bounds)
+        super().__init__(length_scale, length_scale_bounds)
         self.nu = nu
 
     def __call__(self, X, Y=None, eval_gradient=False):
@@ -1392,12 +1392,12 @@ def f(theta):  # helper function
 
     def __repr__(self):
         if self.anisotropic:
-            return "{0}(length_scale=[{1}], nu={2:.3g})".format(
+            return "{}(length_scale=[{}], nu={:.3g})".format(
                 self.__class__.__name__,
-                ", ".join(map("{0:.3g}".format, self.length_scale)),
+                ", ".join(map("{:.3g}".format, self.length_scale)),
                 self.nu)
         else:
-            return "{0}(length_scale={1:.3g}, nu={2:.3g})".format(
+            return "{}(length_scale={:.3g}, nu={:.3g})".format(
                 self.__class__.__name__, np.ravel(self.length_scale)[0],
                 self.nu)
 
@@ -1510,7 +1510,7 @@ def __call__(self, X, Y=None, eval_gradient=False):
             return K
 
     def __repr__(self):
-        return "{0}(alpha={1:.3g}, length_scale={2:.3g})".format(
+        return "{}(alpha={:.3g}, length_scale={:.3g})".format(
             self.__class__.__name__, self.alpha, self.length_scale)
 
 
@@ -1623,7 +1623,7 @@ def __call__(self, X, Y=None, eval_gradient=False):
             return K
 
     def __repr__(self):
-        return "{0}(length_scale={1:.3g}, periodicity={2:.3g})".format(
+        return "{}(length_scale={:.3g}, periodicity={:.3g})".format(
             self.__class__.__name__, self.length_scale, self.periodicity)
 
 
@@ -1732,7 +1732,7 @@ def is_stationary(self):
         return False
 
     def __repr__(self):
-        return "{0}(sigma_0={1:.3g})".format(
+        return "{}(sigma_0={:.3g})".format(
             self.__class__.__name__, self.sigma_0)
 
 
@@ -1870,5 +1870,5 @@ def is_stationary(self):
         return self.metric in ["rbf"]
 
     def __repr__(self):
-        return "{0}(gamma={1}, metric={2})".format(
+        return "{}(gamma={}, metric={})".format(
             self.__class__.__name__, self.gamma, self.metric)
diff --git a/sklearn/gaussian_process/tests/test_kernels.py b/sklearn/gaussian_process/tests/test_kernels.py
index 33a769b852c59..e5eeae514dcd1 100644
--- a/sklearn/gaussian_process/tests/test_kernels.py
+++ b/sklearn/gaussian_process/tests/test_kernels.py
@@ -85,8 +85,8 @@ def test_kernel_theta(kernel):
     theta_vars = map(lambda s: s[0:-len("_bounds")],
                      filter(lambda s: s.endswith("_bounds"), args))
     assert_equal(
-        set(hyperparameter.name
-            for hyperparameter in kernel.hyperparameters),
+        {hyperparameter.name
+            for hyperparameter in kernel.hyperparameters},
         set(theta_vars))
 
     # Check that values returned in theta are consistent with
diff --git a/sklearn/impute.py b/sklearn/impute.py
index 349af27eeb91e..c4e40297b783d 100644
--- a/sklearn/impute.py
+++ b/sklearn/impute.py
@@ -167,8 +167,8 @@ def __init__(self, missing_values=np.nan, strategy="mean",
     def _validate_input(self, X):
         allowed_strategies = ["mean", "median", "most_frequent", "constant"]
         if self.strategy not in allowed_strategies:
-            raise ValueError("Can only use these strategies: {0} "
-                             " got strategy={1}".format(allowed_strategies,
+            raise ValueError("Can only use these strategies: {} "
+                             " got strategy={}".format(allowed_strategies,
                                                         self.strategy))
 
         if self.strategy in ("most_frequent", "constant"):
@@ -186,8 +186,8 @@ def _validate_input(self, X):
                             force_all_finite=force_all_finite, copy=self.copy)
         except ValueError as ve:
             if "could not convert" in str(ve):
-                raise ValueError("Cannot use {0} strategy with non-numeric "
-                                 "data. Received datatype :{1}."
+                raise ValueError("Cannot use {} strategy with non-numeric "
+                                 "data. Received datatype :{}."
                                  "".format(self.strategy, X.dtype.kind))
             else:
                 raise ve
@@ -195,7 +195,7 @@ def _validate_input(self, X):
         _check_inputs_dtype(X, self.missing_values)
         if X.dtype.kind not in ("i", "u", "f", "O"):
             raise ValueError("SimpleImputer does not support data with dtype "
-                             "{0}. Please provide either a numeric array (with"
+                             "{}. Please provide either a numeric array (with"
                              " a floating point or integer dtype) or "
                              "categorical data represented either as an array "
                              "with integer dtype or an array of string values "
@@ -232,7 +232,7 @@ def fit(self, X, y=None):
         if (self.strategy == "constant" and
                 X.dtype.kind in ("i", "u", "f") and
                 not isinstance(fill_value, numbers.Real)):
-            raise ValueError("'fill_value'={0} is invalid. Expected a "
+            raise ValueError("'fill_value'={} is invalid. Expected a "
                              "numerical value when imputing numerical "
                              "data".format(fill_value))
 
diff --git a/sklearn/isotonic.py b/sklearn/isotonic.py
index 7b74048e18594..c4872fb2b8380 100644
--- a/sklearn/isotonic.py
+++ b/sklearn/isotonic.py
@@ -226,7 +226,7 @@ def _build_f(self, X, y):
         # Handle the out_of_bounds argument by setting bounds_error
         if self.out_of_bounds not in ["raise", "nan", "clip"]:
             raise ValueError("The argument ``out_of_bounds`` must be in "
-                             "'nan', 'clip', 'raise'; got {0}"
+                             "'nan', 'clip', 'raise'; got {}"
                              .format(self.out_of_bounds))
 
         bounds_error = self.out_of_bounds == "raise"
@@ -352,7 +352,7 @@ def transform(self, T):
         # Handle the out_of_bounds argument by clipping if needed
         if self.out_of_bounds not in ["raise", "nan", "clip"]:
             raise ValueError("The argument ``out_of_bounds`` must be in "
-                             "'nan', 'clip', 'raise'; got {0}"
+                             "'nan', 'clip', 'raise'; got {}"
                              .format(self.out_of_bounds))
 
         if self.out_of_bounds == "clip":
@@ -376,7 +376,7 @@ def predict(self, T):
 
     def __getstate__(self):
         """Pickle-protocol - return state of the estimator. """
-        state = super(IsotonicRegression, self).__getstate__()
+        state = super().__getstate__()
         # remove interpolation method
         state.pop('f_', None)
         return state
@@ -386,6 +386,6 @@ def __setstate__(self, state):
 
         We need to rebuild the interpolation function.
         """
-        super(IsotonicRegression, self).__setstate__(state)
+        super().__setstate__(state)
         if hasattr(self, '_necessary_X_') and hasattr(self, '_necessary_y_'):
             self._build_f(self._necessary_X_, self._necessary_y_)
diff --git a/sklearn/linear_model/base.py b/sklearn/linear_model/base.py
index 4bbae6462e2e8..57b78598f9617 100644
--- a/sklearn/linear_model/base.py
+++ b/sklearn/linear_model/base.py
@@ -302,7 +302,7 @@ def _predict_proba_lr(self, X):
             return prob
 
 
-class SparseCoefMixin(object):
+class SparseCoefMixin:
     """Mixin for converting coef_ to and from CSR format.
 
     L1-regularizing estimators should inherit this.
diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py
index 3aac3b480169f..f414c3ba4b32f 100644
--- a/sklearn/linear_model/coordinate_descent.py
+++ b/sklearn/linear_model/coordinate_descent.py
@@ -800,7 +800,7 @@ def _decision_function(self, X):
             return safe_sparse_dot(X, self.coef_.T,
                                    dense_output=True) + self.intercept_
         else:
-            return super(ElasticNet, self)._decision_function(X)
+            return super()._decision_function(X)
 
 
 ###############################################################################
@@ -932,7 +932,7 @@ def __init__(self, alpha=1.0, fit_intercept=True, normalize=False,
                  precompute=False, copy_X=True, max_iter=1000,
                  tol=1e-4, warm_start=False, positive=False,
                  random_state=None, selection='cyclic'):
-        super(Lasso, self).__init__(
+        super().__init__(
             alpha=alpha, l1_ratio=1.0, fit_intercept=fit_intercept,
             normalize=normalize, precompute=precompute, copy_X=copy_X,
             max_iter=max_iter, tol=tol, warm_start=warm_start,
@@ -1224,9 +1224,9 @@ def fit(self, X, y):
             self.alphas_ = np.asarray(alphas[0])
 
         # Refit the model with the parameters selected
-        common_params = dict((name, value)
-                             for name, value in self.get_params().items()
-                             if name in model.get_params())
+        common_params = {name: value
+                         for name, value in self.get_params().items()
+                         if name in model.get_params()}
         model.set_params(**common_params)
         model.alpha = best_alpha
         model.l1_ratio = best_l1_ratio
@@ -1401,7 +1401,7 @@ def __init__(self, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True,
                  normalize=False, precompute='auto', max_iter=1000, tol=1e-4,
                  copy_X=True, cv='warn', verbose=False, n_jobs=None,
                  positive=False, random_state=None, selection='cyclic'):
-        super(LassoCV, self).__init__(
+        super().__init__(
             eps=eps, n_alphas=n_alphas, alphas=alphas,
             fit_intercept=fit_intercept, normalize=normalize,
             precompute=precompute, max_iter=max_iter, tol=tol, copy_X=copy_X,
@@ -2282,7 +2282,7 @@ def __init__(self, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True,
                  normalize=False, max_iter=1000, tol=1e-4, copy_X=True,
                  cv='warn', verbose=False, n_jobs=None, random_state=None,
                  selection='cyclic'):
-        super(MultiTaskLassoCV, self).__init__(
+        super().__init__(
             eps=eps, n_alphas=n_alphas, alphas=alphas,
             fit_intercept=fit_intercept, normalize=normalize,
             max_iter=max_iter, tol=tol, copy_X=copy_X,
diff --git a/sklearn/linear_model/least_angle.py b/sklearn/linear_model/least_angle.py
index 2079fe3d11379..d774a6353fa6f 100644
--- a/sklearn/linear_model/least_angle.py
+++ b/sklearn/linear_model/least_angle.py
@@ -1097,7 +1097,7 @@ def __init__(self, fit_intercept=True, verbose=False, max_iter=500,
         self.cv = cv
         self.max_n_alphas = max_n_alphas
         self.n_jobs = n_jobs
-        super(LarsCV, self).__init__(fit_intercept=fit_intercept,
+        super().__init__(fit_intercept=fit_intercept,
                                      verbose=verbose, normalize=normalize,
                                      precompute=precompute,
                                      n_nonzero_coefs=500,
diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py
index 3e812902a437d..d593ffbff0eab 100644
--- a/sklearn/linear_model/logistic.py
+++ b/sklearn/linear_model/logistic.py
@@ -1649,7 +1649,7 @@ def predict_proba(self, X):
                (self.multi_class == 'auto' and (self.classes_.size <= 2 or
                                                 self.solver == 'liblinear')))
         if ovr:
-            return super(LogisticRegression, self)._predict_proba_lr(X)
+            return super()._predict_proba_lr(X)
         else:
             decision = self.decision_function(X)
             if decision.ndim == 1:
@@ -2008,8 +2008,8 @@ def fit(self, X, y, sample_weight=None):
         label_encoder = LabelEncoder().fit(y)
         y = label_encoder.transform(y)
         if isinstance(class_weight, dict):
-            class_weight = dict((label_encoder.transform([cls])[0], v)
-                                for cls, v in class_weight.items())
+            class_weight = {label_encoder.transform([cls])[0]: v
+                            for cls, v in class_weight.items()}
 
         # The original class labels
         classes = self.classes_ = label_encoder.classes_
diff --git a/sklearn/linear_model/passive_aggressive.py b/sklearn/linear_model/passive_aggressive.py
index 3d900f937f35c..cd00d67e9ce8e 100644
--- a/sklearn/linear_model/passive_aggressive.py
+++ b/sklearn/linear_model/passive_aggressive.py
@@ -172,7 +172,7 @@ def __init__(self, C=1.0, fit_intercept=True, max_iter=None, tol=None,
                  n_iter_no_change=5, shuffle=True, verbose=0, loss="hinge",
                  n_jobs=None, random_state=None, warm_start=False,
                  class_weight=None, average=False, n_iter=None):
-        super(PassiveAggressiveClassifier, self).__init__(
+        super().__init__(
             penalty=None,
             fit_intercept=fit_intercept,
             max_iter=max_iter,
@@ -411,7 +411,7 @@ def __init__(self, C=1.0, fit_intercept=True, max_iter=None, tol=None,
                  loss="epsilon_insensitive", epsilon=DEFAULT_EPSILON,
                  random_state=None, warm_start=False,
                  average=False, n_iter=None):
-        super(PassiveAggressiveRegressor, self).__init__(
+        super().__init__(
             penalty=None,
             l1_ratio=0,
             epsilon=epsilon,
diff --git a/sklearn/linear_model/perceptron.py b/sklearn/linear_model/perceptron.py
index d55e4a5f50d86..325103e21c84e 100644
--- a/sklearn/linear_model/perceptron.py
+++ b/sklearn/linear_model/perceptron.py
@@ -154,7 +154,7 @@ def __init__(self, penalty=None, alpha=0.0001, fit_intercept=True,
                  n_jobs=None, random_state=0, early_stopping=False,
                  validation_fraction=0.1, n_iter_no_change=5,
                  class_weight=None, warm_start=False, n_iter=None):
-        super(Perceptron, self).__init__(
+        super().__init__(
             loss="perceptron", penalty=penalty, alpha=alpha, l1_ratio=0,
             fit_intercept=fit_intercept, max_iter=max_iter, tol=tol,
             shuffle=shuffle, verbose=verbose, random_state=random_state,
diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py
index 06028f441900c..9e3110b8547ad 100644
--- a/sklearn/linear_model/ridge.py
+++ b/sklearn/linear_model/ridge.py
@@ -653,7 +653,7 @@ class Ridge(_BaseRidge, RegressorMixin):
     def __init__(self, alpha=1.0, fit_intercept=True, normalize=False,
                  copy_X=True, max_iter=None, tol=1e-3, solver="auto",
                  random_state=None):
-        super(Ridge, self).__init__(alpha=alpha, fit_intercept=fit_intercept,
-                                    normalize=normalize, copy_X=copy_X,
-                                    max_iter=max_iter, tol=tol, solver=solver,
-                                    random_state=random_state)
+        super().__init__(alpha=alpha, fit_intercept=fit_intercept,
+                         normalize=normalize, copy_X=copy_X,
+                         max_iter=max_iter, tol=tol, solver=solver,
+                         random_state=random_state)
@@ -676,7 +676,7 @@ def fit(self, X, y, sample_weight=None):
         -------
         self : returns an instance of self.
         """
-        return super(Ridge, self).fit(X, y, sample_weight=sample_weight)
+        return super().fit(X, y, sample_weight=sample_weight)
 
 
 class RidgeClassifier(LinearClassifierMixin, _BaseRidge):
@@ -802,7 +802,7 @@ class RidgeClassifier(LinearClassifierMixin, _BaseRidge):
     def __init__(self, alpha=1.0, fit_intercept=True, normalize=False,
                  copy_X=True, max_iter=None, tol=1e-3, class_weight=None,
                  solver="auto", random_state=None):
-        super(RidgeClassifier, self).__init__(
+        super().__init__(
             alpha=alpha, fit_intercept=fit_intercept, normalize=normalize,
             copy_X=copy_X, max_iter=max_iter, tol=tol, solver=solver,
             random_state=random_state)
@@ -849,7 +849,7 @@ def fit(self, X, y, sample_weight=None):
             sample_weight = (sample_weight *
                              compute_sample_weight(self.class_weight, y))
 
-        super(RidgeClassifier, self).fit(X, Y, sample_weight=sample_weight)
+        super().fit(X, Y, sample_weight=sample_weight)
         return self
 
     @property
@@ -1385,7 +1385,7 @@ class RidgeClassifierCV(LinearClassifierMixin, _BaseRidgeCV):
     def __init__(self, alphas=(0.1, 1.0, 10.0), fit_intercept=True,
                  normalize=False, scoring=None, cv=None, class_weight=None,
                  store_cv_values=False):
-        super(RidgeClassifierCV, self).__init__(
+        super().__init__(
             alphas=alphas, fit_intercept=fit_intercept, normalize=normalize,
             scoring=scoring, cv=cv, store_cv_values=store_cv_values)
         self.class_weight = class_weight
diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py
index 17aad4521e54e..3b3ab0e95989c 100644
--- a/sklearn/linear_model/stochastic_gradient.py
+++ b/sklearn/linear_model/stochastic_gradient.py
@@ -44,7 +44,7 @@
 # Default value of ``epsilon`` parameter.
 
 
-class _ValidationScoreCallback(object):
+class _ValidationScoreCallback:
     """Callback for early stopping based on validation score"""
 
     def __init__(self, estimator, X_val, y_val, sample_weight_val,
@@ -100,7 +100,7 @@ def __init__(self, loss, penalty='l2', alpha=0.0001, C=1.0,
         self._validate_params(set_max_iter=False)
 
     def set_params(self, *args, **kwargs):
-        super(BaseSGD, self).set_params(*args, **kwargs)
+        super().set_params(*args, **kwargs)
         self._validate_params(set_max_iter=False)
         return self
 
@@ -493,7 +493,7 @@ def __init__(self, loss="hinge", penalty='l2', alpha=0.0001,
                  class_weight=None, warm_start=False, average=False,
                  n_iter=None):
 
-        super(BaseSGDClassifier, self).__init__(
+        super().__init__(
             loss=loss, penalty=penalty, alpha=alpha, l1_ratio=l1_ratio,
             fit_intercept=fit_intercept, max_iter=max_iter, tol=tol,
             shuffle=shuffle, verbose=verbose, epsilon=epsilon,
@@ -975,7 +975,7 @@ def __init__(self, loss="hinge", penalty='l2', alpha=0.0001, l1_ratio=0.15,
                  power_t=0.5, early_stopping=False, validation_fraction=0.1,
                  n_iter_no_change=5, class_weight=None, warm_start=False,
                  average=False, n_iter=None):
-        super(SGDClassifier, self).__init__(
+        super().__init__(
             loss=loss, penalty=penalty, alpha=alpha, l1_ratio=l1_ratio,
             fit_intercept=fit_intercept, max_iter=max_iter, tol=tol,
             shuffle=shuffle, verbose=verbose, epsilon=epsilon, n_jobs=n_jobs,
@@ -1119,7 +1119,7 @@ def __init__(self, loss="squared_loss", penalty="l2", alpha=0.0001,
                  power_t=0.25, early_stopping=False, validation_fraction=0.1,
                  n_iter_no_change=5, warm_start=False, average=False,
                  n_iter=None):
-        super(BaseSGDRegressor, self).__init__(
+        super().__init__(
             loss=loss, penalty=penalty, alpha=alpha, l1_ratio=l1_ratio,
             fit_intercept=fit_intercept, max_iter=max_iter, tol=tol,
             shuffle=shuffle, verbose=verbose, epsilon=epsilon,
@@ -1574,7 +1574,7 @@ def __init__(self, loss="squared_loss", penalty="l2", alpha=0.0001,
                  power_t=0.25, early_stopping=False, validation_fraction=0.1,
                  n_iter_no_change=5, warm_start=False, average=False,
                  n_iter=None):
-        super(SGDRegressor, self).__init__(
+        super().__init__(
             loss=loss, penalty=penalty, alpha=alpha, l1_ratio=l1_ratio,
             fit_intercept=fit_intercept, max_iter=max_iter, tol=tol,
             shuffle=shuffle, verbose=verbose, epsilon=epsilon,
diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py
index f861c05425d6a..517781c8e6cfd 100644
--- a/sklearn/linear_model/tests/test_logistic.py
+++ b/sklearn/linear_model/tests/test_logistic.py
@@ -111,7 +111,7 @@ def test_error():
 @pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_logistic_cv_mock_scorer():
 
-    class MockScorer(object):
+    class MockScorer:
         def __init__(self):
             self.calls = 0
             self.scores = [0.1, 0.4, 0.8, 0.5]
diff --git a/sklearn/linear_model/tests/test_perceptron.py b/sklearn/linear_model/tests/test_perceptron.py
index 7085129163d9b..e46949c256e9f 100644
--- a/sklearn/linear_model/tests/test_perceptron.py
+++ b/sklearn/linear_model/tests/test_perceptron.py
@@ -20,7 +20,7 @@
 X_csr.sort_indices()
 
 
-class MyPerceptron(object):
+class MyPerceptron:
 
     def __init__(self, n_iter=1):
         self.n_iter = n_iter
diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py
index af67b963d3f0c..629933a3bc8e9 100644
--- a/sklearn/linear_model/tests/test_sgd.py
+++ b/sklearn/linear_model/tests/test_sgd.py
@@ -42,19 +42,19 @@ class SparseSGDClassifier(SGDClassifier):
 
     def fit(self, X, y, *args, **kw):
         X = sp.csr_matrix(X)
-        return super(SparseSGDClassifier, self).fit(X, y, *args, **kw)
+        return super().fit(X, y, *args, **kw)
 
     def partial_fit(self, X, y, *args, **kw):
         X = sp.csr_matrix(X)
-        return super(SparseSGDClassifier, self).partial_fit(X, y, *args, **kw)
+        return super().partial_fit(X, y, *args, **kw)
 
     def decision_function(self, X):
         X = sp.csr_matrix(X)
-        return super(SparseSGDClassifier, self).decision_function(X)
+        return super().decision_function(X)
 
     def predict_proba(self, X):
         X = sp.csr_matrix(X)
-        return super(SparseSGDClassifier, self).predict_proba(X)
+        return super().predict_proba(X)
 
 
 class SparseSGDRegressor(SGDRegressor):
@@ -113,7 +113,7 @@ def decision_function(self, X, *args, **kw):
 ###############################################################################
 # Tests common to classification and regression
 
-class CommonTest(object):
+class CommonTest:
 
     def factory(self, **kwargs):
         if "random_state" not in kwargs:
diff --git a/sklearn/linear_model/theil_sen.py b/sklearn/linear_model/theil_sen.py
index 863a5d54672be..285ca554dbe5a 100644
--- a/sklearn/linear_model/theil_sen.py
+++ b/sklearn/linear_model/theil_sen.py
@@ -318,18 +318,18 @@ def _check_subparams(self, n_samples, n_features):
         if n_subsamples is not None:
             if n_subsamples > n_samples:
                 raise ValueError("Invalid parameter since n_subsamples > "
-                                 "n_samples ({0} > {1}).".format(n_subsamples,
-                                                                 n_samples))
+                                 "n_samples ({} > {}).".format(n_subsamples,
+                                                               n_samples))
             if n_samples >= n_features:
                 if n_dim > n_subsamples:
                     plus_1 = "+1" if self.fit_intercept else ""
-                    raise ValueError("Invalid parameter since n_features{0} "
-                                     "> n_subsamples ({1} > {2})."
-                                     "".format(plus_1, n_dim, n_samples))
+                    raise ValueError("Invalid parameter since n_features{} "
+                                     "> n_subsamples ({} > {})."
+                                     "".format(plus_1, n_dim, n_subsamples))
             else:  # if n_samples < n_features
                 if n_subsamples != n_samples:
                     raise ValueError("Invalid parameter since n_subsamples != "
-                                     "n_samples ({0} != {1}) while n_samples "
+                                     "n_samples ({} != {}) while n_samples "
                                      "< n_features.".format(n_subsamples,
                                                             n_samples))
         else:
@@ -337,7 +337,7 @@ def _check_subparams(self, n_samples, n_features):
 
         if self.max_subpopulation <= 0:
             raise ValueError("Subpopulation must be strictly positive "
-                             "({0} <= 0).".format(self.max_subpopulation))
+                             "({} <= 0).".format(self.max_subpopulation))
 
         all_combinations = max(1, np.rint(binom(n_samples, n_subsamples)))
         n_subpopulation = int(min(self.max_subpopulation, all_combinations))
@@ -366,11 +366,11 @@ def fit(self, X, y):
         self.breakdown_ = _breakdown_point(n_samples, n_subsamples)
 
         if self.verbose:
-            print("Breakdown point: {0}".format(self.breakdown_))
-            print("Number of samples: {0}".format(n_samples))
+            print("Breakdown point: {}".format(self.breakdown_))
+            print("Number of samples: {}".format(n_samples))
             tol_outliers = int(self.breakdown_ * n_samples)
-            print("Tolerable outliers: {0}".format(tol_outliers))
-            print("Number of subpopulations: {0}".format(
+            print("Tolerable outliers: {}".format(tol_outliers))
+            print("Number of subpopulations: {}".format(
                 self.n_subpopulation_))
 
         # Determine indices of subpopulation
diff --git a/sklearn/metrics/base.py b/sklearn/metrics/base.py
index 1877ee4e43f7c..e282f10fbb09e 100644
--- a/sklearn/metrics/base.py
+++ b/sklearn/metrics/base.py
@@ -66,12 +66,12 @@ def _average_binary_score(binary_metric, y_true, y_score, average,
     """
     average_options = (None, 'micro', 'macro', 'weighted', 'samples')
     if average not in average_options:
-        raise ValueError('average has to be one of {0}'
+        raise ValueError('average has to be one of {}'
                          ''.format(average_options))
 
     y_type = type_of_target(y_true)
     if y_type not in ("binary", "multilabel-indicator"):
-        raise ValueError("{0} format is not supported".format(y_type))
+        raise ValueError("{} format is not supported".format(y_type))
 
     if y_type == "binary":
         return binary_metric(y_true, y_score, sample_weight=sample_weight)
diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py
index 7f5250f8fa3bb..a9d22fbc9af6c 100644
--- a/sklearn/metrics/classification.py
+++ b/sklearn/metrics/classification.py
@@ -78,15 +78,15 @@ def _check_targets(y_true, y_pred):
         y_type = {"multiclass"}
 
     if len(y_type) > 1:
-        raise ValueError("Classification metrics can't handle a mix of {0} "
-                         "and {1} targets".format(type_true, type_pred))
+        raise ValueError("Classification metrics can't handle a mix of {} "
+                         "and {} targets".format(type_true, type_pred))
 
     # We can't have more than one value on y_type => The set is no more needed
     y_type = y_type.pop()
 
     # No metrics support "multiclass-multioutput" format
     if (y_type not in ["binary", "multiclass", "multilabel-indicator"]):
-        raise ValueError("{0} is not supported".format(y_type))
+        raise ValueError("{} is not supported".format(y_type))
 
     if y_type in ["binary", "multiclass"]:
         y_true = column_or_1d(y_true)
@@ -270,7 +270,7 @@ def confusion_matrix(y_true, y_pred, labels=None, sample_weight=None):
     check_consistent_length(y_true, y_pred, sample_weight)
 
     n_labels = labels.size
-    label_to_ind = dict((y, x) for x, y in enumerate(labels))
+    label_to_ind = {y: x for x, y in enumerate(labels)}
     # convert yt, yp into index
     y_pred = np.array([label_to_ind.get(x, n_labels + 1) for x in y_pred])
     y_true = np.array([label_to_ind.get(x, n_labels + 1) for x in y_true])
@@ -1074,9 +1074,9 @@ def _prf_divide(numerator, denominator, metric, modifier, average, warn_for):
         axis0, axis1 = axis1, axis0
 
     if metric in warn_for and 'f-score' in warn_for:
-        msg_start = '{0} and F-score are'.format(metric.title())
+        msg_start = '{} and F-score are'.format(metric.title())
     elif metric in warn_for:
-        msg_start = '{0} is'.format(metric.title())
+        msg_start = '{} is'.format(metric.title())
     elif 'f-score' in warn_for:
         msg_start = 'F-score is'
     else:
@@ -1087,7 +1087,7 @@ def _prf_divide(numerator, denominator, metric, modifier, average, warn_for):
     if len(mask) == 1:
         msg = msg.format('due to')
     else:
-        msg = msg.format('in {0}s with'.format(axis1))
+        msg = msg.format('in {}s with'.format(axis1))
     warnings.warn(msg, UndefinedMetricWarning, stacklevel=2)
     return result
 
@@ -1713,17 +1713,17 @@ class 2       1.00      0.67      0.80         3
     if target_names is not None and len(labels) != len(target_names):
         if labels_given:
             warnings.warn(
-                "labels size, {0}, does not match size of target_names, {1}"
+                "labels size, {}, does not match size of target_names, {}"
                 .format(len(labels), len(target_names))
             )
         else:
             raise ValueError(
-                "Number of classes, {0}, does not match size of "
-                "target_names, {1}. Try specifying the labels "
+                "Number of classes, {}, does not match size of "
+                "target_names, {}. Try specifying the labels "
                 "parameter".format(len(labels), len(target_names))
             )
     if target_names is None:
-        target_names = [u'%s' % l for l in labels]
+        target_names = ['%s' % l for l in labels]
 
     headers = ["precision", "recall", "f1-score", "support"]
     # compute per-class results without averaging
@@ -1747,13 +1747,13 @@ class 2       1.00      0.67      0.80         3
         longest_last_line_heading = 'weighted avg'
         name_width = max(len(cn) for cn in target_names)
         width = max(name_width, len(longest_last_line_heading), digits)
-        head_fmt = u'{:>{width}s} ' + u' {:>9}' * len(headers)
-        report = head_fmt.format(u'', *headers, width=width)
-        report += u'\n\n'
-        row_fmt = u'{:>{width}s} ' + u' {:>9.{digits}f}' * 3 + u' {:>9}\n'
+        head_fmt = '{:>{width}s} ' + ' {:>9}' * len(headers)
+        report = head_fmt.format('', *headers, width=width)
+        report += '\n\n'
+        row_fmt = '{:>{width}s} ' + ' {:>9.{digits}f}' * 3 + ' {:>9}\n'
         for row in rows:
             report += row_fmt.format(*row, width=width, digits=digits)
-        report += u'\n'
+        report += '\n'
 
     # compute all applicable averages
     for average in average_options:
@@ -1773,9 +1773,9 @@ class 2       1.00      0.67      0.80         3
                 zip(headers, [i.item() for i in avg]))
         else:
             if line_heading == 'accuracy':
-                row_fmt_accuracy = u'{:>{width}s} ' + \
-                        u' {:>9.{digits}}' * 2 + u' {:>9.{digits}f}' + \
-                        u' {:>9}\n'
+                row_fmt_accuracy = '{:>{width}s} ' + \
+                        ' {:>9.{digits}}' * 2 + ' {:>9.{digits}f}' + \
+                        ' {:>9}\n'
                 report += row_fmt_accuracy.format(line_heading, '', '',
                                                   *avg[2:], width=width,
                                                   digits=digits)
@@ -1894,7 +1894,7 @@ def hamming_loss(y_true, y_pred, labels=None, sample_weight=None):
     elif y_type in ["binary", "multiclass"]:
         return _weighted_sum(y_true != y_pred, sample_weight, normalize=True)
     else:
-        raise ValueError("{0} is not supported".format(y_type))
+        raise ValueError("{} is not supported".format(y_type))
 
 
 def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None,
@@ -1974,13 +1974,13 @@ def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None,
 
     if len(lb.classes_) == 1:
         if labels is None:
-            raise ValueError('y_true contains only one label ({0}). Please '
+            raise ValueError('y_true contains only one label ({}). Please '
                              'provide the true labels explicitly through the '
                              'labels argument.'.format(lb.classes_[0]))
         else:
             raise ValueError('The labels array needs to contain at least two '
                              'labels for log_loss, '
-                             'got {0}.'.format(lb.classes_))
+                             'got {}.'.format(lb.classes_))
 
     transformed_labels = lb.transform(y_true)
 
@@ -2003,16 +2003,16 @@ def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None,
     if len(lb.classes_) != y_pred.shape[1]:
         if labels is None:
             raise ValueError("y_true and y_pred contain different number of "
-                             "classes {0}, {1}. Please provide the true "
+                             "classes {}, {}. Please provide the true "
                              "labels explicitly through the labels argument. "
                              "Classes found in "
-                             "y_true: {2}".format(transformed_labels.shape[1],
-                                                  y_pred.shape[1],
-                                                  lb.classes_))
+                             "y_true: {}".format(transformed_labels.shape[1],
+                                                 y_pred.shape[1],
+                                                 lb.classes_))
         else:
             raise ValueError('The number of classes in labels is different '
                              'from that in y_pred. Classes found in '
-                             'labels: {0}'.format(lb.classes_))
+                             'labels: {}'.format(lb.classes_))
 
     # Renormalize
     y_pred /= y_pred.sum(axis=1)[:, np.newaxis]
diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py
index 1b1c2674c35ec..11404c51f0a79 100644
--- a/sklearn/metrics/pairwise.py
+++ b/sklearn/metrics/pairwise.py
@@ -1577,8 +1577,8 @@ def pairwise_kernels(X, Y=None, metric="linear", filter_params=False,
         func = metric.__call__
     elif metric in PAIRWISE_KERNEL_FUNCTIONS:
         if filter_params:
-            kwds = dict((k, kwds[k]) for k in kwds
-                        if k in KERNEL_PARAMS[metric])
+            kwds = {k: kwds[k] for k in kwds
+                    if k in KERNEL_PARAMS[metric]}
         func = PAIRWISE_KERNEL_FUNCTIONS[metric]
     elif callable(metric):
         func = partial(_pairwise_callable, metric=metric, **kwds)
diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py
index 10331c87e3e16..0ce83bdce6030 100644
--- a/sklearn/metrics/ranking.py
+++ b/sklearn/metrics/ranking.py
@@ -394,7 +394,7 @@ def _binary_clf_curve(y_true, y_score, pos_label=None, sample_weight=None):
     y_type = type_of_target(y_true)
     if not (y_type == "binary" or
             (y_type == "multiclass" and pos_label is not None)):
-        raise ValueError("{0} format is not supported".format(y_type))
+        raise ValueError("{} format is not supported".format(y_type))
 
     check_consistent_length(y_true, y_score, sample_weight)
     y_true = column_or_1d(y_true)
@@ -715,7 +715,7 @@ def label_ranking_average_precision_score(y_true, y_score, sample_weight=None):
     y_type = type_of_target(y_true)
     if (y_type != "multilabel-indicator" and
             not (y_type == "binary" and y_true.ndim == 2)):
-        raise ValueError("{0} format is not supported".format(y_type))
+        raise ValueError("{} format is not supported".format(y_type))
 
     y_true = csr_matrix(y_true)
     y_score = -y_score
@@ -794,7 +794,7 @@ def coverage_error(y_true, y_score, sample_weight=None):
 
     y_type = type_of_target(y_true)
     if y_type != "multilabel-indicator":
-        raise ValueError("{0} format is not supported".format(y_type))
+        raise ValueError("{} format is not supported".format(y_type))
 
     if y_true.shape != y_score.shape:
         raise ValueError("y_true and y_score have different shape")
@@ -853,7 +853,7 @@ def label_ranking_loss(y_true, y_score, sample_weight=None):
 
     y_type = type_of_target(y_true)
     if y_type not in ("multilabel-indicator",):
-        raise ValueError("{0} format is not supported".format(y_type))
+        raise ValueError("{} format is not supported".format(y_type))
 
     if y_true.shape != y_score.shape:
         raise ValueError("y_true and y_score have different shape")
diff --git a/sklearn/metrics/regression.py b/sklearn/metrics/regression.py
index 649495d9e0894..95dadb27203d6 100644
--- a/sklearn/metrics/regression.py
+++ b/sklearn/metrics/regression.py
@@ -87,7 +87,7 @@ def _check_reg_targets(y_true, y_pred, multioutput):
 
     if y_true.shape[1] != y_pred.shape[1]:
         raise ValueError("y_true and y_pred have different number of output "
-                         "({0}!={1})".format(y_true.shape[1], y_pred.shape[1]))
+                         "({}!={})".format(y_true.shape[1], y_pred.shape[1]))
 
     n_outputs = y_true.shape[1]
     allowed_multioutput_str = ('raw_values', 'uniform_average',
diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py
index f93736ed097a3..3f3808d8ca88a 100644
--- a/sklearn/metrics/scorer.py
+++ b/sklearn/metrics/scorer.py
@@ -43,7 +43,7 @@
 from ..base import is_regressor
 
 
-class _BaseScorer(object, metaclass=ABCMeta):
+class _BaseScorer(metaclass=ABCMeta):
     def __init__(self, score_func, sign, kwargs):
         self._kwargs = kwargs
         self._score_func = score_func
@@ -172,7 +172,7 @@ def __call__(self, clf, X, y, sample_weight=None):
         """
         y_type = type_of_target(y)
         if y_type not in ("binary", "multilabel-indicator"):
-            raise ValueError("{0} format is not supported".format(y_type))
+            raise ValueError("{} format is not supported".format(y_type))
 
         if is_regressor(clf):
             y_pred = clf.predict(X)
@@ -296,7 +296,7 @@ def check_scoring(estimator, scoring=None, allow_none=False):
     elif isinstance(scoring, Iterable):
         raise ValueError("For evaluating multiple scores, use "
                          "sklearn.model_selection.cross_validate instead. "
-                         "{0} was passed.".format(scoring))
+                         "{} was passed.".format(scoring))
     else:
         raise ValueError("scoring value should either be a callable, string or"
                          " None. %r was passed" % scoring)
@@ -537,6 +537,6 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False,
                      ('recall', recall_score), ('f1', f1_score)]:
     SCORERS[name] = make_scorer(metric)
     for average in ['macro', 'micro', 'samples', 'weighted']:
-        qualified_name = '{0}_{1}'.format(name, average)
+        qualified_name = '{}_{}'.format(name, average)
         SCORERS[qualified_name] = make_scorer(metric, pos_label=None,
                                               average=average)
diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py
index de72337d4024b..4b4daaecd1c77 100644
--- a/sklearn/metrics/tests/test_classification.py
+++ b/sklearn/metrics/tests/test_classification.py
@@ -998,11 +998,11 @@ def test_classification_report_multiclass_with_string_label():
 def test_classification_report_multiclass_with_unicode_label():
     y_true, y_pred, _ = make_prediction(binary=False)
 
-    labels = np.array([u"blue\xa2", u"green\xa2", u"red\xa2"])
+    labels = np.array(["blue\xa2", "green\xa2", "red\xa2"])
     y_true = labels[y_true]
     y_pred = labels[y_pred]
 
-    expected_report = u"""\
+    expected_report = """\
               precision    recall  f1-score   support
 
        blue\xa2       0.83      0.79      0.81        24
@@ -1584,14 +1584,14 @@ def test__check_targets():
             if type1 != type2:
                 assert_raise_message(
                     ValueError,
-                    "Classification metrics can't handle a mix of {0} and {1} "
+                    "Classification metrics can't handle a mix of {} and {} "
                     "targets".format(type1, type2),
                     _check_targets, y1, y2)
 
             else:
                 if type1 not in (BIN, MC, IND):
                     assert_raise_message(ValueError,
-                                         "{0} is not supported".format(type1),
+                                         "{} is not supported".format(type1),
                                          _check_targets, y1, y2)
 
         else:
diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py
index de304feb47847..337b5d83568ea 100644
--- a/sklearn/metrics/tests/test_common.py
+++ b/sklearn/metrics/tests/test_common.py
@@ -460,7 +460,7 @@ def test_symmetry():
 
     assert_equal(
         SYMMETRIC_METRICS.intersection(NOT_SYMMETRIC_METRICS),
-        set([]))
+        set())
 
     # Symmetric metric
     for name in SYMMETRIC_METRICS:
@@ -633,26 +633,26 @@ def test_classification_invariance_string_vs_numbers_labels(name):
         measure_with_str = metric_str(y1_str, y2_str)
 
         assert_array_equal(measure_with_number, measure_with_str,
-                           err_msg="{0} failed string vs number invariance "
+                           err_msg="{} failed string vs number invariance "
                                    "test".format(name))
 
         measure_with_strobj = metric_str(y1_str.astype('O'),
                                          y2_str.astype('O'))
         assert_array_equal(measure_with_number, measure_with_strobj,
-                           err_msg="{0} failed string object vs number "
+                           err_msg="{} failed string object vs number "
                                    "invariance test".format(name))
 
         if name in METRICS_WITH_LABELS:
             metric_str = partial(metric_str, labels=labels_str)
             measure_with_str = metric_str(y1_str, y2_str)
             assert_array_equal(measure_with_number, measure_with_str,
-                               err_msg="{0} failed string vs number  "
+                               err_msg="{} failed string vs number  "
                                        "invariance test".format(name))
 
             measure_with_strobj = metric_str(y1_str.astype('O'),
                                              y2_str.astype('O'))
             assert_array_equal(measure_with_number, measure_with_strobj,
-                               err_msg="{0} failed string vs number  "
+                               err_msg="{} failed string vs number  "
                                        "invariance test".format(name))
 
 
@@ -678,12 +678,12 @@ def test_thresholded_invariance_string_vs_numbers_labels(name):
             measure_with_number = metric(y1, y2)
             measure_with_str = metric_str(y1_str, y2)
             assert_array_equal(measure_with_number, measure_with_str,
-                               err_msg="{0} failed string vs number "
+                               err_msg="{} failed string vs number "
                                        "invariance test".format(name))
 
             measure_with_strobj = metric_str(y1_str.astype('O'), y2)
             assert_array_equal(measure_with_number, measure_with_strobj,
-                               err_msg="{0} failed string object vs number "
+                               err_msg="{} failed string object vs number "
                                        "invariance test".format(name))
         else:
             # TODO those metrics doesn't support string label yet
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 2ab4b6b72e3a7..5f3ad9cde2497 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -108,7 +108,7 @@ def teardown_module():
     shutil.rmtree(TEMP_FOLDER)
 
 
-class EstimatorWithoutFit(object):
+class EstimatorWithoutFit:
     """Dummy estimator to test scoring validators"""
     pass
 
@@ -119,7 +119,7 @@ def fit(self, X, y):
         return self
 
 
-class EstimatorWithFitAndScore(object):
+class EstimatorWithFitAndScore:
     """Dummy estimator to test scoring validators"""
     def fit(self, X, y):
         return self
@@ -128,7 +128,7 @@ def score(self, X, y):
         return 1.0
 
 
-class EstimatorWithFitAndPredict(object):
+class EstimatorWithFitAndPredict:
     """Dummy estimator to test scoring validators"""
     def fit(self, X, y):
         self.y = y
@@ -138,7 +138,7 @@ def predict(self, X):
         return self.y
 
 
-class DummyScorer(object):
+class DummyScorer:
     """Dummy scorer that always returns 1."""
     def __call__(self, est, X, y):
         return 1
@@ -486,19 +486,19 @@ def test_scorer_sample_weight():
             ignored = scorer(estimator[name], X_test[10:], target[10:])
             unweighted = scorer(estimator[name], X_test, target)
             assert_not_equal(weighted, unweighted,
-                             msg="scorer {0} behaves identically when "
-                             "called with sample weights: {1} vs "
-                             "{2}".format(name, weighted, unweighted))
+                             msg="scorer {} behaves identically when "
+                             "called with sample weights: {} vs "
+                             "{}".format(name, weighted, unweighted))
             assert_almost_equal(weighted, ignored,
-                                err_msg="scorer {0} behaves differently when "
+                                err_msg="scorer {} behaves differently when "
                                 "ignoring samples and setting sample_weight to"
-                                " 0: {1} vs {2}".format(name, weighted,
+                                " 0: {} vs {}".format(name, weighted,
                                                         ignored))
 
         except TypeError as e:
             assert "sample_weight" in str(e), (
-                "scorer {0} raises unhelpful exception when called "
-                "with sample weights: {1}".format(name, str(e)))
+                "scorer {} raises unhelpful exception when called "
+                "with sample weights: {}".format(name, str(e)))
 
 
 @ignore_warnings  # UndefinedMetricWarning for P / R scores
diff --git a/sklearn/mixture/bayesian_mixture.py b/sklearn/mixture/bayesian_mixture.py
index 749422ad1e83f..76295c47c0d28 100644
--- a/sklearn/mixture/bayesian_mixture.py
+++ b/sklearn/mixture/bayesian_mixture.py
@@ -314,7 +314,7 @@ def __init__(self, n_components=1, covariance_type='full', tol=1e-3,
                  degrees_of_freedom_prior=None, covariance_prior=None,
                  random_state=None, warm_start=False, verbose=0,
                  verbose_interval=10):
-        super(BayesianGaussianMixture, self).__init__(
+        super().__init__(
             n_components=n_components, tol=tol, reg_covar=reg_covar,
             max_iter=max_iter, n_init=n_init, init_params=init_params,
             random_state=random_state, warm_start=warm_start,
diff --git a/sklearn/mixture/gaussian_mixture.py b/sklearn/mixture/gaussian_mixture.py
index 4e9b5d5dc904c..9e3bf3cb4c381 100644
--- a/sklearn/mixture/gaussian_mixture.py
+++ b/sklearn/mixture/gaussian_mixture.py
@@ -590,7 +590,7 @@ def __init__(self, n_components=1, covariance_type='full', tol=1e-3,
                  weights_init=None, means_init=None, precisions_init=None,
                  random_state=None, warm_start=False,
                  verbose=0, verbose_interval=10):
-        super(GaussianMixture, self).__init__(
+        super().__init__(
             n_components=n_components, tol=tol, reg_covar=reg_covar,
             max_iter=max_iter, n_init=n_init, init_params=init_params,
             random_state=random_state, warm_start=warm_start,
diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py
index 66d34f89d4be7..4d549ccd7b9d1 100644
--- a/sklearn/mixture/tests/test_gaussian_mixture.py
+++ b/sklearn/mixture/tests/test_gaussian_mixture.py
@@ -68,7 +68,7 @@ def generate_data(n_samples, n_features, weights, means, precisions,
     return X
 
 
-class RandomData(object):
+class RandomData:
     def __init__(self, rng, n_samples=500, n_components=2, n_features=2,
                  scale=50):
         self.n_samples = n_samples
diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py
index 10076c5a967d7..62a075fa4d02f 100644
--- a/sklearn/model_selection/_search.py
+++ b/sklearn/model_selection/_search.py
@@ -44,7 +44,7 @@
            'ParameterSampler', 'RandomizedSearchCV']
 
 
-class ParameterGrid(object):
+class ParameterGrid:
     """Grid of parameters with a discrete number of values for each.
 
     Can be used to iterate over parameter value combinations with the
@@ -180,7 +180,7 @@ def __getitem__(self, ind):
         raise IndexError('ParameterGrid index out of range')
 
 
-class ParameterSampler(object):
+class ParameterSampler:
     """Generator on parameters sampled from given distributions.
 
     Non-deterministic iterable over random candidate combinations for hyper-
@@ -367,12 +367,12 @@ def _check_param_grid(param_grid):
 
             if (isinstance(v, str) or
                     not isinstance(v, (np.ndarray, Sequence))):
-                raise ValueError("Parameter values for parameter ({0}) need "
+                raise ValueError("Parameter values for parameter ({}) need "
                                  "to be a sequence(but not a string) or"
                                  " np.ndarray.".format(name))
 
             if len(v) == 0:
-                raise ValueError("Parameter values for parameter ({0}) need "
+                raise ValueError("Parameter values for parameter ({}) need "
                                  "to be a non-empty sequence.".format(name))
 
 
@@ -655,8 +655,8 @@ def evaluate_candidates(candidate_params):
                 n_candidates = len(candidate_params)
 
                 if self.verbose > 0:
-                    print("Fitting {0} folds for each of {1} candidates,"
-                          " totalling {2} fits".format(
+                    print("Fitting {} folds for each of {} candidates,"
+                          " totalling {} fits".format(
                               n_splits, n_candidates, n_candidates * n_splits))
 
                 out = parallel(delayed(_fit_and_score)(clone(base_estimator),
@@ -1128,7 +1128,7 @@ def __init__(self, estimator, param_grid, scoring=None,
                  n_jobs=None, iid='warn', refit=True, cv='warn', verbose=0,
                  pre_dispatch='2*n_jobs', error_score='raise-deprecating',
                  return_train_score=False):
-        super(GridSearchCV, self).__init__(
+        super().__init__(
             estimator=estimator, scoring=scoring,
             n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose,
             pre_dispatch=pre_dispatch, error_score=error_score,
@@ -1447,7 +1447,7 @@ def __init__(self, estimator, param_distributions, n_iter=10, scoring=None,
         self.param_distributions = param_distributions
         self.n_iter = n_iter
         self.random_state = random_state
-        super(RandomizedSearchCV, self).__init__(
+        super().__init__(
             estimator=estimator, scoring=scoring,
             n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose,
             pre_dispatch=pre_dispatch, error_score=error_score,
diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
index 5254d2905cb80..7aef72cfc99ec 100644
--- a/sklearn/model_selection/_split.py
+++ b/sklearn/model_selection/_split.py
@@ -282,11 +282,11 @@ def __init__(self, n_splits, shuffle, random_state):
             raise ValueError(
                 "k-fold cross-validation requires at least one"
                 " train/test split by setting n_splits=2 or more,"
-                " got n_splits={0}.".format(n_splits))
+                " got n_splits={}.".format(n_splits))
 
         if not isinstance(shuffle, bool):
             raise TypeError("shuffle must be True or False;"
-                            " got {0}".format(shuffle))
+                            " got {}".format(shuffle))
 
         self.n_splits = n_splits
         self.shuffle = shuffle
@@ -324,7 +324,7 @@ def split(self, X, y=None, groups=None):
                  " than the number of samples: n_samples={1}.")
                 .format(self.n_splits, n_samples))
 
-        for train, test in super(_BaseKFold, self).split(X, y, groups):
+        for train, test in super().split(X, y, groups):
             yield train, test
 
     def get_n_splits(self, X=None, y=None, groups=None):
@@ -422,7 +422,7 @@ def __init__(self, n_splits='warn', shuffle=False,
         if n_splits == 'warn':
             warnings.warn(NSPLIT_WARNING, FutureWarning)
             n_splits = 3
-        super(KFold, self).__init__(n_splits, shuffle, random_state)
+        super().__init__(n_splits, shuffle, random_state)
 
     def _iter_test_indices(self, X, y=None, groups=None):
         n_samples = _num_samples(X)
@@ -494,7 +494,7 @@ def __init__(self, n_splits='warn'):
         if n_splits == 'warn':
             warnings.warn(NSPLIT_WARNING, FutureWarning)
             n_splits = 3
-        super(GroupKFold, self).__init__(n_splits, shuffle=False,
+        super().__init__(n_splits, shuffle=False,
                                          random_state=None)
 
     def _iter_test_indices(self, X, y, groups):
@@ -558,7 +558,7 @@ def split(self, X, y=None, groups=None):
         test : ndarray
             The testing set indices for that split.
         """
-        return super(GroupKFold, self).split(X, y, groups)
+        return super().split(X, y, groups)
 
 
 class StratifiedKFold(_BaseKFold):
@@ -622,7 +622,7 @@ def __init__(self, n_splits='warn', shuffle=False, random_state=None):
         if n_splits == 'warn':
             warnings.warn(NSPLIT_WARNING, FutureWarning)
             n_splits = 3
-        super(StratifiedKFold, self).__init__(n_splits, shuffle, random_state)
+        super().__init__(n_splits, shuffle, random_state)
 
     def _make_test_folds(self, X, y=None):
         rng = self.random_state
@@ -716,7 +716,7 @@ def split(self, X, y, groups=None):
         to an integer.
         """
         y = check_array(y, ensure_2d=False, dtype=None)
-        return super(StratifiedKFold, self).split(X, y, groups)
+        return super().split(X, y, groups)
 
 
 class TimeSeriesSplit(_BaseKFold):
@@ -777,7 +777,7 @@ def __init__(self, n_splits='warn', max_train_size=None):
         if n_splits == 'warn':
             warnings.warn(NSPLIT_WARNING, FutureWarning)
             n_splits = 3
-        super(TimeSeriesSplit, self).__init__(n_splits,
+        super().__init__(n_splits,
                                               shuffle=False,
                                               random_state=None)
         self.max_train_size = max_train_size
@@ -933,7 +933,7 @@ def split(self, X, y=None, groups=None):
         test : ndarray
             The testing set indices for that split.
         """
-        return super(LeaveOneGroupOut, self).split(X, y, groups)
+        return super().split(X, y, groups)
 
 
 class LeavePGroupsOut(BaseCrossValidator):
@@ -1064,7 +1064,7 @@ def split(self, X, y=None, groups=None):
         test : ndarray
             The testing set indices for that split.
         """
-        return super(LeavePGroupsOut, self).split(X, y, groups)
+        return super().split(X, y, groups)
 
 
 class _RepeatedSplits(metaclass=ABCMeta):
@@ -1217,7 +1217,7 @@ class RepeatedKFold(_RepeatedSplits):
     RepeatedStratifiedKFold: Repeats Stratified K-Fold n times.
     """
     def __init__(self, n_splits=5, n_repeats=10, random_state=None):
-        super(RepeatedKFold, self).__init__(
+        super().__init__(
             KFold, n_repeats, random_state, n_splits=n_splits)
 
 
@@ -1270,7 +1270,7 @@ class RepeatedStratifiedKFold(_RepeatedSplits):
     RepeatedKFold: Repeats K-Fold n times.
     """
     def __init__(self, n_splits=5, n_repeats=10, random_state=None):
-        super(RepeatedStratifiedKFold, self).__init__(
+        super().__init__(
             StratifiedKFold, n_repeats, random_state, n_splits=n_splits)
 
 
@@ -1492,7 +1492,7 @@ def __init__(self, n_splits=5, test_size="default", train_size=None,
                               FutureWarning)
             test_size = 0.2
 
-        super(GroupShuffleSplit, self).__init__(
+        super().__init__(
             n_splits=n_splits,
             test_size=test_size,
             train_size=train_size,
@@ -1503,8 +1503,7 @@ def _iter_indices(self, X, y, groups):
             raise ValueError("The 'groups' parameter should not be None.")
         groups = check_array(groups, ensure_2d=False, dtype=None)
         classes, group_indices = np.unique(groups, return_inverse=True)
-        for group_train, group_test in super(
-                GroupShuffleSplit, self)._iter_indices(X=classes):
+        for group_train, group_test in super()._iter_indices(X=classes):
             # these are the indices of classes in the partition
             # invert them into data indices
 
@@ -1543,7 +1542,7 @@ def split(self, X, y=None, groups=None):
         split. You can make the results identical by setting ``random_state``
         to an integer.
         """
-        return super(GroupShuffleSplit, self).split(X, y, groups)
+        return super().split(X, y, groups)
 
 
 def _approximate_mode(class_counts, n_draws, rng):
@@ -1680,7 +1679,7 @@ class StratifiedShuffleSplit(BaseShuffleSplit):
 
     def __init__(self, n_splits=10, test_size="default", train_size=None,
                  random_state=None):
-        super(StratifiedShuffleSplit, self).__init__(
+        super().__init__(
             n_splits, test_size, train_size, random_state)
 
     def _iter_indices(self, X, y, groups=None):
@@ -1778,7 +1777,7 @@ def split(self, X, y, groups=None):
         to an integer.
         """
         y = check_array(y, ensure_2d=False, dtype=None)
-        return super(StratifiedShuffleSplit, self).split(X, y, groups)
+        return super().split(X, y, groups)
 
 
 def _validate_shuffle_split_init(test_size, train_size):
diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index e5c1f539914f6..15989494fb581 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -489,8 +489,8 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
 
     # Adjust length of sample weights
     fit_params = fit_params if fit_params is not None else {}
-    fit_params = dict([(k, _index_param_value(X, v, train))
-                      for k, v in fit_params.items()])
+    fit_params = {k: _index_param_value(X, v, train)
+                  for k, v in fit_params.items()}
 
     train_scores = {}
     if parameters is not None:
@@ -827,8 +827,8 @@ def _fit_and_predict(estimator, X, y, train, test, verbose, fit_params,
     """
     # Adjust length of sample weights
     fit_params = fit_params if fit_params is not None else {}
-    fit_params = dict([(k, _index_param_value(X, v, train))
-                      for k, v in fit_params.items()])
+    fit_params = {k: _index_param_value(X, v, train)
+                  for k, v in fit_params.items()}
 
     X_train, y_train = _safe_split(estimator, X, y, train)
     X_test, _ = _safe_split(estimator, X, y, test, train)
diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py
index aade1d21acc16..08675e99d0bf6 100644
--- a/sklearn/model_selection/tests/test_search.py
+++ b/sklearn/model_selection/tests/test_search.py
@@ -73,7 +73,7 @@
 
 # Neither of the following two estimators inherit from BaseEstimator,
 # to test hyperparameter search on user-defined classifiers.
-class MockClassifier(object):
+class MockClassifier:
     """Dummy classifier to test the parameter search algorithms"""
     def __init__(self, foo_param=0):
         self.foo_param = foo_param
@@ -156,10 +156,10 @@ def test_parameter_grid():
     # loop to assert we can iterate over the grid multiple times
     for i in range(2):
         # tuple + chain transforms {"a": 1, "b": 2} to ("a", 1, "b", 2)
-        points = set(tuple(chain(*(sorted(p.items())))) for p in grid2)
+        points = {tuple(chain(*(sorted(p.items())))) for p in grid2}
         assert_equal(points,
-                     set(("bar", x, "foo", y)
-                         for x, y in product(params2["bar"], params2["foo"])))
+                     {("bar", x, "foo", y)
+                         for x, y in product(params2["bar"], params2["foo"])})
     assert_grid_iter_equals_getitem(grid2)
 
     # Special case: empty grid (useful to get default estimator settings)
@@ -1657,7 +1657,7 @@ def fit_grid(param_grid):
 
     class CustomSearchCV(BaseSearchCV):
         def __init__(self, estimator, **kwargs):
-            super(CustomSearchCV, self).__init__(estimator, **kwargs)
+            super().__init__(estimator, **kwargs)
 
         def _run_search(self, evaluate):
             results = evaluate([{'max_depth': 1}, {'max_depth': 2}])
@@ -1688,7 +1688,7 @@ def _run_search(self, evaluate):
 def test__custom_fit_no_run_search():
     class NoRunSearchSearchCV(BaseSearchCV):
         def __init__(self, estimator, **kwargs):
-            super(NoRunSearchSearchCV, self).__init__(estimator, **kwargs)
+            super().__init__(estimator, **kwargs)
 
         def fit(self, X, y=None, groups=None, **fit_params):
             return self
@@ -1698,7 +1698,7 @@ def fit(self, X, y=None, groups=None, **fit_params):
 
     class BadSearchCV(BaseSearchCV):
         def __init__(self, estimator, **kwargs):
-            super(BadSearchCV, self).__init__(estimator, **kwargs)
+            super().__init__(estimator, **kwargs)
 
     with pytest.raises(NotImplementedError,
                        match="_run_search not implemented."):
diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py
index 96a8341115134..3ecf77caf67f3 100644
--- a/sklearn/model_selection/tests/test_split.py
+++ b/sklearn/model_selection/tests/test_split.py
@@ -71,7 +71,7 @@
 digits = load_digits()
 
 
-class MockClassifier(object):
+class MockClassifier:
     """Dummy classifier to test the cross-validation"""
 
     def __init__(self, a=0, allow_nd=False):
@@ -101,12 +101,12 @@ def fit(self, X, Y=None, sample_weight=None, class_prior=None,
         if sample_weight is not None:
             assert sample_weight.shape[0] == X.shape[0], (
                 'MockClassifier extra fit_param sample_weight.shape[0]'
-                ' is {0}, should be {1}'.format(sample_weight.shape[0],
+                ' is {}, should be {}'.format(sample_weight.shape[0],
                                                 X.shape[0]))
         if class_prior is not None:
             assert class_prior.shape[0] == len(np.unique(y)), (
                         'MockClassifier extra fit_param class_prior.shape[0]'
-                        ' is {0}, should be {1}'.format(class_prior.shape[0],
+                        ' is {}, should be {}'.format(class_prior.shape[0],
                                                         len(np.unique(y))))
         if sparse_sample_weight is not None:
             fmt = ('MockClassifier extra fit_param sparse_sample_weight'
diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py
index 6cf9b9a79747c..882a99e6b4ba2 100644
--- a/sklearn/model_selection/tests/test_validation.py
+++ b/sklearn/model_selection/tests/test_validation.py
@@ -115,8 +115,7 @@ def _is_training_data(self, X):
 class MockIncrementalImprovingEstimator(MockImprovingEstimator):
     """Dummy classifier that provides partial_fit"""
     def __init__(self, n_max_train_sizes):
-        super(MockIncrementalImprovingEstimator,
-              self).__init__(n_max_train_sizes)
+        super().__init__(n_max_train_sizes)
         self.x = None
 
     def _is_training_data(self, X):
@@ -161,7 +160,7 @@ def predict(self, X):
         raise NotImplementedError
 
 
-class MockClassifier(object):
+class MockClassifier:
     """Dummy classifier to test the cross-validation"""
 
     def __init__(self, a=0, allow_nd=False):
@@ -191,12 +190,12 @@ def fit(self, X, Y=None, sample_weight=None, class_prior=None,
         if sample_weight is not None:
             assert sample_weight.shape[0] == X.shape[0], (
                 'MockClassifier extra fit_param ' 
-                'sample_weight.shape[0] is {0}, should be {1}'
+                'sample_weight.shape[0] is {}, should be {}'
                 .format(sample_weight.shape[0], X.shape[0]))
         if class_prior is not None:
             assert class_prior.shape[0] == len(np.unique(y)), (
                 'MockClassifier extra fit_param class_prior.shape[0]'
-                ' is {0}, should be {1}'.format(class_prior.shape[0],
+                ' is {}, should be {}'.format(class_prior.shape[0],
                                                 len(np.unique(y))))
         if sparse_sample_weight is not None:
             fmt = ('MockClassifier extra fit_param sparse_sample_weight'
diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py
index fdfe1bed0ca9f..4f4e27a03a1d3 100644
--- a/sklearn/multiclass.py
+++ b/sklearn/multiclass.py
@@ -545,8 +545,8 @@ def partial_fit(self, X, y, classes=None):
                                       (self.n_classes_ - 1) // 2)]
 
         if len(np.setdiff1d(y, self.classes_)):
-            raise ValueError("Mini-batch contains {0} while it "
-                             "must be subset of {1}".format(np.unique(y),
+            raise ValueError("Mini-batch contains {} while it "
+                             "must be subset of {}".format(np.unique(y),
                                                             self.classes_))
 
         X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'])
@@ -721,7 +721,7 @@ def fit(self, X, y):
         """
         X, y = check_X_y(X, y)
         if self.code_size <= 0:
-            raise ValueError("code_size should be greater than 0, got {0}"
+            raise ValueError("code_size should be greater than 0, got {}"
                              "".format(self.code_size))
 
         _check_estimator(self.estimator)
@@ -742,7 +742,7 @@ def fit(self, X, y):
         else:
             self.code_book_[self.code_book_ != 1] = 0
 
-        classes_index = dict((c, i) for i, c in enumerate(self.classes_))
+        classes_index = {c: i for i, c in enumerate(self.classes_)}
 
         Y = np.array([self.code_book_[classes_index[y[i]]]
                       for i in range(X.shape[0])], dtype=np.int)
diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py
index f70b1ff805ba6..82421022a98f1 100644
--- a/sklearn/multioutput.py
+++ b/sklearn/multioutput.py
@@ -220,7 +220,7 @@ class MultiOutputRegressor(MultiOutputEstimator, RegressorMixin):
     """
 
     def __init__(self, estimator, n_jobs=None):
-        super(MultiOutputRegressor, self).__init__(estimator, n_jobs)
+        super().__init__(estimator, n_jobs)
 
     @if_delegate_has_method('estimator')
     def partial_fit(self, X, y, sample_weight=None):
@@ -244,7 +244,7 @@ def partial_fit(self, X, y, sample_weight=None):
         -------
         self : object
         """
-        super(MultiOutputRegressor, self).partial_fit(
+        super().partial_fit(
             X, y, sample_weight=sample_weight)
 
     def score(self, X, y, sample_weight=None):
@@ -311,7 +311,7 @@ class MultiOutputClassifier(MultiOutputEstimator, ClassifierMixin):
     """
 
     def __init__(self, estimator, n_jobs=None):
-        super(MultiOutputClassifier, self).__init__(estimator, n_jobs)
+        super().__init__(estimator, n_jobs)
 
     def predict_proba(self, X):
         """Probability estimates.
@@ -360,8 +360,8 @@ def score(self, X, y):
             raise ValueError("y must have at least two dimensions for "
                              "multi target classification but has only one")
         if y.shape[1] != n_outputs_:
-            raise ValueError("The number of outputs of Y for fit {0} and"
-                             " score {1} should be same".
+            raise ValueError("The number of outputs of Y for fit {} and"
+                             " score {} should be same".
                              format(n_outputs_, y.shape[1]))
         y_pred = self.predict(X)
         return np.mean(np.all(y == y_pred, axis=1))
@@ -561,7 +561,7 @@ def fit(self, X, Y):
         -------
         self : object
         """
-        super(ClassifierChain, self).fit(X, Y)
+        super().fit(X, Y)
         self.classes_ = [estimator.classes_
                          for chain_idx, estimator
                          in enumerate(self.estimators_)]
@@ -707,5 +707,5 @@ def fit(self, X, Y):
         -------
         self : object
         """
-        super(RegressorChain, self).fit(X, Y)
+        super().fit(X, Y)
         return self
diff --git a/sklearn/neighbors/base.py b/sklearn/neighbors/base.py
index f86ee077f7205..dc187f0c31918 100644
--- a/sklearn/neighbors/base.py
+++ b/sklearn/neighbors/base.py
@@ -291,7 +291,7 @@ def _tree_query_parallel_helper(tree, data, n_neighbors, return_distance):
     return tree.query(data, n_neighbors, return_distance)
 
 
-class KNeighborsMixin(object):
+class KNeighborsMixin:
     """Mixin for k-neighbors searches"""
 
     def _kneighbors_reduce_func(self, dist, start,
@@ -580,7 +580,7 @@ def _tree_query_radius_parallel_helper(tree, data, radius, return_distance):
     return tree.query_radius(data, radius, return_distance)
 
 
-class RadiusNeighborsMixin(object):
+class RadiusNeighborsMixin:
     """Mixin for radius-based neighbors searches"""
 
     def _radius_neighbors_reduce_func(self, dist, start,
@@ -854,7 +854,7 @@ def radius_neighbors_graph(self, X=None, radius=None, mode='connectivity'):
                           shape=(n_samples1, n_samples2))
 
 
-class SupervisedFloatMixin(object):
+class SupervisedFloatMixin:
     def fit(self, X, y):
         """Fit the model using X as training data and y as target values
 
@@ -874,7 +874,7 @@ def fit(self, X, y):
         return self._fit(X)
 
 
-class SupervisedIntegerMixin(object):
+class SupervisedIntegerMixin:
     def fit(self, X, y):
         """Fit the model using X as training data and y as target values
 
@@ -917,7 +917,7 @@ def fit(self, X, y):
         return self._fit(X)
 
 
-class UnsupervisedMixin(object):
+class UnsupervisedMixin:
     def fit(self, X, y=None):
         """Fit the model using X as training data
 
diff --git a/sklearn/neighbors/classification.py b/sklearn/neighbors/classification.py
index e84f9751d0089..f35047f9b2c0b 100644
--- a/sklearn/neighbors/classification.py
+++ b/sklearn/neighbors/classification.py
@@ -122,7 +122,7 @@ def __init__(self, n_neighbors=5,
                  p=2, metric='minkowski', metric_params=None, n_jobs=None,
                  **kwargs):
 
-        super(KNeighborsClassifier, self).__init__(
+        super().__init__(
             n_neighbors=n_neighbors,
             algorithm=algorithm,
             leaf_size=leaf_size, metric=metric, p=p,
@@ -326,7 +326,7 @@ def __init__(self, radius=1.0, weights='uniform',
                  algorithm='auto', leaf_size=30, p=2, metric='minkowski',
                  outlier_label=None, metric_params=None, n_jobs=None,
                  **kwargs):
-        super(RadiusNeighborsClassifier, self).__init__(
+        super().__init__(
               radius=radius,
               algorithm=algorithm,
               leaf_size=leaf_size,
diff --git a/sklearn/neighbors/kde.py b/sklearn/neighbors/kde.py
index be5002e579423..ba5b2564a84f9 100644
--- a/sklearn/neighbors/kde.py
+++ b/sklearn/neighbors/kde.py
@@ -91,7 +91,7 @@ def __init__(self, bandwidth=1.0, algorithm='auto',
         if bandwidth <= 0:
             raise ValueError("bandwidth must be positive")
         if kernel not in VALID_KERNELS:
-            raise ValueError("invalid kernel: '{0}'".format(kernel))
+            raise ValueError("invalid kernel: '{}'".format(kernel))
 
     def _choose_algorithm(self, algorithm, metric):
         # given the algorithm string + metric string, choose the optimal
@@ -103,15 +103,15 @@ def _choose_algorithm(self, algorithm, metric):
             elif metric in BallTree.valid_metrics:
                 return 'ball_tree'
             else:
-                raise ValueError("invalid metric: '{0}'".format(metric))
+                raise ValueError("invalid metric: '{}'".format(metric))
         elif algorithm in TREE_DICT:
             if metric not in TREE_DICT[algorithm].valid_metrics:
-                raise ValueError("invalid metric for {0}: "
-                                 "'{1}'".format(TREE_DICT[algorithm],
+                raise ValueError("invalid metric for {}: "
+                                 "'{}'".format(TREE_DICT[algorithm],
                                                 metric))
             return algorithm
         else:
-            raise ValueError("invalid algorithm: '{0}'".format(algorithm))
+            raise ValueError("invalid algorithm: '{}'".format(algorithm))
 
     def fit(self, X, y=None, sample_weight=None):
         """Fit the Kernel Density model on the data.
@@ -131,8 +131,8 @@ def fit(self, X, y=None, sample_weight=None):
             sample_weight = check_array(sample_weight, order='C', dtype=DTYPE,
                                         ensure_2d=False)
             if sample_weight.ndim != 1:
-                raise ValueError("the shape of sample_weight must be ({0},),"
-                                 " but was {1}".format(X.shape[0],
+                raise ValueError("the shape of sample_weight must be ({},),"
+                                 " but was {}".format(X.shape[0],
                                                        sample_weight.shape))
             check_consistent_length(X, sample_weight)
             if sample_weight.min() <= 0:
diff --git a/sklearn/neighbors/lof.py b/sklearn/neighbors/lof.py
index 99a909bbbcd15..5ad2f7e9b7b1d 100644
--- a/sklearn/neighbors/lof.py
+++ b/sklearn/neighbors/lof.py
@@ -150,7 +150,7 @@ class LocalOutlierFactor(NeighborsBase, KNeighborsMixin, UnsupervisedMixin,
     def __init__(self, n_neighbors=20, algorithm='auto', leaf_size=30,
                  metric='minkowski', p=2, metric_params=None,
                  contamination="legacy", novelty=False, n_jobs=None):
-        super(LocalOutlierFactor, self).__init__(
+        super().__init__(
             n_neighbors=n_neighbors,
             algorithm=algorithm,
             leaf_size=leaf_size, metric=metric, p=p,
@@ -243,7 +243,7 @@ def fit(self, X, y=None):
                 raise ValueError("contamination must be in (0, 0.5], "
                                  "got: %f" % self._contamination)
 
-        super(LocalOutlierFactor, self).fit(X)
+        super().fit(X)
 
         n_samples = self._fit_X.shape[0]
         if self.n_neighbors > n_samples:
diff --git a/sklearn/neighbors/regression.py b/sklearn/neighbors/regression.py
index be4f835ff2a20..f32cd7a4b2e5d 100644
--- a/sklearn/neighbors/regression.py
+++ b/sklearn/neighbors/regression.py
@@ -126,7 +126,7 @@ def __init__(self, n_neighbors=5, weights='uniform',
                  algorithm='auto', leaf_size=30,
                  p=2, metric='minkowski', metric_params=None, n_jobs=None,
                  **kwargs):
-        super(KNeighborsRegressor, self).__init__(
+        super().__init__(
               n_neighbors=n_neighbors,
               algorithm=algorithm,
               leaf_size=leaf_size, metric=metric, p=p,
@@ -276,7 +276,7 @@ def __init__(self, radius=1.0, weights='uniform',
                  algorithm='auto', leaf_size=30,
                  p=2, metric='minkowski', metric_params=None, n_jobs=None,
                  **kwargs):
-        super(RadiusNeighborsRegressor, self).__init__(
+        super().__init__(
               radius=radius,
               algorithm=algorithm,
               leaf_size=leaf_size,
diff --git a/sklearn/neighbors/unsupervised.py b/sklearn/neighbors/unsupervised.py
index 9d41b640f9e17..d69e62e385349 100644
--- a/sklearn/neighbors/unsupervised.py
+++ b/sklearn/neighbors/unsupervised.py
@@ -117,7 +117,7 @@ class NearestNeighbors(NeighborsBase, KNeighborsMixin,
     def __init__(self, n_neighbors=5, radius=1.0,
                  algorithm='auto', leaf_size=30, metric='minkowski',
                  p=2, metric_params=None, n_jobs=None, **kwargs):
-        super(NearestNeighbors, self).__init__(
+        super().__init__(
               n_neighbors=n_neighbors,
               radius=radius,
               algorithm=algorithm,
diff --git a/sklearn/neural_network/_stochastic_optimizers.py b/sklearn/neural_network/_stochastic_optimizers.py
index a741ca7695ee0..8f19c7b488acc 100644
--- a/sklearn/neural_network/_stochastic_optimizers.py
+++ b/sklearn/neural_network/_stochastic_optimizers.py
@@ -7,7 +7,7 @@
 import numpy as np
 
 
-class BaseOptimizer(object):
+class BaseOptimizer:
     """Base (Stochastic) gradient descent optimizer
 
     Parameters
@@ -117,7 +117,7 @@ class SGDOptimizer(BaseOptimizer):
 
     def __init__(self, params, learning_rate_init=0.1, lr_schedule='constant',
                  momentum=0.9, nesterov=True, power_t=0.5):
-        super(SGDOptimizer, self).__init__(params, learning_rate_init)
+        super().__init__(params, learning_rate_init)
 
         self.lr_schedule = lr_schedule
         self.momentum = momentum
@@ -230,7 +230,7 @@ class AdamOptimizer(BaseOptimizer):
 
     def __init__(self, params, learning_rate_init=0.001, beta_1=0.9,
                  beta_2=0.999, epsilon=1e-8):
-        super(AdamOptimizer, self).__init__(params, learning_rate_init)
+        super().__init__(params, learning_rate_init)
 
         self.beta_1 = beta_1
         self.beta_2 = beta_2
diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py
index 2b43352347f51..3dce2e02c0efc 100644
--- a/sklearn/neural_network/multilayer_perceptron.py
+++ b/sklearn/neural_network/multilayer_perceptron.py
@@ -894,7 +894,7 @@ def __init__(self, hidden_layer_sizes=(100,), activation="relu",
                  validation_fraction=0.1, beta_1=0.9, beta_2=0.999,
                  epsilon=1e-8, n_iter_no_change=10):
 
-        sup = super(MLPClassifier, self)
+        sup = super()
         sup.__init__(hidden_layer_sizes=hidden_layer_sizes,
                      activation=activation, solver=solver, alpha=alpha,
                      batch_size=batch_size, learning_rate=learning_rate,
@@ -1013,7 +1013,7 @@ def _partial_fit(self, X, y, classes=None):
             else:
                 self._label_binarizer.fit(classes)
 
-        super(MLPClassifier, self)._partial_fit(X, y)
+        super()._partial_fit(X, y)
 
         return self
 
@@ -1275,7 +1275,7 @@ def __init__(self, hidden_layer_sizes=(100,), activation="relu",
                  validation_fraction=0.1, beta_1=0.9, beta_2=0.999,
                  epsilon=1e-8, n_iter_no_change=10):
 
-        sup = super(MLPRegressor, self)
+        sup = super()
         sup.__init__(hidden_layer_sizes=hidden_layer_sizes,
                      activation=activation, solver=solver, alpha=alpha,
                      batch_size=batch_size, learning_rate=learning_rate,
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 0e592c253fe40..4eb1d6076c966 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -212,8 +212,8 @@ def _fit(self, X, y=None, **fit_params):
 
         fit_transform_one_cached = memory.cache(_fit_transform_one)
 
-        fit_params_steps = dict((name, {}) for name, step in self.steps
-                                if step is not None)
+        fit_params_steps = {name: {} for name, step in self.steps
+                            if step is not None}
         for pname, pval in fit_params.items():
             step, param = pname.split('__', 1)
             fit_params_steps[step][param] = pval
diff --git a/sklearn/preprocessing/_encoders.py b/sklearn/preprocessing/_encoders.py
index 3dabd0b3e0cda..4b1eb26e07075 100644
--- a/sklearn/preprocessing/_encoders.py
+++ b/sklearn/preprocessing/_encoders.py
@@ -81,7 +81,7 @@ def _fit(self, X, handle_unknown='error'):
                 if handle_unknown == 'error':
                     diff = _encode_check_unknown(Xi, cats)
                     if diff:
-                        msg = ("Found unknown categories {0} in column {1}"
+                        msg = ("Found unknown categories {} in column {}"
                                " during fit".format(diff, i))
                         raise ValueError(msg)
             self.categories_.append(cats)
@@ -100,7 +100,7 @@ def _transform(self, X, handle_unknown='error'):
 
             if not np.all(valid_mask):
                 if handle_unknown == 'error':
-                    msg = ("Found unknown categories {0} in column {1}"
+                    msg = ("Found unknown categories {} in column {}"
                            " during transform".format(diff, i))
                     raise ValueError(msg)
                 else:
@@ -412,7 +412,7 @@ def fit(self, X, y=None):
         """
         if self.handle_unknown not in ('error', 'ignore'):
             msg = ("handle_unknown should be either 'error' or 'ignore', "
-                   "got {0}.".format(self.handle_unknown))
+                   "got {}.".format(self.handle_unknown))
             raise ValueError(msg)
 
         self._handle_deprecations(X)
@@ -502,7 +502,7 @@ def fit_transform(self, X, y=None):
         """
         if self.handle_unknown not in ('error', 'ignore'):
             msg = ("handle_unknown should be either 'error' or 'ignore', "
-                   "got {0}.".format(self.handle_unknown))
+                   "got {}.".format(self.handle_unknown))
             raise ValueError(msg)
 
         self._handle_deprecations(X)
diff --git a/sklearn/preprocessing/imputation.py b/sklearn/preprocessing/imputation.py
index 380af58cc1d40..10fd752986f02 100644
--- a/sklearn/preprocessing/imputation.py
+++ b/sklearn/preprocessing/imputation.py
@@ -138,13 +138,13 @@ def fit(self, X, y=None):
         # Check parameters
         allowed_strategies = ["mean", "median", "most_frequent"]
         if self.strategy not in allowed_strategies:
-            raise ValueError("Can only use these strategies: {0} "
-                             " got strategy={1}".format(allowed_strategies,
+            raise ValueError("Can only use these strategies: {} "
+                             " got strategy={}".format(allowed_strategies,
                                                         self.strategy))
 
         if self.axis not in [0, 1]:
             raise ValueError("Can only impute missing values on axis 0 and 1, "
-                             " got axis={0}".format(self.axis))
+                             " got axis={}".format(self.axis))
 
         # Since two different arrays can be provided in fit(X) and
         # transform(X), the imputation data will be computed in transform()
diff --git a/sklearn/preprocessing/label.py b/sklearn/preprocessing/label.py
index 38b44d71359d2..0dd979a72703b 100644
--- a/sklearn/preprocessing/label.py
+++ b/sklearn/preprocessing/label.py
@@ -372,13 +372,13 @@ class LabelBinarizer(BaseEstimator, TransformerMixin):
 
     def __init__(self, neg_label=0, pos_label=1, sparse_output=False):
         if neg_label >= pos_label:
-            raise ValueError("neg_label={0} must be strictly less than "
-                             "pos_label={1}.".format(neg_label, pos_label))
+            raise ValueError("neg_label={} must be strictly less than "
+                             "pos_label={}.".format(neg_label, pos_label))
 
         if sparse_output and (pos_label == 0 or neg_label != 0):
             raise ValueError("Sparse binarization is only supported with non "
                              "zero pos_label and zero neg_label, got "
-                             "pos_label={0} and neg_label={1}"
+                             "pos_label={} and neg_label={}"
                              "".format(pos_label, neg_label))
 
         self.neg_label = neg_label
@@ -579,13 +579,13 @@ def label_binarize(y, classes, neg_label=0, pos_label=1, sparse_output=False):
         if _num_samples(y) == 0:
             raise ValueError('y has 0 samples: %r' % y)
     if neg_label >= pos_label:
-        raise ValueError("neg_label={0} must be strictly less than "
-                         "pos_label={1}.".format(neg_label, pos_label))
+        raise ValueError("neg_label={} must be strictly less than "
+                         "pos_label={}.".format(neg_label, pos_label))
 
     if (sparse_output and (pos_label == 0 or neg_label != 0)):
         raise ValueError("Sparse binarization is only supported with non "
                          "zero pos_label and zero neg_label, got "
-                         "pos_label={0} and neg_label={1}"
+                         "pos_label={} and neg_label={}"
                          "".format(pos_label, neg_label))
 
     # To account for pos_label == 0 in the dense case
@@ -617,7 +617,7 @@ def label_binarize(y, classes, neg_label=0, pos_label=1, sparse_output=False):
 
     sorted_class = np.sort(classes)
     if (y_type == "multilabel-indicator" and classes.size != y.shape[1]):
-        raise ValueError("classes {0} missmatch with the labels {1}"
+        raise ValueError("classes {} missmatch with the labels {}"
                          "found in the data".format(classes, unique_labels(y)))
 
     if y_type in ("binary", "multiclass"):
@@ -717,7 +717,7 @@ def _inverse_binarize_thresholding(y, output_type, classes, threshold):
     """Inverse label binarization transformation using thresholding."""
 
     if output_type == "binary" and y.ndim == 2 and y.shape[1] > 2:
-        raise ValueError("output_type='binary', but y.shape = {0}".
+        raise ValueError("output_type='binary', but y.shape = {}".
                          format(y.shape))
 
     if output_type != "binary" and y.shape[1] != len(classes):
@@ -754,7 +754,7 @@ def _inverse_binarize_thresholding(y, output_type, classes, threshold):
         return y
 
     else:
-        raise ValueError("{0} format is not supported".format(output_type))
+        raise ValueError("{} format is not supported".format(output_type))
 
 
 class MultiLabelBinarizer(BaseEstimator, TransformerMixin):
@@ -937,7 +937,7 @@ def _transform(self, y, class_mapping):
             indices.extend(index)
             indptr.append(len(indices))
         if unknown:
-            warnings.warn('unknown class(es) {0} will be ignored'
+            warnings.warn('unknown class(es) {} will be ignored'
                           .format(sorted(unknown, key=str)))
         data = np.ones(len(indices), dtype=int)
 
@@ -961,7 +961,7 @@ def inverse_transform(self, yt):
         check_is_fitted(self, 'classes_')
 
         if yt.shape[1] != len(self.classes_):
-            raise ValueError('Expected indicator for {0} classes, but got {1}'
+            raise ValueError('Expected indicator for {} classes, but got {}'
                              .format(len(self.classes_), yt.shape[1]))
 
         if sp.issparse(yt):
@@ -974,6 +974,6 @@ def inverse_transform(self, yt):
             unexpected = np.setdiff1d(yt, [0, 1])
             if len(unexpected) > 0:
                 raise ValueError('Expected only 0s and 1s in label indicator. '
-                                 'Also got {0}'.format(unexpected))
+                                 'Also got {}'.format(unexpected))
             return [tuple(self.classes_.compress(indicators)) for indicators
                     in yt]
diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py
index 1a5ad20d32ef4..5122eec1e5233 100644
--- a/sklearn/preprocessing/tests/test_data.py
+++ b/sklearn/preprocessing/tests/test_data.py
@@ -150,8 +150,8 @@ def test_polynomial_feature_names():
     # test some unicode
     poly = PolynomialFeatures(degree=1, include_bias=True).fit(X)
     feature_names = poly.get_feature_names(
-        [u"\u0001F40D", u"\u262E", u"\u05D0"])
-    assert_array_equal([u"1", u"\u0001F40D", u"\u262E", u"\u05D0"],
+        ["\u0001F40D", "\u262E", "\u05D0"])
+    assert_array_equal(["1", "\u0001F40D", "\u262E", "\u05D0"],
                        feature_names)
 
 
diff --git a/sklearn/preprocessing/tests/test_encoders.py b/sklearn/preprocessing/tests/test_encoders.py
index 792de88aa37de..b2aa7653ef7c4 100644
--- a/sklearn/preprocessing/tests/test_encoders.py
+++ b/sklearn/preprocessing/tests/test_encoders.py
@@ -529,12 +529,12 @@ def test_one_hot_encoder_feature_names():
 
 def test_one_hot_encoder_feature_names_unicode():
     enc = OneHotEncoder()
-    X = np.array([[u'c❤t1', u'dat2']], dtype=object).T
+    X = np.array([['c❤t1', 'dat2']], dtype=object).T
     enc.fit(X)
     feature_names = enc.get_feature_names()
-    assert_array_equal([u'x0_c❤t1', u'x0_dat2'], feature_names)
-    feature_names = enc.get_feature_names(input_features=[u'n👍me'])
-    assert_array_equal([u'n👍me_c❤t1', u'n👍me_dat2'], feature_names)
+    assert_array_equal(['x0_c❤t1', 'x0_dat2'], feature_names)
+    feature_names = enc.get_feature_names(input_features=['n👍me'])
+    assert_array_equal(['n👍me_c❤t1', 'n👍me_dat2'], feature_names)
 
 
 @pytest.mark.parametrize("X", [np.array([[1, np.nan]]).T,
diff --git a/sklearn/random_projection.py b/sklearn/random_projection.py
index bac632fc7df65..8e4eb9e8ecd19 100644
--- a/sklearn/random_projection.py
+++ b/sklearn/random_projection.py
@@ -479,7 +479,7 @@ class GaussianRandomProjection(BaseRandomProjection):
 
     """
     def __init__(self, n_components='auto', eps=0.1, random_state=None):
-        super(GaussianRandomProjection, self).__init__(
+        super().__init__(
             n_components=n_components,
             eps=eps,
             dense_output=True,
@@ -616,7 +616,7 @@ class SparseRandomProjection(BaseRandomProjection):
     """
     def __init__(self, n_components='auto', density='auto', eps=0.1,
                  dense_output=False, random_state=None):
-        super(SparseRandomProjection, self).__init__(
+        super().__init__(
             n_components=n_components,
             eps=eps,
             dense_output=dense_output,
diff --git a/sklearn/semi_supervised/label_propagation.py b/sklearn/semi_supervised/label_propagation.py
index cacf51af242a4..ed705469564ee 100644
--- a/sklearn/semi_supervised/label_propagation.py
+++ b/sklearn/semi_supervised/label_propagation.py
@@ -378,7 +378,7 @@ class LabelPropagation(BaseLabelPropagation):
 
     def __init__(self, kernel='rbf', gamma=20, n_neighbors=7,
                  max_iter=1000, tol=1e-3, n_jobs=None):
-        super(LabelPropagation, self).__init__(
+        super().__init__(
             kernel=kernel, gamma=gamma, n_neighbors=n_neighbors,
             max_iter=max_iter, tol=tol, n_jobs=n_jobs, alpha=None)
 
@@ -399,7 +399,7 @@ class distributions will exceed 1 (normalization may be desired).
         return affinity_matrix
 
     def fit(self, X, y):
-        return super(LabelPropagation, self).fit(X, y)
+        return super().fit(X, y)
 
 
 class LabelSpreading(BaseLabelPropagation):
@@ -494,7 +494,7 @@ def __init__(self, kernel='rbf', gamma=20, n_neighbors=7, alpha=0.2,
                  max_iter=30, tol=1e-3, n_jobs=None):
 
         # this one has different base parameters
-        super(LabelSpreading, self).__init__(kernel=kernel, gamma=gamma,
+        super().__init__(kernel=kernel, gamma=gamma,
                                              n_neighbors=n_neighbors,
                                              alpha=alpha, max_iter=max_iter,
                                              tol=tol,
diff --git a/sklearn/svm/base.py b/sklearn/svm/base.py
index fbda065fb4982..7dd8042e55606 100644
--- a/sklearn/svm/base.py
+++ b/sklearn/svm/base.py
@@ -505,7 +505,7 @@ def __init__(self, kernel, degree, gamma, coef0, tol, C, nu,
                  shrinking, probability, cache_size, class_weight, verbose,
                  max_iter, decision_function_shape, random_state):
         self.decision_function_shape = decision_function_shape
-        super(BaseSVC, self).__init__(
+        super().__init__(
             kernel=kernel, degree=degree, gamma=gamma,
             coef0=coef0, tol=tol, C=C, nu=nu, epsilon=0., shrinking=shrinking,
             probability=probability, cache_size=cache_size,
@@ -571,7 +571,7 @@ def predict(self, X):
         y_pred : array, shape (n_samples,)
             Class labels for samples in X.
         """
-        y = super(BaseSVC, self).predict(X)
+        y = super().predict(X)
         return self.classes_.take(np.asarray(y, dtype=np.intp))
 
     # Hacky way of getting predict_proba to raise an AttributeError when
diff --git a/sklearn/svm/classes.py b/sklearn/svm/classes.py
index 81980de2703fe..7c3d91d2bc135 100644
--- a/sklearn/svm/classes.py
+++ b/sklearn/svm/classes.py
@@ -608,7 +608,7 @@ def __init__(self, C=1.0, kernel='rbf', degree=3, gamma='auto_deprecated',
                  verbose=False, max_iter=-1, decision_function_shape='ovr',
                  random_state=None):
 
-        super(SVC, self).__init__(
+        super().__init__(
             kernel=kernel, degree=degree, gamma=gamma,
             coef0=coef0, tol=tol, C=C, nu=0., shrinking=shrinking,
             probability=probability, cache_size=cache_size,
@@ -775,7 +775,7 @@ def __init__(self, nu=0.5, kernel='rbf', degree=3, gamma='auto_deprecated',
                  cache_size=200, class_weight=None, verbose=False, max_iter=-1,
                  decision_function_shape='ovr', random_state=None):
 
-        super(NuSVC, self).__init__(
+        super().__init__(
             kernel=kernel, degree=degree, gamma=gamma,
             coef0=coef0, tol=tol, C=0., nu=nu, shrinking=shrinking,
             probability=probability, cache_size=cache_size,
@@ -903,7 +903,7 @@ def __init__(self, kernel='rbf', degree=3, gamma='auto_deprecated',
                  coef0=0.0, tol=1e-3, C=1.0, epsilon=0.1, shrinking=True,
                  cache_size=200, verbose=False, max_iter=-1):
 
-        super(SVR, self).__init__(
+        super().__init__(
             kernel=kernel, degree=degree, gamma=gamma,
             coef0=coef0, tol=tol, C=C, nu=0., epsilon=epsilon, verbose=verbose,
             shrinking=shrinking, probability=False, cache_size=cache_size,
@@ -1030,7 +1030,7 @@ def __init__(self, nu=0.5, C=1.0, kernel='rbf', degree=3,
                  gamma='auto_deprecated', coef0=0.0, shrinking=True,
                  tol=1e-3, cache_size=200, verbose=False, max_iter=-1):
 
-        super(NuSVR, self).__init__(
+        super().__init__(
             kernel=kernel, degree=degree, gamma=gamma, coef0=coef0,
             tol=tol, C=C, nu=nu, epsilon=0., shrinking=shrinking,
             probability=False, cache_size=cache_size, class_weight=None,
@@ -1138,7 +1138,7 @@ def __init__(self, kernel='rbf', degree=3, gamma='auto_deprecated',
                  coef0=0.0, tol=1e-3, nu=0.5, shrinking=True, cache_size=200,
                  verbose=False, max_iter=-1, random_state=None):
 
-        super(OneClassSVM, self).__init__(
+        super().__init__(
             kernel, degree, gamma, coef0, tol, 0., nu, 0.,
             shrinking, False, cache_size, None, verbose, max_iter,
             random_state)
@@ -1174,7 +1174,7 @@ def fit(self, X, y=None, sample_weight=None, **params):
             warnings.warn("The random_state parameter is deprecated and will"
                           " be removed in version 0.22.", DeprecationWarning)
 
-        super(OneClassSVM, self).fit(X, np.ones(_num_samples(X)),
+        super().fit(X, np.ones(_num_samples(X)),
                                      sample_weight=sample_weight, **params)
         self.offset_ = -self._intercept_
         return self
@@ -1227,5 +1227,5 @@ def predict(self, X):
         y_pred : array, shape (n_samples,)
             Class labels for samples in X.
         """
-        y = super(OneClassSVM, self).predict(X)
+        y = super().predict(X)
         return np.asarray(y, dtype=np.intp)
diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py
index 0db8fdff471b5..ec1c1356c3367 100644
--- a/sklearn/tests/test_base.py
+++ b/sklearn/tests/test_base.py
@@ -65,7 +65,7 @@ def __init__(self, a=None):
         self.a = 1
 
 
-class NoEstimator(object):
+class NoEstimator:
     def __init__(self):
         pass
 
@@ -233,7 +233,7 @@ def test_set_params_passes_all_parameters():
 
     class TestDecisionTree(DecisionTreeClassifier):
         def set_params(self, **kwargs):
-            super(TestDecisionTree, self).set_params(**kwargs)
+            super().set_params(**kwargs)
             # expected_kwargs is in test scope
             assert kwargs == expected_kwargs
             return self
@@ -381,7 +381,7 @@ def test_pickle_version_no_warning_is_issued_with_non_sklearn_estimator():
         TreeNoVersion.__module__ = module_backup
 
 
-class DontPickleAttributeMixin(object):
+class DontPickleAttributeMixin:
     def __getstate__(self):
         data = self.__dict__.copy()
         data["_attribute_not_pickled"] = None
diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
index 267d3bb06aefc..a8d25a05745d3 100644
--- a/sklearn/tests/test_common.py
+++ b/sklearn/tests/test_common.py
@@ -172,7 +172,7 @@ def test_import_all_consistency():
         for name in getattr(package, '__all__', ()):
             if getattr(package, name, None) is None:
                 raise AttributeError(
-                    "Module '{0}' has no attribute '{1}'".format(
+                    "Module '{}' has no attribute '{}'".format(
                         modname, name))
 
 
@@ -194,15 +194,15 @@ def test_all_tests_are_importable():
                                       \.tests(\.|$)|
                                       \._
                                       ''')
-    lookup = dict((name, ispkg)
+    lookup = {name: ispkg
                   for _, name, ispkg
                   in pkgutil.walk_packages(sklearn.__path__,
-                                           prefix='sklearn.'))
+                                           prefix='sklearn.')}
     missing_tests = [name for name, ispkg in lookup.items()
                      if ispkg
                      and not HAS_TESTS_EXCEPTIONS.search(name)
                      and name + '.tests' not in lookup]
     assert_equal(missing_tests, [],
-                 '{0} do not have `tests` subpackages. Perhaps they require '
+                 '{} do not have `tests` subpackages. Perhaps they require '
                  '__init__.py or an add_subpackage directive in the parent '
                  'setup.py'.format(missing_tests))
diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py
index 3bd417d0229a0..6983176bf8a34 100644
--- a/sklearn/tests/test_docstring_parameters.py
+++ b/sklearn/tests/test_docstring_parameters.py
@@ -20,9 +20,9 @@
 
 import pytest
 
-PUBLIC_MODULES = set([pckg[1] for pckg in walk_packages(prefix='sklearn.',
-                                                        path=sklearn.__path__)
-                      if not ("._" in pckg[1] or ".tests." in pckg[1])])
+PUBLIC_MODULES = {pckg[1] for pckg in walk_packages(prefix='sklearn.',
+                                                    path=sklearn.__path__)
+                  if not ("._" in pckg[1] or ".tests." in pckg[1])}
 
 
 # functions to ignore args / docstring of
diff --git a/sklearn/tests/test_impute.py b/sklearn/tests/test_impute.py
index 7131ac3ed0f5f..9521e877554c9 100644
--- a/sklearn/tests/test_impute.py
+++ b/sklearn/tests/test_impute.py
@@ -298,7 +298,7 @@ def test_imputation_most_frequent_pandas(dtype):
     # Test imputation using the most frequent strategy on pandas df
     pd = pytest.importorskip("pandas")
 
-    f = io.StringIO(u"Cat1,Cat2,Cat3,Cat4\n"
+    f = io.StringIO("Cat1,Cat2,Cat3,Cat4\n"
                     ",i,x,\n"
                     "a,,y,\n"
                     "a,j,,\n"
@@ -411,7 +411,7 @@ def test_imputation_constant_pandas(dtype):
     # Test imputation using the constant strategy on pandas df
     pd = pytest.importorskip("pandas")
 
-    f = io.StringIO(u"Cat1,Cat2,Cat3,Cat4\n"
+    f = io.StringIO("Cat1,Cat2,Cat3,Cat4\n"
                     ",i,x,\n"
                     "a,,y,\n"
                     "a,j,,\n"
diff --git a/sklearn/tests/test_metaestimators.py b/sklearn/tests/test_metaestimators.py
index 47de7ae374b74..c430912876f6e 100644
--- a/sklearn/tests/test_metaestimators.py
+++ b/sklearn/tests/test_metaestimators.py
@@ -16,7 +16,7 @@
 from sklearn.exceptions import NotFittedError
 
 
-class DelegatorData(object):
+class DelegatorData:
     def __init__(self, name, construct, skip_methods=(),
                  fit_args=make_classification()):
         self.name = name
diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py
index 6a0265600f979..bdb57cf7f3a82 100644
--- a/sklearn/tests/test_multiclass.py
+++ b/sklearn/tests/test_multiclass.py
@@ -548,8 +548,8 @@ def test_ovo_partial_fit_predict():
     # raises error when mini-batch does not have classes from all_classes
     ovo = OneVsOneClassifier(MultinomialNB())
     error_y = [0, 1, 2, 3, 4, 5, 2]
-    message_re = escape("Mini-batch contains {0} while "
-                        "it must be subset of {1}".format(np.unique(error_y),
+    message_re = escape("Mini-batch contains {} while "
+                        "it must be subset of {}".format(np.unique(error_y),
                                                           np.unique(y)))
     assert_raises_regexp(ValueError, message_re, ovo.partial_fit, X[:7],
                          error_y, np.unique(y))
@@ -595,7 +595,7 @@ def test_ovo_decision_function():
         # binary classifiers.
         # Therefore, sorting predictions based on votes would yield
         # mostly tied predictions:
-        assert set(votes[:, class_idx]).issubset(set([0., 1., 2.]))
+        assert set(votes[:, class_idx]).issubset({0., 1., 2.})
 
         # The OVO decision function on the other hand is able to resolve
         # most of the ties on this data as it combines both the vote counts
diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index 55202ae63afe2..259876acd1a42 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -45,7 +45,7 @@
 )
 
 
-class NoFit(object):
+class NoFit:
     """Small class to test parameter dispatching.
     """
 
@@ -947,12 +947,12 @@ def test_pipeline_wrong_memory():
                         " Got memory='1' instead.", cached_pipe.fit, X, y)
 
 
-class DummyMemory(object):
+class DummyMemory:
     def cache(self, func):
         return func
 
 
-class WrongDummyMemory(object):
+class WrongDummyMemory:
     pass
 
 
diff --git a/sklearn/tree/_reingold_tilford.py b/sklearn/tree/_reingold_tilford.py
index d83969badb623..fae84bbb85f64 100644
--- a/sklearn/tree/_reingold_tilford.py
+++ b/sklearn/tree/_reingold_tilford.py
@@ -19,7 +19,7 @@
 import numpy as np
 
 
-class DrawTree(object):
+class DrawTree:
     def __init__(self, tree, parent=None, depth=0, number=1):
         self.x = -1.
         self.y = depth
@@ -193,7 +193,7 @@ def second_walk(v, m=0, depth=0, min=None):
     return min
 
 
-class Tree(object):
+class Tree:
     def __init__(self, label="", node_id=-1, *children):
         self.label = label
         self.node_id = node_id
diff --git a/sklearn/tree/export.py b/sklearn/tree/export.py
index 017275cfb1c19..0a0fd4cdb0b48 100644
--- a/sklearn/tree/export.py
+++ b/sklearn/tree/export.py
@@ -66,7 +66,7 @@ def _color_brew(n):
     return color_list
 
 
-class Sentinel(object):
+class Sentinel:
     def __repr__(self):
         return '"tree.dot"'
 
@@ -176,7 +176,7 @@ def plot_tree(decision_tree, max_depth=None, feature_names=None,
     return exporter.export(decision_tree, ax=ax)
 
 
-class _BaseTreeExporter(object):
+class _BaseTreeExporter:
     def __init__(self, max_depth=None, feature_names=None,
                  class_names=None, label='all', filled=False,
                  impurity=True, node_ids=False,
@@ -355,7 +355,7 @@ def __init__(self, out_file=SENTINEL, max_depth=None,
                  node_ids=False, proportion=False, rotate=False, rounded=False,
                  special_characters=False, precision=3):
 
-        super(_DOTTreeExporter, self).__init__(
+        super().__init__(
             max_depth=max_depth, feature_names=feature_names,
             class_names=class_names, label=label, filled=filled,
             impurity=impurity,
@@ -513,7 +513,7 @@ def __init__(self, max_depth=None, feature_names=None,
                  proportion=False, rotate=False, rounded=False,
                  precision=3, fontsize=None):
 
-        super(_MPLTreeExporter, self).__init__(
+        super().__init__(
             max_depth=max_depth, feature_names=feature_names,
             class_names=class_names, label=label, filled=filled,
             impurity=impurity, node_ids=node_ids, proportion=proportion,
diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py
index 7001b7741c4e6..f4f844d168e88 100644
--- a/sklearn/tree/tests/test_tree.py
+++ b/sklearn/tree/tests/test_tree.py
@@ -164,7 +164,7 @@
 
 def assert_tree_equal(d, s, message):
     assert_equal(s.node_count, d.node_count,
-                 "{0}: inequal number of node ({1} != {2})"
+                 "{}: inequal number of node ({} != {})"
                  "".format(message, s.node_count, d.node_count))
 
     assert_array_equal(d.children_right, s.children_right,
@@ -197,12 +197,12 @@ def test_classification_toy():
         clf = Tree(random_state=0)
         clf.fit(X, y)
         assert_array_equal(clf.predict(T), true_result,
-                           "Failed with {0}".format(name))
+                           "Failed with {}".format(name))
 
         clf = Tree(max_features=1, random_state=1)
         clf.fit(X, y)
         assert_array_equal(clf.predict(T), true_result,
-                           "Failed with {0}".format(name))
+                           "Failed with {}".format(name))
 
 
 def test_weighted_classification_toy():
@@ -212,11 +212,11 @@ def test_weighted_classification_toy():
 
         clf.fit(X, y, sample_weight=np.ones(len(X)))
         assert_array_equal(clf.predict(T), true_result,
-                           "Failed with {0}".format(name))
+                           "Failed with {}".format(name))
 
         clf.fit(X, y, sample_weight=np.full(len(X), 0.5))
         assert_array_equal(clf.predict(T), true_result,
-                           "Failed with {0}".format(name))
+                           "Failed with {}".format(name))
 
 
 def test_regression_toy():
@@ -225,12 +225,12 @@ def test_regression_toy():
         reg = Tree(random_state=1)
         reg.fit(X, y)
         assert_almost_equal(reg.predict(T), true_result,
-                            err_msg="Failed with {0}".format(name))
+                            err_msg="Failed with {}".format(name))
 
         clf = Tree(max_features=1, random_state=1)
         clf.fit(X, y)
         assert_almost_equal(reg.predict(T), true_result,
-                            err_msg="Failed with {0}".format(name))
+                            err_msg="Failed with {}".format(name))
 
 
 def test_xor():
@@ -248,12 +248,12 @@ def test_xor():
         clf = Tree(random_state=0)
         clf.fit(X, y)
         assert_equal(clf.score(X, y), 1.0,
-                     "Failed with {0}".format(name))
+                     "Failed with {}".format(name))
 
         clf = Tree(random_state=0, max_features=1)
         clf.fit(X, y)
         assert_equal(clf.score(X, y), 1.0,
-                     "Failed with {0}".format(name))
+                     "Failed with {}".format(name))
 
 
 def test_iris():
@@ -263,14 +263,14 @@ def test_iris():
         clf.fit(iris.data, iris.target)
         score = accuracy_score(clf.predict(iris.data), iris.target)
         assert_greater(score, 0.9,
-                       "Failed with {0}, criterion = {1} and score = {2}"
+                       "Failed with {}, criterion = {} and score = {}"
                        "".format(name, criterion, score))
 
         clf = Tree(criterion=criterion, max_features=2, random_state=0)
         clf.fit(iris.data, iris.target)
         score = accuracy_score(clf.predict(iris.data), iris.target)
         assert_greater(score, 0.5,
-                       "Failed with {0}, criterion = {1} and score = {2}"
+                       "Failed with {}, criterion = {} and score = {}"
                        "".format(name, criterion, score))
 
 
@@ -282,7 +282,7 @@ def test_boston():
         reg.fit(boston.data, boston.target)
         score = mean_squared_error(boston.target, reg.predict(boston.data))
         assert_less(score, 1,
-                    "Failed with {0}, criterion = {1} and score = {2}"
+                    "Failed with {}, criterion = {} and score = {}"
                     "".format(name, criterion, score))
 
         # using fewer features reduces the learning ability of this tree,
@@ -291,7 +291,7 @@ def test_boston():
         reg.fit(boston.data, boston.target)
         score = mean_squared_error(boston.target, reg.predict(boston.data))
         assert_less(score, 2,
-                    "Failed with {0}, criterion = {1} and score = {2}"
+                    "Failed with {}, criterion = {} and score = {}"
                     "".format(name, criterion, score))
 
 
@@ -305,13 +305,13 @@ def test_probability():
         prob_predict = clf.predict_proba(iris.data)
         assert_array_almost_equal(np.sum(prob_predict, 1),
                                   np.ones(iris.data.shape[0]),
-                                  err_msg="Failed with {0}".format(name))
+                                  err_msg="Failed with {}".format(name))
         assert_array_equal(np.argmax(prob_predict, 1),
                            clf.predict(iris.data),
-                           err_msg="Failed with {0}".format(name))
+                           err_msg="Failed with {}".format(name))
         assert_almost_equal(clf.predict_proba(iris.data),
                             np.exp(clf.predict_log_proba(iris.data)), 8,
-                            err_msg="Failed with {0}".format(name))
+                            err_msg="Failed with {}".format(name))
 
 
 def test_arrayrepr():
@@ -334,13 +334,13 @@ def test_pure_set():
         clf = TreeClassifier(random_state=0)
         clf.fit(X, y)
         assert_array_equal(clf.predict(X), y,
-                           err_msg="Failed with {0}".format(name))
+                           err_msg="Failed with {}".format(name))
 
     for name, TreeRegressor in REG_TREES.items():
         reg = TreeRegressor(random_state=0)
         reg.fit(X, y)
         assert_almost_equal(reg.predict(X), y,
-                            err_msg="Failed with {0}".format(name))
+                            err_msg="Failed with {}".format(name))
 
 
 def test_numerical_stability():
@@ -383,8 +383,8 @@ def test_importances():
         importances = clf.feature_importances_
         n_important = np.sum(importances > 0.1)
 
-        assert_equal(importances.shape[0], 10, "Failed with {0}".format(name))
-        assert_equal(n_important, 3, "Failed with {0}".format(name))
+        assert_equal(importances.shape[0], 10, "Failed with {}".format(name))
+        assert_equal(n_important, 3, "Failed with {}".format(name))
 
     # Check on iris that importances are the same for all builders
     clf = DecisionTreeClassifier(random_state=0)
@@ -590,7 +590,7 @@ def test_min_samples_split():
         node_samples = est.tree_.n_node_samples[est.tree_.children_left != -1]
 
         assert_greater(np.min(node_samples), 9,
-                       "Failed with {0}".format(name))
+                       "Failed with {}".format(name))
 
         # test for float parameter
         est = TreeEstimator(min_samples_split=0.2,
@@ -601,7 +601,7 @@ def test_min_samples_split():
         node_samples = est.tree_.n_node_samples[est.tree_.children_left != -1]
 
         assert_greater(np.min(node_samples), 9,
-                       "Failed with {0}".format(name))
+                       "Failed with {}".format(name))
 
 
 def test_min_samples_leaf():
@@ -624,7 +624,7 @@ def test_min_samples_leaf():
         # drop inner nodes
         leaf_count = node_counts[node_counts != 0]
         assert_greater(np.min(leaf_count), 4,
-                       "Failed with {0}".format(name))
+                       "Failed with {}".format(name))
 
         # test float parameter
         est = TreeEstimator(min_samples_leaf=0.1,
@@ -636,7 +636,7 @@ def test_min_samples_leaf():
         # drop inner nodes
         leaf_count = node_counts[node_counts != 0]
         assert_greater(np.min(leaf_count), 4,
-                       "Failed with {0}".format(name))
+                       "Failed with {}".format(name))
 
 
 def check_min_weight_fraction_leaf(name, datasets, sparse=False):
@@ -673,8 +673,8 @@ def check_min_weight_fraction_leaf(name, datasets, sparse=False):
         assert_greater_equal(
             np.min(leaf_weights),
             total_weight * est.min_weight_fraction_leaf,
-            "Failed with {0} "
-            "min_weight_fraction_leaf={1}".format(
+            "Failed with {} "
+            "min_weight_fraction_leaf={}".format(
                 name, est.min_weight_fraction_leaf))
 
     # test case with no weights passed in
@@ -697,8 +697,8 @@ def check_min_weight_fraction_leaf(name, datasets, sparse=False):
         assert_greater_equal(
             np.min(leaf_weights),
             total_weight * est.min_weight_fraction_leaf,
-            "Failed with {0} "
-            "min_weight_fraction_leaf={1}".format(
+            "Failed with {} "
+            "min_weight_fraction_leaf={}".format(
                 name, est.min_weight_fraction_leaf))
 
 
@@ -744,9 +744,9 @@ def check_min_weight_fraction_leaf_with_min_samples_leaf(name, datasets,
             np.min(leaf_weights),
             max((total_weight *
                  est.min_weight_fraction_leaf), 5),
-            "Failed with {0} "
-            "min_weight_fraction_leaf={1}, "
-            "min_samples_leaf={2}".format(name,
+            "Failed with {} "
+            "min_weight_fraction_leaf={}, "
+            "min_samples_leaf={}".format(name,
                                           est.min_weight_fraction_leaf,
                                           est.min_samples_leaf))
     for max_leaf_nodes, frac in product((None, 1000), np.linspace(0, 0.5, 3)):
@@ -769,9 +769,9 @@ def check_min_weight_fraction_leaf_with_min_samples_leaf(name, datasets,
             np.min(leaf_weights),
             max((total_weight * est.min_weight_fraction_leaf),
                 (total_weight * est.min_samples_leaf)),
-            "Failed with {0} "
-            "min_weight_fraction_leaf={1}, "
-            "min_samples_leaf={2}".format(name,
+            "Failed with {} "
+            "min_weight_fraction_leaf={}, "
+            "min_samples_leaf={}".format(name,
                                           est.min_weight_fraction_leaf,
                                           est.min_samples_leaf))
 
@@ -805,7 +805,7 @@ def test_min_impurity_split():
         est = TreeEstimator(max_leaf_nodes=max_leaf_nodes,
                             random_state=0)
         assert est.min_impurity_split is None, (
-            "Failed, min_impurity_split = {0} > 1e-7".format(
+            "Failed, min_impurity_split = {} > 1e-7".format(
                 est.min_impurity_split))
         try:
             assert_warns(DeprecationWarning, est.fit, X, y)
@@ -815,8 +815,8 @@ def test_min_impurity_split():
             if (est.tree_.children_left[node] == TREE_LEAF or
                     est.tree_.children_right[node] == TREE_LEAF):
                 assert_equal(est.tree_.impurity[node], 0.,
-                             "Failed with {0} "
-                             "min_impurity_split={1}".format(
+                             "Failed with {} "
+                             "min_impurity_split={}".format(
                                  est.tree_.impurity[node],
                                  est.min_impurity_split))
 
@@ -832,13 +832,13 @@ def test_min_impurity_split():
             if (est.tree_.children_left[node] == TREE_LEAF or
                     est.tree_.children_right[node] == TREE_LEAF):
                 assert_greater_equal(est.tree_.impurity[node], 0,
-                                     "Failed with {0}, "
-                                     "min_impurity_split={1}".format(
+                                     "Failed with {}, "
+                                     "min_impurity_split={}".format(
                                          est.tree_.impurity[node],
                                          est.min_impurity_split))
                 assert_less_equal(est.tree_.impurity[node], min_impurity_split,
-                                  "Failed with {0}, "
-                                  "min_impurity_split={1}".format(
+                                  "Failed with {}, "
+                                  "min_impurity_split={}".format(
                                       est.tree_.impurity[node],
                                       est.min_impurity_split))
 
@@ -868,7 +868,7 @@ def test_min_impurity_decrease():
         for est, expected_decrease in ((est1, 1e-7), (est2, 0.05),
                                        (est3, 0.0001), (est4, 0.1)):
             assert_less_equal(est.min_impurity_decrease, expected_decrease,
-                              "Failed, min_impurity_decrease = {0} > {1}"
+                              "Failed, min_impurity_decrease = {} > {}"
                               .format(est.min_impurity_decrease,
                                       expected_decrease))
             est.fit(X, y)
@@ -899,8 +899,8 @@ def test_min_impurity_decrease():
                         imp_parent - wtd_avg_left_right_imp)
 
                     assert_greater_equal(actual_decrease, expected_decrease,
-                                         "Failed with {0} "
-                                         "expected min_impurity_decrease={1}"
+                                         "Failed with {} "
+                                         "expected min_impurity_decrease={}"
                                          .format(actual_decrease,
                                                  expected_decrease))
 
@@ -923,13 +923,13 @@ def test_min_impurity_decrease():
         score2 = est2.score(X, y)
         assert_equal(score, score2,
                      "Failed to generate same score  after pickling "
-                     "with {0}".format(name))
+                     "with {}".format(name))
 
         for attribute in fitted_attribute:
             assert_equal(getattr(est2.tree_, attribute),
                          fitted_attribute[attribute],
-                         "Failed to generate same attribute {0} after "
-                         "pickling with {1}".format(attribute, name))
+                         "Failed to generate same attribute {} after "
+                         "pickling with {}".format(attribute, name))
 
 
 def test_multioutput():
@@ -1341,7 +1341,7 @@ def check_sparse_input(tree, dataset, max_depth=None):
         s = TreeEstimator(random_state=0, max_depth=max_depth).fit(X_sparse, y)
 
         assert_tree_equal(d.tree_, s.tree_,
-                          "{0} with dense and sparse format gave different "
+                          "{} with dense and sparse format gave different "
                           "trees".format(tree))
 
         y_pred = d.predict(X)
@@ -1393,7 +1393,7 @@ def check_sparse_parameters(tree, dataset):
     s = TreeEstimator(random_state=0, max_features=1,
                       max_depth=2).fit(X_sparse, y)
     assert_tree_equal(d.tree_, s.tree_,
-                      "{0} with dense and sparse format gave different "
+                      "{} with dense and sparse format gave different "
                       "trees".format(tree))
     assert_array_almost_equal(s.predict(X), d.predict(X))
 
@@ -1403,7 +1403,7 @@ def check_sparse_parameters(tree, dataset):
     s = TreeEstimator(random_state=0, max_features=1,
                       min_samples_split=10).fit(X_sparse, y)
     assert_tree_equal(d.tree_, s.tree_,
-                      "{0} with dense and sparse format gave different "
+                      "{} with dense and sparse format gave different "
                       "trees".format(tree))
     assert_array_almost_equal(s.predict(X), d.predict(X))
 
@@ -1413,7 +1413,7 @@ def check_sparse_parameters(tree, dataset):
     s = TreeEstimator(random_state=0,
                       min_samples_leaf=X_sparse.shape[0] // 2).fit(X_sparse, y)
     assert_tree_equal(d.tree_, s.tree_,
-                      "{0} with dense and sparse format gave different "
+                      "{} with dense and sparse format gave different "
                       "trees".format(tree))
     assert_array_almost_equal(s.predict(X), d.predict(X))
 
@@ -1421,7 +1421,7 @@ def check_sparse_parameters(tree, dataset):
     d = TreeEstimator(random_state=0, max_leaf_nodes=3).fit(X, y)
     s = TreeEstimator(random_state=0, max_leaf_nodes=3).fit(X_sparse, y)
     assert_tree_equal(d.tree_, s.tree_,
-                      "{0} with dense and sparse format gave different "
+                      "{} with dense and sparse format gave different "
                       "trees".format(tree))
     assert_array_almost_equal(s.predict(X), d.predict(X))
 
@@ -1441,7 +1441,7 @@ def check_sparse_criterion(tree, dataset):
                           criterion=criterion).fit(X_sparse, y)
 
         assert_tree_equal(d.tree_, s.tree_,
-                          "{0} with dense and sparse format gave different "
+                          "{} with dense and sparse format gave different "
                           "trees".format(tree))
         assert_array_almost_equal(s.predict(X), d.predict(X))
 
@@ -1501,7 +1501,7 @@ def check_explicit_sparse_zeros(tree, max_depth=3,
     s = TreeEstimator(random_state=0, max_depth=max_depth).fit(X_sparse, y)
 
     assert_tree_equal(d.tree_, s.tree_,
-                      "{0} with dense and sparse format gave different "
+                      "{} with dense and sparse format gave different "
                       "trees".format(tree))
 
     Xs = (X_test, X_sparse_test)
diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py
index f8982590f7671..083392c43ba00 100644
--- a/sklearn/tree/tree.py
+++ b/sklearn/tree/tree.py
@@ -757,7 +757,7 @@ def __init__(self,
                  min_impurity_split=None,
                  class_weight=None,
                  presort=False):
-        super(DecisionTreeClassifier, self).__init__(
+        super().__init__(
             criterion=criterion,
             splitter=splitter,
             max_depth=max_depth,
@@ -808,7 +808,7 @@ def fit(self, X, y, sample_weight=None, check_input=True,
         self : object
         """
 
-        super(DecisionTreeClassifier, self).fit(
+        super().fit(
             X, y,
             sample_weight=sample_weight,
             check_input=check_input,
@@ -1100,7 +1100,7 @@ def __init__(self,
                  min_impurity_decrease=0.,
                  min_impurity_split=None,
                  presort=False):
-        super(DecisionTreeRegressor, self).__init__(
+        super().__init__(
             criterion=criterion,
             splitter=splitter,
             max_depth=max_depth,
@@ -1149,7 +1149,7 @@ def fit(self, X, y, sample_weight=None, check_input=True,
         self : object
         """
 
-        super(DecisionTreeRegressor, self).fit(
+        super().fit(
             X, y,
             sample_weight=sample_weight,
             check_input=check_input,
@@ -1326,7 +1326,7 @@ def __init__(self,
                  min_impurity_decrease=0.,
                  min_impurity_split=None,
                  class_weight=None):
-        super(ExtraTreeClassifier, self).__init__(
+        super().__init__(
             criterion=criterion,
             splitter=splitter,
             max_depth=max_depth,
@@ -1494,7 +1494,7 @@ def __init__(self,
                  min_impurity_decrease=0.,
                  min_impurity_split=None,
                  max_leaf_nodes=None):
-        super(ExtraTreeRegressor, self).__init__(
+        super().__init__(
             criterion=criterion,
             splitter=splitter,
             max_depth=max_depth,
diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index c3eac1b5d4479..6d9675acbc2b8 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -90,7 +90,7 @@ class Bunch(dict):
     """
 
     def __init__(self, **kwargs):
-        super(Bunch, self).__init__(kwargs)
+        super().__init__(kwargs)
 
     def __setattr__(self, key, value):
         self[key] = value
diff --git a/sklearn/utils/deprecation.py b/sklearn/utils/deprecation.py
index d6388a5c5f4b9..91edde0230798 100644
--- a/sklearn/utils/deprecation.py
+++ b/sklearn/utils/deprecation.py
@@ -5,7 +5,7 @@
 __all__ = ["deprecated"]
 
 
-class deprecated(object):
+class deprecated:
     """Decorator to mark a function or class as deprecated.
 
     Issue a warning when the function is called/the class is instantiated and
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 02050071a0d0b..a4229d1bb4115 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -161,11 +161,11 @@ def check_supervised_y_no_nan(name, estimator_orig):
         estimator.fit(X, y)
     except ValueError as e:
         if str(e) != errmsg:
-            raise ValueError("Estimator {0} raised error as expected, but "
+            raise ValueError("Estimator {} raised error as expected, but "
                              "does not match expected error message"
                              .format(name))
     else:
-        raise ValueError("Estimator {0} should have raised error on fitting "
+        raise ValueError("Estimator {} should have raised error on fitting "
                          "array y with NaN value.".format(name))
 
 
@@ -386,7 +386,7 @@ def set_checking_parameters(estimator):
         estimator.set_params(k=1)
 
 
-class NotAnArray(object):
+class NotAnArray:
     """An object that is convertible to an array
 
     Parameters
@@ -544,7 +544,7 @@ def check_sample_weights_pandas_series(name, estimator_orig):
             try:
                 estimator.fit(X, y, sample_weight=weights)
             except ValueError:
-                raise ValueError("Estimator {0} raises error if "
+                raise ValueError("Estimator {} raises error if "
                                  "'sample_weight' parameter is of "
                                  "type pandas.Series".format(name))
         except ImportError:
@@ -1987,7 +1987,7 @@ def check_no_attributes_set_in_init(name, estimator):
             % (name, sorted(invalid_attr)))
     # Ensure that each parameter is set in init
     invalid_attr = (set(init_params) - set(vars(estimator))
-                    - set(["self"]))
+                    - {"self"})
     assert not invalid_attr, (
             "Estimator %s should store all parameters"
             " as an attribute during init. Did not find "
diff --git a/sklearn/utils/linear_assignment_.py b/sklearn/utils/linear_assignment_.py
index 7c3570a9744ab..846e8b6b869d9 100644
--- a/sklearn/utils/linear_assignment_.py
+++ b/sklearn/utils/linear_assignment_.py
@@ -60,7 +60,7 @@ def linear_assignment(X):
     return indices
 
 
-class _HungarianState(object):
+class _HungarianState:
     """State of one execution of the Hungarian algorithm.
 
     Parameters
diff --git a/sklearn/utils/metaestimators.py b/sklearn/utils/metaestimators.py
index e2d6ca58b5e05..9f8ea059b9520 100644
--- a/sklearn/utils/metaestimators.py
+++ b/sklearn/utils/metaestimators.py
@@ -22,7 +22,7 @@ def __init__(self):
         pass
 
     def _get_params(self, attr, deep=True):
-        out = super(_BaseComposition, self).get_params(deep=deep)
+        out = super().get_params(deep=deep)
         if not deep:
             return out
         estimators = getattr(self, attr)
@@ -47,7 +47,7 @@ def _set_params(self, attr, **params):
             if '__' not in name and name in names:
                 self._replace_estimator(attr, name, params.pop(name))
         # 3. Step parameters and other initialisation arguments
-        super(_BaseComposition, self).set_params(**params)
+        super().set_params(**params)
         return self
 
     def _replace_estimator(self, attr, name, new_val):
@@ -62,18 +62,18 @@ def _replace_estimator(self, attr, name, new_val):
     def _validate_names(self, names):
         if len(set(names)) != len(names):
             raise ValueError('Names provided are not unique: '
-                             '{0!r}'.format(list(names)))
+                             '{!r}'.format(list(names)))
         invalid_names = set(names).intersection(self.get_params(deep=False))
         if invalid_names:
             raise ValueError('Estimator names conflict with constructor '
-                             'arguments: {0!r}'.format(sorted(invalid_names)))
+                             'arguments: {!r}'.format(sorted(invalid_names)))
         invalid_names = [name for name in names if '__' in name]
         if invalid_names:
             raise ValueError('Estimator names must not contain __: got '
-                             '{0!r}'.format(invalid_names))
+                             '{!r}'.format(invalid_names))
 
 
-class _IffHasAttrDescriptor(object):
+class _IffHasAttrDescriptor:
     """Implements a conditional property using the descriptor protocol.
 
     Using this class to create a decorator will raise an ``AttributeError``
diff --git a/sklearn/utils/mocking.py b/sklearn/utils/mocking.py
index 762264c4d1b5c..53c7960786d23 100644
--- a/sklearn/utils/mocking.py
+++ b/sklearn/utils/mocking.py
@@ -4,7 +4,7 @@
 from .validation import _num_samples, check_array
 
 
-class ArraySlicingWrapper(object):
+class ArraySlicingWrapper:
     """
     Parameters
     ----------
@@ -17,7 +17,7 @@ def __getitem__(self, aslice):
         return MockDataFrame(self.array[aslice])
 
 
-class MockDataFrame(object):
+class MockDataFrame:
     """
     Parameters
     ----------
diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py
index f383f7a821394..445d0fbe7fc5f 100644
--- a/sklearn/utils/multiclass.py
+++ b/sklearn/utils/multiclass.py
@@ -72,7 +72,7 @@ def unique_labels(*ys):
         raise ValueError('No argument has been passed.')
     # Check that we don't mix label format
 
-    ys_types = set(type_of_target(x) for x in ys)
+    ys_types = {type_of_target(x) for x in ys}
     if ys_types == {"binary", "multiclass"}:
         ys_types = {"multiclass"}
 
@@ -83,8 +83,8 @@ def unique_labels(*ys):
 
     # Check consistency for the indicator format
     if (label_type == "multilabel-indicator" and
-            len(set(check_array(y, ['csr', 'csc', 'coo']).shape[1]
-                    for y in ys)) > 1):
+            len({check_array(y, ['csr', 'csc', 'coo']).shape[1]
+                 for y in ys}) > 1):
         raise ValueError("Multi-label binary indicator input with "
                          "different numbers of labels")
 
@@ -96,7 +96,7 @@ def unique_labels(*ys):
     ys_labels = set(chain.from_iterable(_unique_labels(y) for y in ys))
 
     # Check that we don't mix string type with number type
-    if (len(set(isinstance(label, str) for label in ys_labels)) > 1):
+    if (len({isinstance(label, str) for label in ys_labels}) > 1):
         raise ValueError("Mix of label input types (string and number)")
 
     return np.array(sorted(ys_labels))
diff --git a/sklearn/utils/random.py b/sklearn/utils/random.py
index 61be8214dd1f1..3acae1a3b79ff 100644
--- a/sklearn/utils/random.py
+++ b/sklearn/utils/random.py
@@ -58,7 +58,7 @@ def random_choice_csc(n_samples, classes, class_probability=None,
             class_prob_j = np.asarray(class_probability[j])
 
         if not np.isclose(np.sum(class_prob_j), 1.0):
-            raise ValueError("Probability array at index {0} does not sum to "
+            raise ValueError("Probability array at index {} does not sum to "
                              "one".format(j))
 
         if class_prob_j.shape[0] != classes[j].shape[0]:
diff --git a/sklearn/utils/sparsefuncs.py b/sklearn/utils/sparsefuncs.py
index 918f32e6da3e5..73c16773e991e 100644
--- a/sklearn/utils/sparsefuncs.py
+++ b/sklearn/utils/sparsefuncs.py
@@ -453,7 +453,7 @@ def count_nonzero(X, axis=None, sample_weight=None):
     elif axis == -2:
         axis = 0
     elif X.format != 'csr':
-        raise TypeError('Expected CSR sparse format, got {0}'.format(X.format))
+        raise TypeError('Expected CSR sparse format, got {}'.format(X.format))
 
     # We rely here on the fact that np.diff(Y.indptr) for a CSR
     # will return the number of nonzero entries in each row.
@@ -478,7 +478,7 @@ def count_nonzero(X, axis=None, sample_weight=None):
             return np.bincount(X.indices, minlength=X.shape[1],
                             weights=weights)
     else:
-        raise ValueError('Unsupported axis: {0}'.format(axis))
+        raise ValueError('Unsupported axis: {}'.format(axis))
 
 
 def _get_median(data, n_zeros):
diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py
index 699026b9e47ee..66e0299f1c378 100644
--- a/sklearn/utils/testing.py
+++ b/sklearn/utils/testing.py
@@ -320,7 +320,7 @@ def ignore_warnings(obj=None, category=Warning):
         return _IgnoreWarnings(category=category)
 
 
-class _IgnoreWarnings(object):
+class _IgnoreWarnings:
     """Improved and simplified Python warnings context manager and decorator.
 
     This class allows the user to ignore the warnings raised by a function.
@@ -507,7 +507,7 @@ def fake_mldata(columns_dict, dataname, matfile, ordering=None):
 
 
 @deprecated('deprecated in version 0.20 to be removed in version 0.22')
-class mock_mldata_urlopen(object):
+class mock_mldata_urlopen:
     """Object that mocks the urlopen function to fake requests to mldata.
 
     When requesting a dataset with a name that is in mock_datasets, this object
@@ -821,7 +821,7 @@ def _delete_folder(folder_path, warn=False):
             warnings.warn("Could not delete temporary folder %s" % folder_path)
 
 
-class TempMemmap(object):
+class TempMemmap:
     """
     Parameters
     ----------
diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index 9228ea248f897..8492f7f09cc7d 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -95,7 +95,7 @@ def set_params(self, **kwargs):
             if p < 0:
                 raise ValueError("p can't be less than 0")
             self.p = p
-        return super(RaisesErrorInSetParams, self).set_params(**kwargs)
+        return super().set_params(**kwargs)
 
     def fit(self, X, y=None):
         X, y = check_X_y(X, y)
@@ -112,8 +112,7 @@ def set_params(self, **kwargs):
             if p < 0:
                 p = 0
             self.p = p
-        return super(ModifiesValueInsteadOfRaisingError,
-                     self).set_params(**kwargs)
+        return super().set_params(**kwargs)
 
     def fit(self, X, y=None):
         X, y = check_X_y(X, y)
@@ -132,8 +131,7 @@ def set_params(self, **kwargs):
             if a is None:
                 kwargs.pop('b')
                 self.b = 'method2'
-        return super(ModifiesAnotherValue,
-                     self).set_params(**kwargs)
+        return super().set_params(**kwargs)
 
     def fit(self, X, y=None):
         X, y = check_X_y(X, y)
@@ -411,11 +409,11 @@ def test_check_estimators_unfitted():
 
 
 def test_check_no_attributes_set_in_init():
-    class NonConformantEstimatorPrivateSet(object):
+    class NonConformantEstimatorPrivateSet:
         def __init__(self):
             self.you_should_not_set_this_ = None
 
-    class NonConformantEstimatorNoParamSet(object):
+    class NonConformantEstimatorNoParamSet:
         def __init__(self, you_should_set_this_=None):
             pass
 
diff --git a/sklearn/utils/tests/test_metaestimators.py b/sklearn/utils/tests/test_metaestimators.py
index e2df28e496515..40cee4aedffa7 100644
--- a/sklearn/utils/tests/test_metaestimators.py
+++ b/sklearn/utils/tests/test_metaestimators.py
@@ -1,12 +1,12 @@
 from sklearn.utils.metaestimators import if_delegate_has_method
 
 
-class Prefix(object):
+class Prefix:
     def func(self):
         pass
 
 
-class MockMetaEstimator(object):
+class MockMetaEstimator:
     """This is a mock meta estimator"""
     a_prefix = Prefix()
 
@@ -25,7 +25,7 @@ def test_delegated_docstring():
            in str(MockMetaEstimator().func.__doc__)
 
 
-class MetaEst(object):
+class MetaEst:
     """A mock meta estimator"""
     def __init__(self, sub_est, better_sub_est=None):
         self.sub_est = sub_est
@@ -52,14 +52,14 @@ def predict(self):
         pass
 
 
-class HasPredict(object):
+class HasPredict:
     """A mock sub-estimator with predict method"""
 
     def predict(self):
         pass
 
 
-class HasNoPredict(object):
+class HasNoPredict:
     """A mock sub-estimator with no predict method"""
     pass
 
diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py
index 184f44f8db29e..a3c19ff53dd45 100644
--- a/sklearn/utils/tests/test_multiclass.py
+++ b/sklearn/utils/tests/test_multiclass.py
@@ -31,7 +31,7 @@
 from sklearn import datasets
 
 
-class NotAnArray(object):
+class NotAnArray:
     """An object that is convertable to an array. This is useful to
     simulate a Pandas timeseries."""
 
@@ -71,8 +71,8 @@ def __array__(self, dtype=None):
         NotAnArray(np.array([1, 0, 2])),
         [0, 1, 2],
         ['a', 'b', 'c'],
-        np.array([u'a', u'b', u'c']),
-        np.array([u'a', u'b', u'c'], dtype=object),
+        np.array(['a', 'b', 'c']),
+        np.array(['a', 'b', 'c'], dtype=object),
         np.array(['a', 'b', 'c'], dtype=object),
     ],
     'multiclass-multioutput': [
@@ -82,8 +82,8 @@ def __array__(self, dtype=None):
         np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=np.float),
         np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=np.float32),
         np.array([['a', 'b'], ['c', 'd']]),
-        np.array([[u'a', u'b'], [u'c', u'd']]),
-        np.array([[u'a', u'b'], [u'c', u'd']], dtype=object),
+        np.array([['a', 'b'], ['c', 'd']]),
+        np.array([['a', 'b'], ['c', 'd']], dtype=object),
         np.array([[1, 0, 2]]),
         NotAnArray(np.array([[1, 0, 2]])),
     ],
@@ -106,7 +106,7 @@ def __array__(self, dtype=None):
         ['a', 'b'],
         ['abc', 'def'],
         np.array(['abc', 'def']),
-        [u'a', u'b'],
+        ['a', 'b'],
         np.array(['abc', 'def'], dtype=object),
     ],
     'continuous': [
diff --git a/sklearn/utils/tests/test_testing.py b/sklearn/utils/tests/test_testing.py
index 729b5ef81c684..389b11a90aa31 100644
--- a/sklearn/utils/tests/test_testing.py
+++ b/sklearn/utils/tests/test_testing.py
@@ -359,7 +359,7 @@ def f_check_param_definition(a, b, c, d, e):
     return a + b + c + d
 
 
-class Klass(object):
+class Klass:
     def f_missing(self, X, y):
         pass
 
@@ -381,7 +381,7 @@ def f_bad_sections(self, X, y):
         pass
 
 
-class MockEst(object):
+class MockEst:
     def __init__(self):
         """MockEstimator"""
     def fit(self, X, y):
@@ -397,7 +397,7 @@ def score(self, X):
         return 1.
 
 
-class MockMetaEstimator(object):
+class MockMetaEstimator:
     def __init__(self, delegate):
         """MetaEstimator to check if doctest on delegated methods work.
 
@@ -495,7 +495,7 @@ def test_check_docstring_parameters():
         assert mess in incorrect[0], '"%s" not in "%s"' % (mess, incorrect[0])
 
 
-class RegistrationCounter(object):
+class RegistrationCounter:
     def __init__(self):
         self.nb_calls = 0
 
diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py
index f76d95b82ef89..88138452d6ab6 100644
--- a/sklearn/utils/tests/test_utils.py
+++ b/sklearn/utils/tests/test_utils.py
@@ -65,7 +65,7 @@ def ham():
         warnings.simplefilter("always")
 
         @deprecated("don't use this")
-        class Ham(object):
+        class Ham:
             SPAM = 1
 
         ham = Ham()
diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index 99019e25c0c81..e9d766ed44094 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -725,12 +725,12 @@ def test_check_dataframe_warns_on_dtype():
                        dtype='numeric', warn_on_dtype=True)
 
 
-class DummyMemory(object):
+class DummyMemory:
     def cache(self, func):
         return func
 
 
-class WrongDummyMemory(object):
+class WrongDummyMemory:
     pass
 
 
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index fc882e0719a8d..d5597b5a10a96 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -785,7 +785,7 @@ def column_or_1d(y, warn=False):
                           DataConversionWarning, stacklevel=2)
         return np.ravel(y)
 
-    raise ValueError("bad input shape {0}".format(shape))
+    raise ValueError("bad input shape {}".format(shape))
 
 
 def check_random_state(seed):
@@ -865,7 +865,7 @@ def check_symmetric(array, tol=1E-10, raise_warning=True,
     """
     if (array.ndim != 2) or (array.shape[0] != array.shape[1]):
         raise ValueError("array must be 2-dimensional and square. "
-                         "shape = {0}".format(array.shape))
+                         "shape = {}".format(array.shape))
 
     if sp.issparse(array):
         diff = array - array.T