diff --git a/benchmarks/bench_plot_neighbors.py b/benchmarks/bench_plot_neighbors.py index 19bd22a7157a5..55cfc9a960d07 100644 --- a/benchmarks/bench_plot_neighbors.py +++ b/benchmarks/bench_plot_neighbors.py @@ -38,10 +38,10 @@ def barplot_neighbors(Nrange=2 ** np.arange(1, 11), #------------------------------------------------------------ # varying N - N_results_build = dict([(alg, np.zeros(len(Nrange))) - for alg in algorithms]) - N_results_query = dict([(alg, np.zeros(len(Nrange))) - for alg in algorithms]) + N_results_build = {alg: np.zeros(len(Nrange)) + for alg in algorithms} + N_results_query = {alg: np.zeros(len(Nrange)) + for alg in algorithms} for i, NN in enumerate(Nrange): print("N = %i (%i out of %i)" % (NN, i + 1, len(Nrange))) @@ -61,10 +61,10 @@ def barplot_neighbors(Nrange=2 ** np.arange(1, 11), #------------------------------------------------------------ # varying D - D_results_build = dict([(alg, np.zeros(len(Drange))) - for alg in algorithms]) - D_results_query = dict([(alg, np.zeros(len(Drange))) - for alg in algorithms]) + D_results_build = {alg: np.zeros(len(Drange)) + for alg in algorithms} + D_results_query = {alg: np.zeros(len(Drange)) + for alg in algorithms} for i, DD in enumerate(Drange): print("D = %i (%i out of %i)" % (DD, i + 1, len(Drange))) @@ -84,10 +84,10 @@ def barplot_neighbors(Nrange=2 ** np.arange(1, 11), #------------------------------------------------------------ # varying k - k_results_build = dict([(alg, np.zeros(len(krange))) - for alg in algorithms]) - k_results_query = dict([(alg, np.zeros(len(krange))) - for alg in algorithms]) + k_results_build = {alg: np.zeros(len(krange)) + for alg in algorithms} + k_results_query = {alg: np.zeros(len(krange)) + for alg in algorithms} X = get_data(N, DD, dataset) diff --git a/benchmarks/bench_plot_nmf.py b/benchmarks/bench_plot_nmf.py index 9cb96aa9dc1e2..3768447bed3a9 100644 --- a/benchmarks/bench_plot_nmf.py +++ b/benchmarks/bench_plot_nmf.py @@ -203,7 +203,7 @@ class _PGNMF(NMF): def __init__(self, n_components=None, solver='pg', init=None, tol=1e-4, max_iter=200, random_state=None, alpha=0., l1_ratio=0., nls_max_iter=10): - super(_PGNMF, self).__init__( + super().__init__( n_components=n_components, init=init, solver=solver, tol=tol, max_iter=max_iter, random_state=random_state, alpha=alpha, l1_ratio=l1_ratio) diff --git a/benchmarks/bench_sample_without_replacement.py b/benchmarks/bench_sample_without_replacement.py index c993f719ac245..c8bea117b406e 100644 --- a/benchmarks/bench_sample_without_replacement.py +++ b/benchmarks/bench_sample_without_replacement.py @@ -133,9 +133,9 @@ def bench_sample(sampling, n_population, n_samples): ########################################################################### # Remove unspecified algorithm - sampling_algorithm = dict((key, value) - for key, value in sampling_algorithm.items() - if key in selected_algorithm) + sampling_algorithm = {key: value + for key, value in sampling_algorithm.items() + if key in selected_algorithm} ########################################################################### # Perform benchmark diff --git a/doc/conf.py b/doc/conf.py index 6065b4557ac7e..3779c1f4871d4 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -288,6 +288,6 @@ def setup(app): # The following is used by sphinx.ext.linkcode to provide links to github linkcode_resolve = make_linkcode_resolve('sklearn', - u'https://github.com/scikit-learn/' + 'https://github.com/scikit-learn/' 'scikit-learn/blob/{revision}/' '{package}/{path}#L{lineno}') diff --git 
a/doc/tutorial/machine_learning_map/parse_path.py b/doc/tutorial/machine_learning_map/parse_path.py index 1539092df64ad..61df1cf0ad2e8 100644 --- a/doc/tutorial/machine_learning_map/parse_path.py +++ b/doc/tutorial/machine_learning_map/parse_path.py @@ -29,7 +29,7 @@ class CaselessPreservingLiteral(CaselessLiteral): instead of as defined. """ def __init__( self, matchString ): - super(CaselessPreservingLiteral,self).__init__( matchString.upper() ) + super().__init__(matchString.upper()) self.name = "'%s'" % matchString self.errmsg = "Expected " + self.name self.myException.msg = self.errmsg diff --git a/doc/tutorial/text_analytics/data/languages/fetch_data.py b/doc/tutorial/text_analytics/data/languages/fetch_data.py index 5c5c36a322caf..0bee5e1f4586f 100644 --- a/doc/tutorial/text_analytics/data/languages/fetch_data.py +++ b/doc/tutorial/text_analytics/data/languages/fetch_data.py @@ -17,23 +17,23 @@ import codecs pages = { - u'ar': u'http://ar.wikipedia.org/wiki/%D9%88%D9%8A%D9%83%D9%8A%D8%A8%D9%8A%D8%AF%D9%8A%D8%A7', - u'de': u'http://de.wikipedia.org/wiki/Wikipedia', - u'en': u'https://en.wikipedia.org/wiki/Wikipedia', - u'es': u'http://es.wikipedia.org/wiki/Wikipedia', - u'fr': u'http://fr.wikipedia.org/wiki/Wikip%C3%A9dia', - u'it': u'http://it.wikipedia.org/wiki/Wikipedia', - u'ja': u'http://ja.wikipedia.org/wiki/Wikipedia', - u'nl': u'http://nl.wikipedia.org/wiki/Wikipedia', - u'pl': u'http://pl.wikipedia.org/wiki/Wikipedia', - u'pt': u'http://pt.wikipedia.org/wiki/Wikip%C3%A9dia', - u'ru': u'http://ru.wikipedia.org/wiki/%D0%92%D0%B8%D0%BA%D0%B8%D0%BF%D0%B5%D0%B4%D0%B8%D1%8F', + 'ar': 'http://ar.wikipedia.org/wiki/%D9%88%D9%8A%D9%83%D9%8A%D8%A8%D9%8A%D8%AF%D9%8A%D8%A7', # noqa: E501 + 'de': 'http://de.wikipedia.org/wiki/Wikipedia', + 'en': 'https://en.wikipedia.org/wiki/Wikipedia', + 'es': 'http://es.wikipedia.org/wiki/Wikipedia', + 'fr': 'http://fr.wikipedia.org/wiki/Wikip%C3%A9dia', + 'it': 'http://it.wikipedia.org/wiki/Wikipedia', + 'ja': 'http://ja.wikipedia.org/wiki/Wikipedia', + 'nl': 'http://nl.wikipedia.org/wiki/Wikipedia', + 'pl': 'http://pl.wikipedia.org/wiki/Wikipedia', + 'pt': 'http://pt.wikipedia.org/wiki/Wikip%C3%A9dia', + 'ru': 'http://ru.wikipedia.org/wiki/%D0%92%D0%B8%D0%BA%D0%B8%D0%BF%D0%B5%D0%B4%D0%B8%D1%8F', # noqa: E501 # u'zh': u'http://zh.wikipedia.org/wiki/Wikipedia', } -html_folder = u'html' -text_folder = u'paragraphs' -short_text_folder = u'short_paragraphs' +html_folder = 'html' +text_folder = 'paragraphs' +short_text_folder = 'short_paragraphs' n_words_per_short_text = 5 @@ -93,7 +93,7 @@ groups = np.array_split(words, n_groups) for group in groups: - small_content = u" ".join(group) + small_content = " ".join(group) short_text_filename = os.path.join(short_text_lang_folder, '%s_%04d.txt' % (lang, j)) diff --git a/doc/tutorial/text_analytics/skeletons/exercise_01_language_train_model.py b/doc/tutorial/text_analytics/skeletons/exercise_01_language_train_model.py index 0b56cfc10d837..438481120d126 100644 --- a/doc/tutorial/text_analytics/skeletons/exercise_01_language_train_model.py +++ b/doc/tutorial/text_analytics/skeletons/exercise_01_language_train_model.py @@ -52,11 +52,11 @@ # Predict the result on some short new sentences: sentences = [ - u'This is a language detection test.', - u'Ceci est un test de d\xe9tection de la langue.', - u'Dies ist ein Test, um die Sprache zu erkennen.', + 'This is a language detection test.', + 'Ceci est un test de d\xe9tection de la langue.', + 'Dies ist ein Test, um die Sprache zu erkennen.', ] predicted = 
clf.predict(sentences) for s, p in zip(sentences, predicted): - print(u'The language of "%s" is "%s"' % (s, dataset.target_names[p])) + print('The language of "%s" is "%s"' % (s, dataset.target_names[p])) diff --git a/doc/tutorial/text_analytics/solutions/exercise_01_language_train_model.py b/doc/tutorial/text_analytics/solutions/exercise_01_language_train_model.py index 910b4dc50427d..defb7dc11630a 100644 --- a/doc/tutorial/text_analytics/solutions/exercise_01_language_train_model.py +++ b/doc/tutorial/text_analytics/solutions/exercise_01_language_train_model.py @@ -60,11 +60,11 @@ # Predict the result on some short new sentences: sentences = [ - u'This is a language detection test.', - u'Ceci est un test de d\xe9tection de la langue.', - u'Dies ist ein Test, um die Sprache zu erkennen.', + 'This is a language detection test.', + 'Ceci est un test de d\xe9tection de la langue.', + 'Dies ist ein Test, um die Sprache zu erkennen.', ] predicted = clf.predict(sentences) for s, p in zip(sentences, predicted): - print(u'The language of "%s" is "%s"' % (s, dataset.target_names[p])) + print('The language of "%s" is "%s"' % (s, dataset.target_names[p])) diff --git a/examples/applications/plot_out_of_core_classification.py b/examples/applications/plot_out_of_core_classification.py index 8dafc4fae120f..cf53190b45869 100644 --- a/examples/applications/plot_out_of_core_classification.py +++ b/examples/applications/plot_out_of_core_classification.py @@ -221,7 +221,7 @@ def get_minibatch(doc_iter, size, pos_class=positive_class): Note: size is before excluding invalid docs with no topics assigned. """ - data = [(u'{title}\n\n{body}'.format(**doc), pos_class in doc['topics']) + data = [('{title}\n\n{body}'.format(**doc), pos_class in doc['topics']) for doc in itertools.islice(doc_iter, size) if doc['topics']] if not len(data): diff --git a/examples/applications/svm_gui.py b/examples/applications/svm_gui.py index 7c5c9bbf305b8..a44b996b079c6 100644 --- a/examples/applications/svm_gui.py +++ b/examples/applications/svm_gui.py @@ -44,7 +44,7 @@ x_min, x_max = -50, 50 -class Model(object): +class Model: """The Model which hold the data. It implements the observable in the observer pattern and notifies the registered observers on change event. @@ -76,7 +76,7 @@ def dump_svmlight_file(self, file): dump_svmlight_file(X, y, file) -class Controller(object): +class Controller: def __init__(self, model): self.model = model self.kernel = Tk.IntVar() @@ -139,7 +139,7 @@ def refit(self): self.fit() -class View(object): +class View: """Test docstring. 
""" def __init__(self, root, controller): f = Figure() @@ -245,7 +245,7 @@ def plot_decision_surface(self, surface, type): raise ValueError("surface type unknown") -class ControllBar(object): +class ControllBar: def __init__(self, root, controller): fm = Tk.Frame(root) kernel_group = Tk.Frame(fm) diff --git a/examples/applications/wikipedia_principal_eigenvector.py b/examples/applications/wikipedia_principal_eigenvector.py index 27f3844a23094..d0e66b60ff436 100644 --- a/examples/applications/wikipedia_principal_eigenvector.py +++ b/examples/applications/wikipedia_principal_eigenvector.py @@ -112,7 +112,7 @@ def get_redirects(redirects_filename): for l, source in enumerate(redirects.keys()): transitive_target = None target = redirects[source] - seen = set([source]) + seen = {source} while True: transitive_target = target target = redirects.get(target) @@ -172,7 +172,7 @@ def get_adjacency_matrix(redirects_filename, page_links_filename, limit=None): # stop after 5M links to make it possible to work in RAM X, redirects, index_map = get_adjacency_matrix( redirects_filename, page_links_filename, limit=5000000) -names = dict((i, name) for name, i in index_map.items()) +names = {i: name for name, i in index_map.items()} print("Computing the principal singular vectors using randomized_svd") t0 = time() diff --git a/examples/gaussian_process/plot_gpr_noisy_targets.py b/examples/gaussian_process/plot_gpr_noisy_targets.py index 455b26ceef6a5..50992c19337b3 100644 --- a/examples/gaussian_process/plot_gpr_noisy_targets.py +++ b/examples/gaussian_process/plot_gpr_noisy_targets.py @@ -63,8 +63,8 @@ def f(x): # the MSE plt.figure() plt.plot(x, f(x), 'r:', label=r'$f(x) = x\,\sin(x)$') -plt.plot(X, y, 'r.', markersize=10, label=u'Observations') -plt.plot(x, y_pred, 'b-', label=u'Prediction') +plt.plot(X, y, 'r.', markersize=10, label='Observations') +plt.plot(x, y_pred, 'b-', label='Prediction') plt.fill(np.concatenate([x, x[::-1]]), np.concatenate([y_pred - 1.9600 * sigma, (y_pred + 1.9600 * sigma)[::-1]]), @@ -99,8 +99,8 @@ def f(x): # the MSE plt.figure() plt.plot(x, f(x), 'r:', label=r'$f(x) = x\,\sin(x)$') -plt.errorbar(X.ravel(), y, dy, fmt='r.', markersize=10, label=u'Observations') -plt.plot(x, y_pred, 'b-', label=u'Prediction') +plt.errorbar(X.ravel(), y, dy, fmt='r.', markersize=10, label='Observations') +plt.plot(x, y_pred, 'b-', label='Prediction') plt.fill(np.concatenate([x, x[::-1]]), np.concatenate([y_pred - 1.9600 * sigma, (y_pred + 1.9600 * sigma)[::-1]]), diff --git a/examples/mixture/plot_gmm_covariances.py b/examples/mixture/plot_gmm_covariances.py index 14a0efccc01da..b9704ee435db2 100644 --- a/examples/mixture/plot_gmm_covariances.py +++ b/examples/mixture/plot_gmm_covariances.py @@ -83,9 +83,9 @@ def make_ellipses(gmm, ax): n_classes = len(np.unique(y_train)) # Try GMMs using different types of covariances. 
-estimators = dict((cov_type, GaussianMixture(n_components=n_classes, - covariance_type=cov_type, max_iter=20, random_state=0)) - for cov_type in ['spherical', 'diag', 'tied', 'full']) +estimators = {cov_type: GaussianMixture(n_components=n_classes, + covariance_type=cov_type, max_iter=20, random_state=0) + for cov_type in ['spherical', 'diag', 'tied', 'full']} n_estimators = len(estimators) diff --git a/examples/model_selection/plot_nested_cross_validation_iris.py b/examples/model_selection/plot_nested_cross_validation_iris.py index b40dc91fc4d8f..d6aea44e6c546 100644 --- a/examples/model_selection/plot_nested_cross_validation_iris.py +++ b/examples/model_selection/plot_nested_cross_validation_iris.py @@ -90,7 +90,7 @@ score_difference = non_nested_scores - nested_scores -print("Average difference of {0:6f} with std. dev. of {1:6f}." +print("Average difference of {:6f} with std. dev. of {:6f}." .format(score_difference.mean(), score_difference.std())) # Plot scores on each trial for nested and non-nested CV diff --git a/setup.py b/setup.py index cce21f5883c5a..dd6ae2fc7913e 100755 --- a/setup.py +++ b/setup.py @@ -49,12 +49,12 @@ # We need to import setuptools early, if we want setuptools features, # as it monkey-patches the 'setup' function # For some commands, use setuptools -SETUPTOOLS_COMMANDS = set([ +SETUPTOOLS_COMMANDS = { 'develop', 'release', 'bdist_egg', 'bdist_rpm', 'bdist_wininst', 'install_egg_info', 'build_sphinx', 'egg_info', 'easy_install', 'upload', 'bdist_wheel', '--single-version-externally-managed', -]) +} if SETUPTOOLS_COMMANDS.intersection(sys.argv): import setuptools @@ -63,8 +63,8 @@ include_package_data=True, extras_require={ 'alldeps': ( - 'numpy >= {0}'.format(NUMPY_MIN_VERSION), - 'scipy >= {0}'.format(SCIPY_MIN_VERSION), + 'numpy >= {}'.format(NUMPY_MIN_VERSION), + 'scipy >= {}'.format(SCIPY_MIN_VERSION), ), }, ) @@ -111,7 +111,7 @@ def run(self): # to PyPI at release time. # The URL of the artifact repositories are configured in the setup.cfg file. 
-WHEELHOUSE_UPLOADER_COMMANDS = set(['fetch_artifacts', 'upload_all']) +WHEELHOUSE_UPLOADER_COMMANDS = {'fetch_artifacts', 'upload_all'} if WHEELHOUSE_UPLOADER_COMMANDS.intersection(sys.argv): import wheelhouse_uploader.cmd @@ -190,8 +190,8 @@ def setup_package(): ], cmdclass=cmdclass, install_requires=[ - 'numpy>={0}'.format(NUMPY_MIN_VERSION), - 'scipy>={0}'.format(SCIPY_MIN_VERSION) + 'numpy>={}'.format(NUMPY_MIN_VERSION), + 'scipy>={}'.format(SCIPY_MIN_VERSION) ], **extra_setuptools_args) @@ -214,7 +214,7 @@ def setup_package(): metadata['version'] = VERSION else: numpy_status = get_numpy_status() - numpy_req_str = "scikit-learn requires NumPy >= {0}.\n".format( + numpy_req_str = "scikit-learn requires NumPy >= {}.\n".format( NUMPY_MIN_VERSION) instructions = ("Installation instructions are available on the " @@ -224,12 +224,12 @@ def setup_package(): if numpy_status['up_to_date'] is False: if numpy_status['version']: raise ImportError("Your installation of Numerical Python " - "(NumPy) {0} is out-of-date.\n{1}{2}" + "(NumPy) {} is out-of-date.\n{}{}" .format(numpy_status['version'], numpy_req_str, instructions)) else: raise ImportError("Numerical Python (NumPy) is not " - "installed.\n{0}{1}" + "installed.\n{}{}" .format(numpy_req_str, instructions)) from numpy.distutils.core import setup diff --git a/sklearn/base.py b/sklearn/base.py index 15ec12b827f4f..103d72c75b600 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -126,7 +126,7 @@ def _pprint(params, offset=0, printer=repr): ############################################################################### -class BaseEstimator(object): +class BaseEstimator: """Base class for all estimators in scikit-learn Notes @@ -270,7 +270,7 @@ def __setstate__(self, state): ############################################################################### -class ClassifierMixin(object): +class ClassifierMixin: """Mixin class for all classifiers in scikit-learn.""" _estimator_type = "classifier" @@ -303,7 +303,7 @@ def score(self, X, y, sample_weight=None): ############################################################################### -class RegressorMixin(object): +class RegressorMixin: """Mixin class for all regression estimators in scikit-learn.""" _estimator_type = "regressor" @@ -344,7 +344,7 @@ def score(self, X, y, sample_weight=None): ############################################################################### -class ClusterMixin(object): +class ClusterMixin: """Mixin class for all cluster estimators in scikit-learn.""" _estimator_type = "clusterer" @@ -370,7 +370,7 @@ def fit_predict(self, X, y=None): return self.labels_ -class BiclusterMixin(object): +class BiclusterMixin: """Mixin class for all bicluster estimators in scikit-learn""" @property @@ -446,7 +446,7 @@ def get_submatrix(self, i, data): ############################################################################### -class TransformerMixin(object): +class TransformerMixin: """Mixin class for all transformers in scikit-learn.""" def fit_transform(self, X, y=None, **fit_params): @@ -479,7 +479,7 @@ def fit_transform(self, X, y=None, **fit_params): return self.fit(X, y, **fit_params).transform(X) -class DensityMixin(object): +class DensityMixin: """Mixin class for all density estimators in scikit-learn.""" _estimator_type = "DensityEstimator" @@ -497,7 +497,7 @@ def score(self, X, y=None): pass -class OutlierMixin(object): +class OutlierMixin: """Mixin class for all outlier detection estimators in scikit-learn.""" _estimator_type = "outlier_detector" @@ -524,7 +524,7 @@ def 
fit_predict(self, X, y=None): ############################################################################### -class MetaEstimatorMixin(object): +class MetaEstimatorMixin: """Mixin class for all meta estimators in scikit-learn.""" # this is just a tag for the moment diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 9de7cb93d1322..265f03313327f 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -248,7 +248,7 @@ def predict(self, X): return self.classes_[np.argmax(self.predict_proba(X), axis=1)] -class _CalibratedClassifier(object): +class _CalibratedClassifier: """Probability calibration with isotonic regression or sigmoid. It assumes that base_estimator has already been fit, and trains the diff --git a/sklearn/cluster/birch.py b/sklearn/cluster/birch.py index 4b5f72ada9ad3..6ddccc16c1467 100644 --- a/sklearn/cluster/birch.py +++ b/sklearn/cluster/birch.py @@ -86,7 +86,7 @@ def _split_node(node, threshold, branching_factor): return new_subcluster1, new_subcluster2 -class _CFNode(object): +class _CFNode: """Each node in a CFTree is called a CFNode. The CFNode can have a maximum of branching_factor @@ -239,7 +239,7 @@ def insert_cf_subcluster(self, subcluster): return True -class _CFSubcluster(object): +class _CFSubcluster: """Each subcluster in a CFNode is called a CFSubcluster. A CFSubcluster can have a CFNode has its child. diff --git a/sklearn/cluster/optics_.py b/sklearn/cluster/optics_.py index 9f47bae36aed5..5c5287e50bc73 100755 --- a/sklearn/cluster/optics_.py +++ b/sklearn/cluster/optics_.py @@ -725,7 +725,7 @@ def _automatic_cluster(reachability_plot, ordering, return root_node -class _TreeNode(object): +class _TreeNode: # automatic cluster helper classes and functions def __init__(self, points, start, end, parent_node): self.points = points diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index ae1cf0cde3a57..9226c973283b8 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -313,8 +313,8 @@ def named_transformers_(self): """ # Use Bunch object to improve autocomplete - return Bunch(**dict([(name, trans) for name, trans, _ - in self.transformers_])) + return Bunch(**{name: trans for name, trans, _ + in self.transformers_}) def get_feature_names(self): """Get feature names from all transformers. 
diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py index fa195ce030298..be1dec6ac1c13 100644 --- a/sklearn/datasets/openml.py +++ b/sklearn/datasets/openml.py @@ -611,7 +611,7 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None, raise ValueError('Mix of nominal and non-nominal targets is not ' 'currently supported') - description = u"{}\n\nDownloaded from openml.org.".format( + description = "{}\n\nDownloaded from openml.org.".format( data_description.pop('description')) # reshape y back to 1-D array, if there is only 1 target column; back diff --git a/sklearn/datasets/rcv1.py b/sklearn/datasets/rcv1.py index 66566c389baf3..965ff1fbcb063 100644 --- a/sklearn/datasets/rcv1.py +++ b/sklearn/datasets/rcv1.py @@ -208,7 +208,7 @@ def fetch_rcv1(data_home=None, subset='all', download_if_missing=True, category_names = {} with GzipFile(filename=topics_archive_path, mode='rb') as f: for line in f: - line_components = line.decode("ascii").split(u" ") + line_components = line.decode("ascii").split(" ") if len(line_components) == 3: cat, doc, _ = line_components if cat not in category_names: diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py index 4cda24c7398b2..2f58d746f7c6c 100644 --- a/sklearn/datasets/tests/test_openml.py +++ b/sklearn/datasets/tests/test_openml.py @@ -152,7 +152,7 @@ def _monkey_patch_webbased_functions(context, path_suffix = '.gz' read_fn = gzip.open - class MockHTTPResponse(object): + class MockHTTPResponse: def __init__(self, data, is_gzip): self.data = data self.is_gzip = is_gzip diff --git a/sklearn/datasets/tests/test_rcv1.py b/sklearn/datasets/tests/test_rcv1.py index 57627cc834c95..ea12c9f8e3a12 100644 --- a/sklearn/datasets/tests/test_rcv1.py +++ b/sklearn/datasets/tests/test_rcv1.py @@ -38,7 +38,7 @@ def test_fetch_rcv1(): assert_equal(103, len(cat_list)) # test ordering of categories - first_categories = [u'C11', u'C12', u'C13', u'C14', u'C15', u'C151'] + first_categories = ['C11', 'C12', 'C13', 'C14', 'C15', 'C151'] assert_array_equal(first_categories, cat_list[:6]) # test number of sample for some categories diff --git a/sklearn/ensemble/base.py b/sklearn/ensemble/base.py index 1ca2ef8b20492..08252d392dfe2 100644 --- a/sklearn/ensemble/base.py +++ b/sklearn/ensemble/base.py @@ -122,8 +122,8 @@ def _make_estimator(self, append=True, random_state=None): sub-estimators. """ estimator = clone(self.base_estimator_) - estimator.set_params(**dict((p, getattr(self, p)) - for p in self.estimator_params)) + estimator.set_params(**{p: getattr(self, p) + for p in self.estimator_params}) if random_state is not None: _set_random_states(estimator, random_state) diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index e0f3d9e4c35f7..b276094ed9d5e 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -61,7 +61,7 @@ from ..exceptions import NotFittedError -class QuantileEstimator(object): +class QuantileEstimator: """An estimator predicting the alpha-quantile of the training targets. Parameters @@ -114,7 +114,7 @@ def predict(self, X): return y -class MeanEstimator(object): +class MeanEstimator: """An estimator predicting the mean of the training targets.""" def fit(self, X, y, sample_weight=None): """Fit the estimator. 
@@ -155,7 +155,7 @@ def predict(self, X): return y -class LogOddsEstimator(object): +class LogOddsEstimator: """An estimator predicting the log odds ratio.""" scale = 1.0 @@ -210,7 +210,7 @@ class ScaledLogOddsEstimator(LogOddsEstimator): scale = 0.5 -class PriorProbabilityEstimator(object): +class PriorProbabilityEstimator: """An estimator predicting the probability of each class in the training data. """ @@ -253,7 +253,7 @@ def predict(self, X): return y -class ZeroEstimator(object): +class ZeroEstimator: """An estimator that simply predicts zero. """ def fit(self, X, y, sample_weight=None): @@ -299,7 +299,7 @@ def predict(self, X): return y -class LossFunction(object, metaclass=ABCMeta): +class LossFunction(metaclass=ABCMeta): """Abstract base class for various loss functions. Parameters @@ -1043,7 +1043,7 @@ def _score_to_decision(self, score): INIT_ESTIMATORS = {'zero': ZeroEstimator} -class VerboseReporter(object): +class VerboseReporter: """Reports verbose output to stdout. Parameters diff --git a/sklearn/ensemble/tests/test_weight_boosting.py b/sklearn/ensemble/tests/test_weight_boosting.py index 3a71743072e0e..435d2ef30e97c 100755 --- a/sklearn/ensemble/tests/test_weight_boosting.py +++ b/sklearn/ensemble/tests/test_weight_boosting.py @@ -59,7 +59,7 @@ def test_samme_proba(): # _samme_proba calls estimator.predict_proba. # Make a mock object so I can control what gets returned. - class MockEstimator(object): + class MockEstimator: def predict_proba(self, X): assert_array_equal(X.shape, probs.shape) return probs diff --git a/sklearn/ensemble/voting_classifier.py b/sklearn/ensemble/voting_classifier.py index 1b7d5079d0ac1..6c7dd84547df2 100644 --- a/sklearn/ensemble/voting_classifier.py +++ b/sklearn/ensemble/voting_classifier.py @@ -339,8 +339,7 @@ def get_params(self, deep=True): Setting it to True gets the various classifiers and the parameters of the classifiers as well """ - return super(VotingClassifier, - self)._get_params('estimators', deep=deep) + return super()._get_params('estimators', deep=deep) def _predict(self, X): """Collect results from clf.predict calls. 
""" diff --git a/sklearn/feature_extraction/dict_vectorizer.py b/sklearn/feature_extraction/dict_vectorizer.py index a37819b6da64b..29d4ae58e57c5 100644 --- a/sklearn/feature_extraction/dict_vectorizer.py +++ b/sklearn/feature_extraction/dict_vectorizer.py @@ -125,7 +125,7 @@ def fit(self, X, y=None): if self.sort: feature_names.sort() - vocab = dict((f, i) for i, f in enumerate(feature_names)) + vocab = {f: i for i, f in enumerate(feature_names)} self.feature_names_ = feature_names self.vocabulary_ = vocab diff --git a/sklearn/feature_extraction/tests/test_feature_hasher.py b/sklearn/feature_extraction/tests/test_feature_hasher.py index e3472682da7b2..919f5856ebb86 100644 --- a/sklearn/feature_extraction/tests/test_feature_hasher.py +++ b/sklearn/feature_extraction/tests/test_feature_hasher.py @@ -15,7 +15,7 @@ def test_feature_hasher_dicts(): assert_equal("dict", h.input_type) raw_X = [{"foo": "bar", "dada": 42, "tzara": 37}, - {"foo": "baz", "gaga": u"string1"}] + {"foo": "baz", "gaga": "string1"}] X1 = FeatureHasher(n_features=16).transform(raw_X) gen = (iter(d.items()) for d in raw_X) X2 = FeatureHasher(n_features=16, input_type="pair").transform(gen) @@ -59,7 +59,7 @@ def test_feature_hasher_pairs(): def test_feature_hasher_pairs_with_string_values(): raw_X = (iter(d.items()) for d in [{"foo": 1, "bar": "a"}, - {"baz": u"abc", "quux": 4, "foo": -1}]) + {"baz": "abc", "quux": 4, "foo": -1}]) h = FeatureHasher(n_features=16, input_type="pair") x1, x2 = h.transform(raw_X).toarray() x1_nz = sorted(np.abs(x1[x1 != 0])) diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py index 004f771126724..2a3d0dcbaafaf 100644 --- a/sklearn/feature_extraction/tests/test_text.py +++ b/sklearn/feature_extraction/tests/test_text.py @@ -67,7 +67,7 @@ def uppercase(s): def strip_eacute(s): - return s.replace(u'é', 'e') + return s.replace('é', 'e') def split_tokenize(s): @@ -80,11 +80,11 @@ def lazy_analyze(s): def test_strip_accents(): # check some classical latin accentuated symbols - a = u'àáâãäåçèéêë' + a = 'àáâãäåçèéêë' expected = 'aaaaaaceeee' assert_equal(strip_accents_unicode(a), expected) - a = u'ìíîïñòóôõöùúûüý' + a = 'ìíîïñòóôõöùúûüý' expected = 'iiiinooooouuuuy' assert_equal(strip_accents_unicode(a), expected) @@ -94,18 +94,18 @@ def test_strip_accents(): assert_equal(strip_accents_unicode(a), expected) # mix letters accentuated and not - a = u"this is à test" + a = "this is à test" expected = 'this is a test' assert_equal(strip_accents_unicode(a), expected) def test_to_ascii(): # check some classical latin accentuated symbols - a = u'àáâãäåçèéêë' + a = 'àáâãäåçèéêë' expected = 'aaaaaaceeee' assert_equal(strip_accents_ascii(a), expected) - a = u"ìíîïñòóôõöùúûüý" + a = "ìíîïñòóôõöùúûüý" expected = 'iiiinooooouuuuy' assert_equal(strip_accents_ascii(a), expected) @@ -115,7 +115,7 @@ def test_to_ascii(): assert_equal(strip_accents_ascii(a), expected) # mix letters accentuated and not - a = u"this is à test" + a = "this is à test" expected = 'this is a test' assert_equal(strip_accents_ascii(a), expected) @@ -123,8 +123,8 @@ def test_to_ascii(): @pytest.mark.parametrize('Vectorizer', (CountVectorizer, HashingVectorizer)) def test_word_analyzer_unigrams(Vectorizer): wa = Vectorizer(strip_accents='ascii').build_analyzer() - text = (u"J'ai mangé du kangourou ce midi, " - u"c'était pas très bon.") + text = ("J'ai mangé du kangourou ce midi, " + "c'était pas très bon.") expected = ['ai', 'mange', 'du', 'kangourou', 'ce', 'midi', 'etait', 'pas', 'tres', 
'bon'] assert_equal(wa(text), expected) @@ -142,8 +142,8 @@ def test_word_analyzer_unigrams(Vectorizer): # with custom preprocessor wa = Vectorizer(preprocessor=uppercase).build_analyzer() - text = (u"J'ai mangé du kangourou ce midi, " - u" c'était pas très bon.") + text = ("J'ai mangé du kangourou ce midi, " + " c'était pas très bon.") expected = ['AI', 'MANGE', 'DU', 'KANGOUROU', 'CE', 'MIDI', 'ETAIT', 'PAS', 'TRES', 'BON'] assert_equal(wa(text), expected) @@ -151,8 +151,8 @@ def test_word_analyzer_unigrams(Vectorizer): # with custom tokenizer wa = Vectorizer(tokenizer=split_tokenize, strip_accents='ascii').build_analyzer() - text = (u"J'ai mangé du kangourou ce midi, " - u"c'était pas très bon.") + text = ("J'ai mangé du kangourou ce midi, " + "c'était pas très bon.") expected = ["j'ai", 'mange', 'du', 'kangourou', 'ce', 'midi,', "c'etait", 'pas', 'tres', 'bon.'] assert_equal(wa(text), expected) @@ -162,7 +162,7 @@ def test_word_analyzer_unigrams_and_bigrams(): wa = CountVectorizer(analyzer="word", strip_accents='unicode', ngram_range=(1, 2)).build_analyzer() - text = u"J'ai mangé du kangourou ce midi, c'était pas très bon." + text = "J'ai mangé du kangourou ce midi, c'était pas très bon." expected = ['ai', 'mange', 'du', 'kangourou', 'ce', 'midi', 'etait', 'pas', 'tres', 'bon', 'ai mange', 'mange du', 'du kangourou', 'kangourou ce', 'ce midi', 'midi etait', @@ -173,7 +173,7 @@ def test_word_analyzer_unigrams_and_bigrams(): def test_unicode_decode_error(): # decode_error default to strict, so this should fail # First, encode (as bytes) a unicode string. - text = u"J'ai mangé du kangourou ce midi, c'était pas très bon." + text = "J'ai mangé du kangourou ce midi, c'était pas très bon." text_bytes = text.encode('utf-8') # Then let the Analyzer try to decode it as ascii. It should fail, @@ -190,7 +190,7 @@ def test_char_ngram_analyzer(): cnga = CountVectorizer(analyzer='char', strip_accents='unicode', ngram_range=(3, 6)).build_analyzer() - text = u"J'ai mangé du kangourou ce midi, c'était pas très bon" + text = "J'ai mangé du kangourou ce midi, c'était pas très bon" expected = ["j'a", "'ai", 'ai ', 'i m', ' ma'] assert_equal(cnga(text)[:5], expected) expected = ['s tres', ' tres ', 'tres b', 'res bo', 'es bon'] @@ -583,8 +583,8 @@ def test_feature_names(): @pytest.mark.parametrize('Vectorizer', (CountVectorizer, TfidfVectorizer)) def test_vectorizer_max_features(Vectorizer): expected_vocabulary = {'burger', 'beer', 'salad', 'pizza'} - expected_stop_words = {u'celeri', u'tomato', u'copyright', u'coke', - u'sparkling', u'water', u'the'} + expected_stop_words = {'celeri', 'tomato', 'copyright', 'coke', + 'sparkling', 'water', 'the'} # test bounded number of extracted features vectorizer = Vectorizer(max_df=0.6, max_features=4) @@ -833,9 +833,9 @@ def test_vectorizer_pipeline_cross_validation(): def test_vectorizer_unicode(): # tests that the count vectorizer works with cyrillic. document = ( - u"Машинное обучение — обширный подраздел искусственного " - u"интеллекта, изучающий методы построения алгоритмов, " - u"способных обучаться." + "Машинное обучение — обширный подраздел искусственного " + "интеллекта, изучающий методы построения алгоритмов, " + "способных обучаться." 
) vect = CountVectorizer() diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index 675842e1bea93..c46ac8fd6cc42 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -113,7 +113,7 @@ def _check_stop_list(stop): return frozenset(stop) -class VectorizerMixin(object): +class VectorizerMixin: """Provides common code for text vectorizers (tokenization logic).""" _white_spaces = re.compile(r"\s\s+") diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py index c7d360f728b1a..1f912d7c8e6a7 100644 --- a/sklearn/feature_selection/tests/test_rfe.py +++ b/sklearn/feature_selection/tests/test_rfe.py @@ -24,7 +24,7 @@ from sklearn.metrics import get_scorer -class MockClassifier(object): +class MockClassifier: """ Dummy classifier to test recursive feature elimination """ diff --git a/sklearn/gaussian_process/kernels.py b/sklearn/gaussian_process/kernels.py index a61ab1fc8c3a4..e7d5e97037427 100644 --- a/sklearn/gaussian_process/kernels.py +++ b/sklearn/gaussian_process/kernels.py @@ -356,7 +356,7 @@ def is_stationary(self): """Returns whether the kernel is stationary. """ -class NormalizedKernelMixin(object): +class NormalizedKernelMixin: """Mixin for kernels which are normalized: k(X, X)=1. .. versionadded:: 0.18 @@ -382,7 +382,7 @@ def diag(self, X): return np.ones(X.shape[0]) -class StationaryKernelMixin(object): +class StationaryKernelMixin: """Mixin for kernels which are stationary: k(X, Y)= f(X-Y). .. versionadded:: 0.18 diff --git a/sklearn/linear_model/base.py b/sklearn/linear_model/base.py index 4bbae6462e2e8..57b78598f9617 100644 --- a/sklearn/linear_model/base.py +++ b/sklearn/linear_model/base.py @@ -302,7 +302,7 @@ def _predict_proba_lr(self, X): return prob -class SparseCoefMixin(object): +class SparseCoefMixin: """Mixin for converting coef_ to and from CSR format. L1-regularizing estimators should inherit this. 
diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py index 6419e00a3c804..f414c3ba4b32f 100644 --- a/sklearn/linear_model/coordinate_descent.py +++ b/sklearn/linear_model/coordinate_descent.py @@ -1224,9 +1224,9 @@ def fit(self, X, y): self.alphas_ = np.asarray(alphas[0]) # Refit the model with the parameters selected - common_params = dict((name, value) - for name, value in self.get_params().items() - if name in model.get_params()) + common_params = {name: value + for name, value in self.get_params().items() + if name in model.get_params()} model.set_params(**common_params) model.alpha = best_alpha model.l1_ratio = best_l1_ratio diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index a1d49ac570e92..d593ffbff0eab 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -2008,8 +2008,8 @@ def fit(self, X, y, sample_weight=None): label_encoder = LabelEncoder().fit(y) y = label_encoder.transform(y) if isinstance(class_weight, dict): - class_weight = dict((label_encoder.transform([cls])[0], v) - for cls, v in class_weight.items()) + class_weight = {label_encoder.transform([cls])[0]: v + for cls, v in class_weight.items()} # The original class labels classes = self.classes_ = label_encoder.classes_ diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py index 9b13bf8c9c56a..3b3ab0e95989c 100644 --- a/sklearn/linear_model/stochastic_gradient.py +++ b/sklearn/linear_model/stochastic_gradient.py @@ -44,7 +44,7 @@ # Default value of ``epsilon`` parameter. -class _ValidationScoreCallback(object): +class _ValidationScoreCallback: """Callback for early stopping based on validation score""" def __init__(self, estimator, X_val, y_val, sample_weight_val, diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index f861c05425d6a..517781c8e6cfd 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -111,7 +111,7 @@ def test_error(): @pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_logistic_cv_mock_scorer(): - class MockScorer(object): + class MockScorer: def __init__(self): self.calls = 0 self.scores = [0.1, 0.4, 0.8, 0.5] diff --git a/sklearn/linear_model/tests/test_perceptron.py b/sklearn/linear_model/tests/test_perceptron.py index 7085129163d9b..e46949c256e9f 100644 --- a/sklearn/linear_model/tests/test_perceptron.py +++ b/sklearn/linear_model/tests/test_perceptron.py @@ -20,7 +20,7 @@ X_csr.sort_indices() -class MyPerceptron(object): +class MyPerceptron: def __init__(self, n_iter=1): self.n_iter = n_iter diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py index 7eadea9004eb1..629933a3bc8e9 100644 --- a/sklearn/linear_model/tests/test_sgd.py +++ b/sklearn/linear_model/tests/test_sgd.py @@ -113,7 +113,7 @@ def decision_function(self, X, *args, **kw): ############################################################################### # Tests common to classification and regression -class CommonTest(object): +class CommonTest: def factory(self, **kwargs): if "random_state" not in kwargs: diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 7f5250f8fa3bb..8e6ee8dc06b2c 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -270,7 +270,7 @@ def confusion_matrix(y_true, y_pred, labels=None, sample_weight=None): 
check_consistent_length(y_true, y_pred, sample_weight) n_labels = labels.size - label_to_ind = dict((y, x) for x, y in enumerate(labels)) + label_to_ind = {y: x for x, y in enumerate(labels)} # convert yt, yp into index y_pred = np.array([label_to_ind.get(x, n_labels + 1) for x in y_pred]) y_true = np.array([label_to_ind.get(x, n_labels + 1) for x in y_true]) @@ -1723,7 +1723,7 @@ class 2 1.00 0.67 0.80 3 "parameter".format(len(labels), len(target_names)) ) if target_names is None: - target_names = [u'%s' % l for l in labels] + target_names = ['%s' % l for l in labels] headers = ["precision", "recall", "f1-score", "support"] # compute per-class results without averaging @@ -1747,13 +1747,13 @@ class 2 1.00 0.67 0.80 3 longest_last_line_heading = 'weighted avg' name_width = max(len(cn) for cn in target_names) width = max(name_width, len(longest_last_line_heading), digits) - head_fmt = u'{:>{width}s} ' + u' {:>9}' * len(headers) - report = head_fmt.format(u'', *headers, width=width) - report += u'\n\n' - row_fmt = u'{:>{width}s} ' + u' {:>9.{digits}f}' * 3 + u' {:>9}\n' + head_fmt = '{:>{width}s} ' + ' {:>9}' * len(headers) + report = head_fmt.format('', *headers, width=width) + report += '\n\n' + row_fmt = '{:>{width}s} ' + ' {:>9.{digits}f}' * 3 + ' {:>9}\n' for row in rows: report += row_fmt.format(*row, width=width, digits=digits) - report += u'\n' + report += '\n' # compute all applicable averages for average in average_options: @@ -1773,9 +1773,9 @@ class 2 1.00 0.67 0.80 3 zip(headers, [i.item() for i in avg])) else: if line_heading == 'accuracy': - row_fmt_accuracy = u'{:>{width}s} ' + \ - u' {:>9.{digits}}' * 2 + u' {:>9.{digits}f}' + \ - u' {:>9}\n' + row_fmt_accuracy = '{:>{width}s} ' + \ + ' {:>9.{digits}}' * 2 + ' {:>9.{digits}f}' + \ + ' {:>9}\n' report += row_fmt_accuracy.format(line_heading, '', '', *avg[2:], width=width, digits=digits) diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index 1b1c2674c35ec..11404c51f0a79 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -1577,8 +1577,8 @@ def pairwise_kernels(X, Y=None, metric="linear", filter_params=False, func = metric.__call__ elif metric in PAIRWISE_KERNEL_FUNCTIONS: if filter_params: - kwds = dict((k, kwds[k]) for k in kwds - if k in KERNEL_PARAMS[metric]) + kwds = {k: kwds[k] for k in kwds + if k in KERNEL_PARAMS[metric]} func = PAIRWISE_KERNEL_FUNCTIONS[metric] elif callable(metric): func = partial(_pairwise_callable, metric=metric, **kwds) diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index f93736ed097a3..02cfc200ce635 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -43,7 +43,7 @@ from ..base import is_regressor -class _BaseScorer(object, metaclass=ABCMeta): +class _BaseScorer(metaclass=ABCMeta): def __init__(self, score_func, sign, kwargs): self._kwargs = kwargs self._score_func = score_func diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index de72337d4024b..0c802ae65b874 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -998,11 +998,11 @@ def test_classification_report_multiclass_with_string_label(): def test_classification_report_multiclass_with_unicode_label(): y_true, y_pred, _ = make_prediction(binary=False) - labels = np.array([u"blue\xa2", u"green\xa2", u"red\xa2"]) + labels = np.array(["blue\xa2", "green\xa2", "red\xa2"]) y_true = labels[y_true] y_pred = labels[y_pred] - expected_report = u"""\ + 
expected_report = """\ precision recall f1-score support blue\xa2 0.83 0.79 0.81 24 diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index de304feb47847..f72fe6c0f0925 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -460,7 +460,7 @@ def test_symmetry(): assert_equal( SYMMETRIC_METRICS.intersection(NOT_SYMMETRIC_METRICS), - set([])) + set()) # Symmetric metric for name in SYMMETRIC_METRICS: diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 2ab4b6b72e3a7..1abee56535869 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -108,7 +108,7 @@ def teardown_module(): shutil.rmtree(TEMP_FOLDER) -class EstimatorWithoutFit(object): +class EstimatorWithoutFit: """Dummy estimator to test scoring validators""" pass @@ -119,7 +119,7 @@ def fit(self, X, y): return self -class EstimatorWithFitAndScore(object): +class EstimatorWithFitAndScore: """Dummy estimator to test scoring validators""" def fit(self, X, y): return self @@ -128,7 +128,7 @@ def score(self, X, y): return 1.0 -class EstimatorWithFitAndPredict(object): +class EstimatorWithFitAndPredict: """Dummy estimator to test scoring validators""" def fit(self, X, y): self.y = y @@ -138,7 +138,7 @@ def predict(self, X): return self.y -class DummyScorer(object): +class DummyScorer: """Dummy scorer that always returns 1.""" def __call__(self, est, X, y): return 1 diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 66d34f89d4be7..4d549ccd7b9d1 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -68,7 +68,7 @@ def generate_data(n_samples, n_features, weights, means, precisions, return X -class RandomData(object): +class RandomData: def __init__(self, rng, n_samples=500, n_components=2, n_features=2, scale=50): self.n_samples = n_samples diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 8184897269260..1ac4563c62e8a 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -44,7 +44,7 @@ 'ParameterSampler', 'RandomizedSearchCV'] -class ParameterGrid(object): +class ParameterGrid: """Grid of parameters with a discrete number of values for each. Can be used to iterate over parameter value combinations with the @@ -180,7 +180,7 @@ def __getitem__(self, ind): raise IndexError('ParameterGrid index out of range') -class ParameterSampler(object): +class ParameterSampler: """Generator on parameters sampled from given distributions. 
Non-deterministic iterable over random candidate combinations for hyper- diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index 171ede1ce5ded..3dc3ebebf123a 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -1500,8 +1500,7 @@ def _iter_indices(self, X, y, groups): raise ValueError("The 'groups' parameter should not be None.") groups = check_array(groups, ensure_2d=False, dtype=None) classes, group_indices = np.unique(groups, return_inverse=True) - for group_train, group_test in super( - GroupShuffleSplit, self)._iter_indices(X=classes): + for group_train, group_test in super()._iter_indices(X=classes): # these are the indices of classes in the partition # invert them into data indices diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index e5c1f539914f6..15989494fb581 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -489,8 +489,8 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose, # Adjust length of sample weights fit_params = fit_params if fit_params is not None else {} - fit_params = dict([(k, _index_param_value(X, v, train)) - for k, v in fit_params.items()]) + fit_params = {k: _index_param_value(X, v, train) + for k, v in fit_params.items()} train_scores = {} if parameters is not None: @@ -827,8 +827,8 @@ def _fit_and_predict(estimator, X, y, train, test, verbose, fit_params, """ # Adjust length of sample weights fit_params = fit_params if fit_params is not None else {} - fit_params = dict([(k, _index_param_value(X, v, train)) - for k, v in fit_params.items()]) + fit_params = {k: _index_param_value(X, v, train) + for k, v in fit_params.items()} X_train, y_train = _safe_split(estimator, X, y, train) X_test, _ = _safe_split(estimator, X, y, test, train) diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py index 96a8341115134..021955730bd7c 100644 --- a/sklearn/model_selection/tests/test_split.py +++ b/sklearn/model_selection/tests/test_split.py @@ -71,7 +71,7 @@ digits = load_digits() -class MockClassifier(object): +class MockClassifier: """Dummy classifier to test the cross-validation""" def __init__(self, a=0, allow_nd=False): diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index 3cd6c9afb2d6e..51624cf00ccf7 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ b/sklearn/model_selection/tests/test_validation.py @@ -115,8 +115,7 @@ def _is_training_data(self, X): class MockIncrementalImprovingEstimator(MockImprovingEstimator): """Dummy classifier that provides partial_fit""" def __init__(self, n_max_train_sizes): - super(MockIncrementalImprovingEstimator, - self).__init__(n_max_train_sizes) + super().__init__(n_max_train_sizes) self.x = None def _is_training_data(self, X): @@ -161,7 +160,7 @@ def predict(self, X): raise NotImplementedError -class MockClassifier(object): +class MockClassifier: """Dummy classifier to test the cross-validation""" def __init__(self, a=0, allow_nd=False): diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py index fdfe1bed0ca9f..f7076cd117921 100644 --- a/sklearn/multiclass.py +++ b/sklearn/multiclass.py @@ -742,7 +742,7 @@ def fit(self, X, y): else: self.code_book_[self.code_book_ != 1] = 0 - classes_index = dict((c, i) for i, c in enumerate(self.classes_)) + classes_index = {c: i for i, c in enumerate(self.classes_)} Y = 
np.array([self.code_book_[classes_index[y[i]]] for i in range(X.shape[0])], dtype=np.int) diff --git a/sklearn/neighbors/base.py b/sklearn/neighbors/base.py index f86ee077f7205..dc187f0c31918 100644 --- a/sklearn/neighbors/base.py +++ b/sklearn/neighbors/base.py @@ -291,7 +291,7 @@ def _tree_query_parallel_helper(tree, data, n_neighbors, return_distance): return tree.query(data, n_neighbors, return_distance) -class KNeighborsMixin(object): +class KNeighborsMixin: """Mixin for k-neighbors searches""" def _kneighbors_reduce_func(self, dist, start, @@ -580,7 +580,7 @@ def _tree_query_radius_parallel_helper(tree, data, radius, return_distance): return tree.query_radius(data, radius, return_distance) -class RadiusNeighborsMixin(object): +class RadiusNeighborsMixin: """Mixin for radius-based neighbors searches""" def _radius_neighbors_reduce_func(self, dist, start, @@ -854,7 +854,7 @@ def radius_neighbors_graph(self, X=None, radius=None, mode='connectivity'): shape=(n_samples1, n_samples2)) -class SupervisedFloatMixin(object): +class SupervisedFloatMixin: def fit(self, X, y): """Fit the model using X as training data and y as target values @@ -874,7 +874,7 @@ def fit(self, X, y): return self._fit(X) -class SupervisedIntegerMixin(object): +class SupervisedIntegerMixin: def fit(self, X, y): """Fit the model using X as training data and y as target values @@ -917,7 +917,7 @@ def fit(self, X, y): return self._fit(X) -class UnsupervisedMixin(object): +class UnsupervisedMixin: def fit(self, X, y=None): """Fit the model using X as training data diff --git a/sklearn/neural_network/_stochastic_optimizers.py b/sklearn/neural_network/_stochastic_optimizers.py index 73017900f90db..8f19c7b488acc 100644 --- a/sklearn/neural_network/_stochastic_optimizers.py +++ b/sklearn/neural_network/_stochastic_optimizers.py @@ -7,7 +7,7 @@ import numpy as np -class BaseOptimizer(object): +class BaseOptimizer: """Base (Stochastic) gradient descent optimizer Parameters diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 0e592c253fe40..4eb1d6076c966 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -212,8 +212,8 @@ def _fit(self, X, y=None, **fit_params): fit_transform_one_cached = memory.cache(_fit_transform_one) - fit_params_steps = dict((name, {}) for name, step in self.steps - if step is not None) + fit_params_steps = {name: {} for name, step in self.steps + if step is not None} for pname, pval in fit_params.items(): step, param = pname.split('__', 1) fit_params_steps[step][param] = pval diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index 1a5ad20d32ef4..5122eec1e5233 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -150,8 +150,8 @@ def test_polynomial_feature_names(): # test some unicode poly = PolynomialFeatures(degree=1, include_bias=True).fit(X) feature_names = poly.get_feature_names( - [u"\u0001F40D", u"\u262E", u"\u05D0"]) - assert_array_equal([u"1", u"\u0001F40D", u"\u262E", u"\u05D0"], + ["\u0001F40D", "\u262E", "\u05D0"]) + assert_array_equal(["1", "\u0001F40D", "\u262E", "\u05D0"], feature_names) diff --git a/sklearn/preprocessing/tests/test_encoders.py b/sklearn/preprocessing/tests/test_encoders.py index 2b3a59445ef9e..6e1db0e458846 100644 --- a/sklearn/preprocessing/tests/test_encoders.py +++ b/sklearn/preprocessing/tests/test_encoders.py @@ -541,12 +541,12 @@ def test_one_hot_encoder_feature_names(): def test_one_hot_encoder_feature_names_unicode(): enc = OneHotEncoder() - X = 
np.array([[u'c❤t1', u'dat2']], dtype=object).T
+    X = np.array([['c❤t1', 'dat2']], dtype=object).T
     enc.fit(X)
     feature_names = enc.get_feature_names()
-    assert_array_equal([u'x0_c❤t1', u'x0_dat2'], feature_names)
-    feature_names = enc.get_feature_names(input_features=[u'n👍me'])
-    assert_array_equal([u'n👍me_c❤t1', u'n👍me_dat2'], feature_names)
+    assert_array_equal(['x0_c❤t1', 'x0_dat2'], feature_names)
+    feature_names = enc.get_feature_names(input_features=['n👍me'])
+    assert_array_equal(['n👍me_c❤t1', 'n👍me_dat2'], feature_names)
 
 
 @pytest.mark.parametrize("X", [np.array([[1, np.nan]]).T,
diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py
index 2a8bbba2632af..ec1c1356c3367 100644
--- a/sklearn/tests/test_base.py
+++ b/sklearn/tests/test_base.py
@@ -65,7 +65,7 @@ def __init__(self, a=None):
         self.a = 1
 
 
-class NoEstimator(object):
+class NoEstimator:
     def __init__(self):
         pass
 
@@ -381,7 +381,7 @@ def test_pickle_version_no_warning_is_issued_with_non_sklearn_estimator():
         TreeNoVersion.__module__ = module_backup
 
 
-class DontPickleAttributeMixin(object):
+class DontPickleAttributeMixin:
     def __getstate__(self):
         data = self.__dict__.copy()
         data["_attribute_not_pickled"] = None
diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
index 267d3bb06aefc..642dc311f5e6e 100644
--- a/sklearn/tests/test_common.py
+++ b/sklearn/tests/test_common.py
@@ -194,10 +194,9 @@ def test_all_tests_are_importable():
                                       \.tests(\.|$)|
                                       \._
                                       ''')
-    lookup = dict((name, ispkg)
-                  for _, name, ispkg
-                  in pkgutil.walk_packages(sklearn.__path__,
-                                           prefix='sklearn.'))
+    lookup = {name: ispkg
+              for _, name, ispkg
+              in pkgutil.walk_packages(sklearn.__path__, prefix='sklearn.')}
     missing_tests = [name for name, ispkg in lookup.items()
                      if ispkg
                      and not HAS_TESTS_EXCEPTIONS.search(name)
diff --git a/sklearn/tests/test_impute.py b/sklearn/tests/test_impute.py
index 7131ac3ed0f5f..9521e877554c9 100644
--- a/sklearn/tests/test_impute.py
+++ b/sklearn/tests/test_impute.py
@@ -298,7 +298,7 @@ def test_imputation_most_frequent_pandas(dtype):
     # Test imputation using the most frequent strategy on pandas df
     pd = pytest.importorskip("pandas")
 
-    f = io.StringIO(u"Cat1,Cat2,Cat3,Cat4\n"
+    f = io.StringIO("Cat1,Cat2,Cat3,Cat4\n"
                     ",i,x,\n"
                     "a,,y,\n"
                     "a,j,,\n"
@@ -411,7 +411,7 @@ def test_imputation_constant_pandas(dtype):
     # Test imputation using the constant strategy on pandas df
     pd = pytest.importorskip("pandas")
 
-    f = io.StringIO(u"Cat1,Cat2,Cat3,Cat4\n"
+    f = io.StringIO("Cat1,Cat2,Cat3,Cat4\n"
                     ",i,x,\n"
                     "a,,y,\n"
                     "a,j,,\n"
diff --git a/sklearn/tests/test_metaestimators.py b/sklearn/tests/test_metaestimators.py
index 47de7ae374b74..c430912876f6e 100644
--- a/sklearn/tests/test_metaestimators.py
+++ b/sklearn/tests/test_metaestimators.py
@@ -16,7 +16,7 @@
 from sklearn.exceptions import NotFittedError
 
 
-class DelegatorData(object):
+class DelegatorData:
     def __init__(self, name, construct, skip_methods=(),
                  fit_args=make_classification()):
         self.name = name
diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index 55202ae63afe2..259876acd1a42 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -45,7 +45,7 @@
 )
 
 
-class NoFit(object):
+class NoFit:
     """Small class to test parameter dispatching.
     """
 
@@ -947,12 +947,12 @@ def test_pipeline_wrong_memory():
                         " Got memory='1' instead.",
                         cached_pipe.fit, X, y)
 
 
-class DummyMemory(object):
+class DummyMemory:
     def cache(self, func):
         return func
 
 
-class WrongDummyMemory(object):
+class WrongDummyMemory:
     pass
 
diff --git a/sklearn/tree/_reingold_tilford.py b/sklearn/tree/_reingold_tilford.py
index d83969badb623..fae84bbb85f64 100644
--- a/sklearn/tree/_reingold_tilford.py
+++ b/sklearn/tree/_reingold_tilford.py
@@ -19,7 +19,7 @@
 import numpy as np
 
 
-class DrawTree(object):
+class DrawTree:
     def __init__(self, tree, parent=None, depth=0, number=1):
         self.x = -1.
         self.y = depth
@@ -193,7 +193,7 @@ def second_walk(v, m=0, depth=0, min=None):
     return min
 
 
-class Tree(object):
+class Tree:
     def __init__(self, label="", node_id=-1, *children):
         self.label = label
         self.node_id = node_id
diff --git a/sklearn/utils/deprecation.py b/sklearn/utils/deprecation.py
index d6388a5c5f4b9..91edde0230798 100644
--- a/sklearn/utils/deprecation.py
+++ b/sklearn/utils/deprecation.py
@@ -5,7 +5,7 @@
 __all__ = ["deprecated"]
 
 
-class deprecated(object):
+class deprecated:
     """Decorator to mark a function or class as deprecated.
 
     Issue a warning when the function is called/the class is instantiated and
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 02050071a0d0b..539344f5c83d0 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -386,7 +386,7 @@ def set_checking_parameters(estimator):
         estimator.set_params(k=1)
 
 
-class NotAnArray(object):
+class NotAnArray:
     """An object that is convertible to an array
 
     Parameters
@@ -1986,8 +1986,7 @@ def check_no_attributes_set_in_init(name, estimator):
             " from parameters during init. Found attributes %s."
             % (name, sorted(invalid_attr)))
     # Ensure that each parameter is set in init
-    invalid_attr = (set(init_params) - set(vars(estimator))
-                    - set(["self"]))
+    invalid_attr = set(init_params) - set(vars(estimator)) - {"self"}
     assert not invalid_attr, (
         "Estimator %s should store all parameters"
         " as an attribute during init. Did not find "
diff --git a/sklearn/utils/linear_assignment_.py b/sklearn/utils/linear_assignment_.py
index 7c3570a9744ab..846e8b6b869d9 100644
--- a/sklearn/utils/linear_assignment_.py
+++ b/sklearn/utils/linear_assignment_.py
@@ -60,7 +60,7 @@ def linear_assignment(X):
     return indices
 
 
-class _HungarianState(object):
+class _HungarianState:
     """State of one execution of the Hungarian algorithm.
 
     Parameters
diff --git a/sklearn/utils/metaestimators.py b/sklearn/utils/metaestimators.py
index 459a33ca1b54b..d3aba4f49cea4 100644
--- a/sklearn/utils/metaestimators.py
+++ b/sklearn/utils/metaestimators.py
@@ -73,7 +73,7 @@ def _validate_names(self, names):
                              '{0!r}'.format(invalid_names))
 
 
-class _IffHasAttrDescriptor(object):
+class _IffHasAttrDescriptor:
     """Implements a conditional property using the descriptor protocol.
 
     Using this class to create a decorator will raise an ``AttributeError``
diff --git a/sklearn/utils/mocking.py b/sklearn/utils/mocking.py
index 762264c4d1b5c..53c7960786d23 100644
--- a/sklearn/utils/mocking.py
+++ b/sklearn/utils/mocking.py
@@ -4,7 +4,7 @@
 from .validation import _num_samples, check_array
 
 
-class ArraySlicingWrapper(object):
+class ArraySlicingWrapper:
     """
     Parameters
     ----------
@@ -17,7 +17,7 @@ def __getitem__(self, aslice):
         return MockDataFrame(self.array[aslice])
 
 
-class MockDataFrame(object):
+class MockDataFrame:
     """
     Parameters
     ----------
diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py
index 699026b9e47ee..66e0299f1c378 100644
--- a/sklearn/utils/testing.py
+++ b/sklearn/utils/testing.py
@@ -320,7 +320,7 @@ def ignore_warnings(obj=None, category=Warning):
         return _IgnoreWarnings(category=category)
 
 
-class _IgnoreWarnings(object):
+class _IgnoreWarnings:
     """Improved and simplified Python warnings context manager and decorator.
 
     This class allows the user to ignore the warnings raised by a function.
@@ -507,7 +507,7 @@ def fake_mldata(columns_dict, dataname, matfile, ordering=None):
 
 
 @deprecated('deprecated in version 0.20 to be removed in version 0.22')
-class mock_mldata_urlopen(object):
+class mock_mldata_urlopen:
     """Object that mocks the urlopen function to fake requests to mldata.
 
     When requesting a dataset with a name that is in mock_datasets, this object
@@ -821,7 +821,7 @@ def _delete_folder(folder_path, warn=False):
             warnings.warn("Could not delete temporary folder %s" % folder_path)
 
 
-class TempMemmap(object):
+class TempMemmap:
     """
     Parameters
     ----------
diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index 4b002d257dfdc..8492f7f09cc7d 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -112,8 +112,7 @@ def set_params(self, **kwargs):
             if p < 0:
                 p = 0
             self.p = p
-        return super(ModifiesValueInsteadOfRaisingError,
-                     self).set_params(**kwargs)
+        return super().set_params(**kwargs)
 
     def fit(self, X, y=None):
         X, y = check_X_y(X, y)
@@ -132,8 +131,7 @@ def set_params(self, **kwargs):
         if a is None:
             kwargs.pop('b')
             self.b = 'method2'
-        return super(ModifiesAnotherValue,
-                     self).set_params(**kwargs)
+        return super().set_params(**kwargs)
 
     def fit(self, X, y=None):
         X, y = check_X_y(X, y)
@@ -411,11 +409,11 @@ def test_check_estimators_unfitted():
 
 
 def test_check_no_attributes_set_in_init():
-    class NonConformantEstimatorPrivateSet(object):
+    class NonConformantEstimatorPrivateSet:
         def __init__(self):
             self.you_should_not_set_this_ = None
 
-    class NonConformantEstimatorNoParamSet(object):
+    class NonConformantEstimatorNoParamSet:
         def __init__(self, you_should_set_this_=None):
             pass
 
diff --git a/sklearn/utils/tests/test_metaestimators.py b/sklearn/utils/tests/test_metaestimators.py
index e2df28e496515..40cee4aedffa7 100644
--- a/sklearn/utils/tests/test_metaestimators.py
+++ b/sklearn/utils/tests/test_metaestimators.py
@@ -1,12 +1,12 @@
 from sklearn.utils.metaestimators import if_delegate_has_method
 
 
-class Prefix(object):
+class Prefix:
     def func(self):
         pass
 
 
-class MockMetaEstimator(object):
+class MockMetaEstimator:
     """This is a mock meta estimator"""
     a_prefix = Prefix()
 
@@ -25,7 +25,7 @@ def test_delegated_docstring():
            in str(MockMetaEstimator().func.__doc__)
 
 
-class MetaEst(object):
+class MetaEst:
     """A mock meta estimator"""
     def __init__(self, sub_est, better_sub_est=None):
         self.sub_est = sub_est
@@ -52,14 +52,14 @@ def predict(self):
         pass
 
 
-class HasPredict(object):
+class HasPredict:
     """A mock sub-estimator with predict method"""
     def predict(self):
         pass
 
 
-class HasNoPredict(object):
+class HasNoPredict:
     """A mock sub-estimator with no predict method"""
     pass
 
diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py
index 184f44f8db29e..a3c19ff53dd45 100644
--- a/sklearn/utils/tests/test_multiclass.py
+++ b/sklearn/utils/tests/test_multiclass.py
@@ -31,7 +31,7 @@
 from sklearn import datasets
 
 
-class NotAnArray(object):
+class NotAnArray:
     """An object that is convertable to an array. This is useful to simulate
     a Pandas timeseries."""
 
@@ -71,8 +71,8 @@ def __array__(self, dtype=None):
         NotAnArray(np.array([1, 0, 2])),
         [0, 1, 2],
         ['a', 'b', 'c'],
-        np.array([u'a', u'b', u'c']),
-        np.array([u'a', u'b', u'c'], dtype=object),
+        np.array(['a', 'b', 'c']),
+        np.array(['a', 'b', 'c'], dtype=object),
         np.array(['a', 'b', 'c'], dtype=object),
     ],
     'multiclass-multioutput': [
@@ -82,8 +82,8 @@ def __array__(self, dtype=None):
         np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=np.float),
         np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=np.float32),
         np.array([['a', 'b'], ['c', 'd']]),
-        np.array([[u'a', u'b'], [u'c', u'd']]),
-        np.array([[u'a', u'b'], [u'c', u'd']], dtype=object),
+        np.array([['a', 'b'], ['c', 'd']]),
+        np.array([['a', 'b'], ['c', 'd']], dtype=object),
         np.array([[1, 0, 2]]),
         NotAnArray(np.array([[1, 0, 2]])),
     ],
@@ -106,7 +106,7 @@ def __array__(self, dtype=None):
         ['a', 'b'],
         ['abc', 'def'],
         np.array(['abc', 'def']),
-        [u'a', u'b'],
+        ['a', 'b'],
         np.array(['abc', 'def'], dtype=object),
     ],
     'continuous': [
diff --git a/sklearn/utils/tests/test_testing.py b/sklearn/utils/tests/test_testing.py
index 729b5ef81c684..389b11a90aa31 100644
--- a/sklearn/utils/tests/test_testing.py
+++ b/sklearn/utils/tests/test_testing.py
@@ -359,7 +359,7 @@ def f_check_param_definition(a, b, c, d, e):
     return a + b + c + d
 
 
-class Klass(object):
+class Klass:
     def f_missing(self, X, y):
         pass
 
@@ -381,7 +381,7 @@ def f_bad_sections(self, X, y):
         pass
 
 
-class MockEst(object):
+class MockEst:
     def __init__(self):
         """MockEstimator"""
     def fit(self, X, y):
@@ -397,7 +397,7 @@ def score(self, X):
         return 1.
 
 
-class MockMetaEstimator(object):
+class MockMetaEstimator:
     def __init__(self, delegate):
         """MetaEstimator to check if doctest on delegated methods work.
 
@@ -495,7 +495,7 @@ def test_check_docstring_parameters():
         assert mess in incorrect[0], '"%s" not in "%s"' % (mess, incorrect[0])
 
 
-class RegistrationCounter(object):
+class RegistrationCounter:
     def __init__(self):
         self.nb_calls = 0
diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py
index f76d95b82ef89..88138452d6ab6 100644
--- a/sklearn/utils/tests/test_utils.py
+++ b/sklearn/utils/tests/test_utils.py
@@ -65,7 +65,7 @@ def ham():
         warnings.simplefilter("always")
 
         @deprecated("don't use this")
-        class Ham(object):
+        class Ham:
             SPAM = 1
 
         ham = Ham()
diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index 99019e25c0c81..e9d766ed44094 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -725,12 +725,12 @@ def test_check_dataframe_warns_on_dtype():
                          dtype='numeric', warn_on_dtype=True)
 
 
-class DummyMemory(object):
+class DummyMemory:
     def cache(self, func):
         return func
 
 
-class WrongDummyMemory(object):
+class WrongDummyMemory:
     pass
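
Every hunk in this patch applies the same Python 3 cleanups: classes no longer inherit explicitly from `object`, `super()` is called with no arguments, `dict(...)` over a generator of pairs becomes a dict comprehension, and `u''` string prefixes are dropped. The standalone sketch below is illustrative only, not part of the diff; the `Counter` and `LoudCounter` names are hypothetical and simply condense the before-and-after idioms:

```python
# Illustrative sketch of the Python 3 idioms adopted throughout this patch.
# Not part of the diff; the class names here are made up.

class Counter:                       # implicit ``object`` base class
    def __init__(self, start=0):
        self.count = start


class LoudCounter(Counter):
    def __init__(self, start=0):
        # zero-argument super() replaces super(LoudCounter, self)
        super().__init__(start)


# dict comprehension replaces dict() over a generator of (key, value) pairs
squares = {n: n ** 2 for n in range(5)}

# str literals are unicode by default in Python 3, so no u'' prefix is needed
label = 'n👍me'
```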