diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index a3b35f40a88d7..fb65d800e78ba 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -706,17 +706,17 @@ def test_overrided_gram_matrix(): clf.fit, X, y) -def test_lasso_non_float_y(): +@pytest.mark.parametrize('model', [ElasticNet, Lasso]) +def test_lasso_non_float_y(model): X = [[0, 0], [1, 1], [-1, -1]] y = [0, 1, 2] y_float = [0.0, 1.0, 2.0] - for model in [ElasticNet, Lasso]: - clf = model(fit_intercept=False) - clf.fit(X, y) - clf_float = model(fit_intercept=False) - clf_float.fit(X, y_float) - assert_array_equal(clf.coef_, clf_float.coef_) + clf = model(fit_intercept=False) + clf.fit(X, y) + clf_float = model(fit_intercept=False) + clf_float.fit(X, y_float) + assert_array_equal(clf.coef_, clf_float.coef_) def test_enet_float_precision(): diff --git a/sklearn/linear_model/tests/test_least_angle.py b/sklearn/linear_model/tests/test_least_angle.py index e41df9cce1178..630559fe4fef2 100644 --- a/sklearn/linear_model/tests/test_least_angle.py +++ b/sklearn/linear_model/tests/test_least_angle.py @@ -3,6 +3,8 @@ import numpy as np from scipy import linalg +import pytest + from sklearn.model_selection import train_test_split from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_almost_equal @@ -172,18 +174,20 @@ def test_no_path_all_precomputed(): assert_true(alpha_ == alphas_[-1]) -def test_lars_precompute(): +@pytest.mark.parametrize( + 'classifier', + [linear_model.Lars, linear_model.LarsCV, linear_model.LassoLarsIC]) +def test_lars_precompute(classifier): # Check for different values of precompute X, y = diabetes.data, diabetes.target G = np.dot(X.T, X) - for classifier in [linear_model.Lars, linear_model.LarsCV, - linear_model.LassoLarsIC]: - clf = classifier(precompute=G) - output_1 = ignore_warnings(clf.fit)(X, y).coef_ - for precompute in [True, False, 'auto', None]: - clf = classifier(precompute=precompute) - output_2 = clf.fit(X, y).coef_ - assert_array_almost_equal(output_1, output_2, decimal=8) + + clf = classifier(precompute=G) + output_1 = ignore_warnings(clf.fit)(X, y).coef_ + for precompute in [True, False, 'auto', None]: + clf = classifier(precompute=precompute) + output_2 = clf.fit(X, y).coef_ + assert_array_almost_equal(output_1, output_2, decimal=8) def test_singular_matrix(): diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index a179c89e199a3..e363fed2abb9d 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -1,6 +1,9 @@ import numpy as np import scipy.sparse as sp from scipy import linalg, optimize, sparse + +import pytest + from sklearn.datasets import load_iris, make_classification from sklearn.metrics import log_loss from sklearn.model_selection import StratifiedKFold @@ -139,63 +142,63 @@ def test_predict_iris(): assert_greater(np.mean(pred == target), .95) -def test_multinomial_validation(): - for solver in ['lbfgs', 'newton-cg', 'sag', 'saga']: - lr = LogisticRegression(C=-1, solver=solver, multi_class='multinomial') - assert_raises(ValueError, lr.fit, [[0, 1], [1, 0]], [0, 1]) +@pytest.mark.parametrize('solver', ['lbfgs', 'newton-cg', 'sag', 'saga']) +def test_multinomial_validation(solver): + lr = LogisticRegression(C=-1, solver=solver, multi_class='multinomial') + assert_raises(ValueError, lr.fit, [[0, 1], [1, 
0]], [0, 1]) -def test_check_solver_option(): +@pytest.mark.parametrize('LR', [LogisticRegression, LogisticRegressionCV]) +def test_check_solver_option(LR): X, y = iris.data, iris.target - for LR in [LogisticRegression, LogisticRegressionCV]: - msg = ('Logistic Regression supports only liblinear, newton-cg, ' - 'lbfgs, sag and saga solvers, got wrong_name') - lr = LR(solver="wrong_name") + msg = ('Logistic Regression supports only liblinear, newton-cg, ' + 'lbfgs, sag and saga solvers, got wrong_name') + lr = LR(solver="wrong_name") + assert_raise_message(ValueError, msg, lr.fit, X, y) + + msg = "multi_class should be either multinomial or ovr, got wrong_name" + lr = LR(solver='newton-cg', multi_class="wrong_name") + assert_raise_message(ValueError, msg, lr.fit, X, y) + + # only 'liblinear' solver + msg = "Solver liblinear does not support a multinomial backend." + lr = LR(solver='liblinear', multi_class='multinomial') + assert_raise_message(ValueError, msg, lr.fit, X, y) + + # all solvers except 'liblinear' + for solver in ['newton-cg', 'lbfgs', 'sag']: + msg = ("Solver %s supports only l2 penalties, got l1 penalty." % + solver) + lr = LR(solver=solver, penalty='l1') assert_raise_message(ValueError, msg, lr.fit, X, y) - - msg = "multi_class should be either multinomial or ovr, got wrong_name" - lr = LR(solver='newton-cg', multi_class="wrong_name") + for solver in ['newton-cg', 'lbfgs', 'sag', 'saga']: + msg = ("Solver %s supports only dual=False, got dual=True" % + solver) + lr = LR(solver=solver, dual=True) assert_raise_message(ValueError, msg, lr.fit, X, y) - # only 'liblinear' solver - msg = "Solver liblinear does not support a multinomial backend." - lr = LR(solver='liblinear', multi_class='multinomial') - assert_raise_message(ValueError, msg, lr.fit, X, y) - # all solvers except 'liblinear' - for solver in ['newton-cg', 'lbfgs', 'sag']: - msg = ("Solver %s supports only l2 penalties, got l1 penalty." % - solver) - lr = LR(solver=solver, penalty='l1') - assert_raise_message(ValueError, msg, lr.fit, X, y) - for solver in ['newton-cg', 'lbfgs', 'sag', 'saga']: - msg = ("Solver %s supports only dual=False, got dual=True" % - solver) - lr = LR(solver=solver, dual=True) - assert_raise_message(ValueError, msg, lr.fit, X, y) - - -def test_multinomial_binary(): +@pytest.mark.parametrize('solver', ['lbfgs', 'newton-cg', 'sag', 'saga']) +def test_multinomial_binary(solver): # Test multinomial LR on a binary problem. 
target = (iris.target > 0).astype(np.intp) target = np.array(["setosa", "not-setosa"])[target] - for solver in ['lbfgs', 'newton-cg', 'sag', 'saga']: - clf = LogisticRegression(solver=solver, multi_class='multinomial', - random_state=42, max_iter=2000) - clf.fit(iris.data, target) + clf = LogisticRegression(solver=solver, multi_class='multinomial', + random_state=42, max_iter=2000) + clf.fit(iris.data, target) - assert_equal(clf.coef_.shape, (1, iris.data.shape[1])) - assert_equal(clf.intercept_.shape, (1,)) - assert_array_equal(clf.predict(iris.data), target) + assert_equal(clf.coef_.shape, (1, iris.data.shape[1])) + assert_equal(clf.intercept_.shape, (1,)) + assert_array_equal(clf.predict(iris.data), target) - mlr = LogisticRegression(solver=solver, multi_class='multinomial', - random_state=42, fit_intercept=False) - mlr.fit(iris.data, target) - pred = clf.classes_[np.argmax(clf.predict_log_proba(iris.data), - axis=1)] - assert_greater(np.mean(pred == target), .9) + mlr = LogisticRegression(solver=solver, multi_class='multinomial', + random_state=42, fit_intercept=False) + mlr.fit(iris.data, target) + pred = clf.classes_[np.argmax(clf.predict_log_proba(iris.data), + axis=1)] + assert_greater(np.mean(pred == target), .9) def test_multinomial_binary_probabilities(): @@ -1043,7 +1046,9 @@ def test_max_iter(): assert_equal(lr.n_iter_[0], max_iter) -def test_n_iter(): +@pytest.mark.parametrize('solver', + ['newton-cg', 'liblinear', 'sag', 'saga', 'lbfgs']) +def test_n_iter(solver): # Test that self.n_iter_ has the correct format. X, y = iris.data, iris.target y_bin = y.copy() @@ -1052,76 +1057,73 @@ def test_n_iter(): n_Cs = 4 n_cv_fold = 2 - for solver in ['newton-cg', 'liblinear', 'sag', 'saga', 'lbfgs']: - # OvR case - n_classes = 1 if solver == 'liblinear' else np.unique(y).shape[0] - clf = LogisticRegression(tol=1e-2, multi_class='ovr', - solver=solver, C=1., - random_state=42, max_iter=100) - clf.fit(X, y) - assert_equal(clf.n_iter_.shape, (n_classes,)) + # OvR case + n_classes = 1 if solver == 'liblinear' else np.unique(y).shape[0] + clf = LogisticRegression(tol=1e-2, multi_class='ovr', + solver=solver, C=1., + random_state=42, max_iter=100) + clf.fit(X, y) + assert_equal(clf.n_iter_.shape, (n_classes,)) - n_classes = np.unique(y).shape[0] - clf = LogisticRegressionCV(tol=1e-2, multi_class='ovr', - solver=solver, Cs=n_Cs, cv=n_cv_fold, - random_state=42, max_iter=100) - clf.fit(X, y) - assert_equal(clf.n_iter_.shape, (n_classes, n_cv_fold, n_Cs)) - clf.fit(X, y_bin) - assert_equal(clf.n_iter_.shape, (1, n_cv_fold, n_Cs)) - - # multinomial case - n_classes = 1 - if solver in ('liblinear', 'sag', 'saga'): - break - - clf = LogisticRegression(tol=1e-2, multi_class='multinomial', - solver=solver, C=1., - random_state=42, max_iter=100) - clf.fit(X, y) - assert_equal(clf.n_iter_.shape, (n_classes,)) + n_classes = np.unique(y).shape[0] + clf = LogisticRegressionCV(tol=1e-2, multi_class='ovr', + solver=solver, Cs=n_Cs, cv=n_cv_fold, + random_state=42, max_iter=100) + clf.fit(X, y) + assert_equal(clf.n_iter_.shape, (n_classes, n_cv_fold, n_Cs)) + clf.fit(X, y_bin) + assert_equal(clf.n_iter_.shape, (1, n_cv_fold, n_Cs)) + + # multinomial case + n_classes = 1 + if solver in ('liblinear', 'sag', 'saga'): + return + + clf = LogisticRegression(tol=1e-2, multi_class='multinomial', + solver=solver, C=1., + random_state=42, max_iter=100) + clf.fit(X, y) + assert_equal(clf.n_iter_.shape, (n_classes,)) - clf = LogisticRegressionCV(tol=1e-2, multi_class='multinomial', - solver=solver, Cs=n_Cs, 
cv=n_cv_fold, - random_state=42, max_iter=100) - clf.fit(X, y) - assert_equal(clf.n_iter_.shape, (n_classes, n_cv_fold, n_Cs)) - clf.fit(X, y_bin) - assert_equal(clf.n_iter_.shape, (1, n_cv_fold, n_Cs)) + clf = LogisticRegressionCV(tol=1e-2, multi_class='multinomial', + solver=solver, Cs=n_Cs, cv=n_cv_fold, + random_state=42, max_iter=100) + clf.fit(X, y) + assert_equal(clf.n_iter_.shape, (n_classes, n_cv_fold, n_Cs)) + clf.fit(X, y_bin) + assert_equal(clf.n_iter_.shape, (1, n_cv_fold, n_Cs)) -def test_warm_start(): +@pytest.mark.parametrize('solver', ('newton-cg', 'sag', 'saga', 'lbfgs')) +@pytest.mark.parametrize('warm_start', (True, False)) +@pytest.mark.parametrize('fit_intercept', (True, False)) +@pytest.mark.parametrize('multi_class', ['ovr', 'multinomial']) +def test_warm_start(solver, warm_start, fit_intercept, multi_class): # A 1-iteration second fit on same data should give almost same result # with warm starting, and quite different result without warm starting. # Warm starting does not work with liblinear solver. X, y = iris.data, iris.target - solvers = ['newton-cg', 'sag', 'saga', 'lbfgs'] - - for warm_start in [True, False]: - for fit_intercept in [True, False]: - for solver in solvers: - for multi_class in ['ovr', 'multinomial']: - clf = LogisticRegression(tol=1e-4, multi_class=multi_class, - warm_start=warm_start, - solver=solver, - random_state=42, max_iter=100, - fit_intercept=fit_intercept) - with ignore_warnings(category=ConvergenceWarning): - clf.fit(X, y) - coef_1 = clf.coef_ - - clf.max_iter = 1 - clf.fit(X, y) - cum_diff = np.sum(np.abs(coef_1 - clf.coef_)) - msg = ("Warm starting issue with %s solver in %s mode " - "with fit_intercept=%s and warm_start=%s" - % (solver, multi_class, str(fit_intercept), - str(warm_start))) - if warm_start: - assert_greater(2.0, cum_diff, msg) - else: - assert_greater(cum_diff, 2.0, msg) + clf = LogisticRegression(tol=1e-4, multi_class=multi_class, + warm_start=warm_start, + solver=solver, + random_state=42, max_iter=100, + fit_intercept=fit_intercept) + with ignore_warnings(category=ConvergenceWarning): + clf.fit(X, y) + coef_1 = clf.coef_ + + clf.max_iter = 1 + clf.fit(X, y) + cum_diff = np.sum(np.abs(coef_1 - clf.coef_)) + msg = ("Warm starting issue with %s solver in %s mode " + "with fit_intercept=%s and warm_start=%s" + % (solver, multi_class, str(fit_intercept), + str(warm_start))) + if warm_start: + assert_greater(2.0, cum_diff, msg) + else: + assert_greater(cum_diff, 2.0, msg) def test_saga_vs_liblinear(): diff --git a/sklearn/linear_model/tests/test_passive_aggressive.py b/sklearn/linear_model/tests/test_passive_aggressive.py index 5620c29e18374..ee519b7390c5b 100644 --- a/sklearn/linear_model/tests/test_passive_aggressive.py +++ b/sklearn/linear_model/tests/test_passive_aggressive.py @@ -2,6 +2,8 @@ import numpy as np import scipy.sparse as sp +import pytest + from sklearn.utils.testing import assert_less from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_array_almost_equal, assert_array_equal @@ -111,23 +113,22 @@ def test_classifier_refit(): assert_array_equal(clf.classes_, iris.target_names) -def test_classifier_correctness(): +@pytest.mark.parametrize('loss', ("hinge", "squared_hinge")) +def test_classifier_correctness(loss): y_bin = y.copy() y_bin[y != 1] = -1 - for loss in ("hinge", "squared_hinge"): - - clf1 = MyPassiveAggressive( - C=1.0, loss=loss, fit_intercept=True, n_iter=2) - clf1.fit(X, y_bin) + clf1 = MyPassiveAggressive( + C=1.0, loss=loss, fit_intercept=True, 
n_iter=2) + clf1.fit(X, y_bin) - for data in (X, X_csr): - clf2 = PassiveAggressiveClassifier( - C=1.0, loss=loss, fit_intercept=True, max_iter=2, - shuffle=False, tol=None) - clf2.fit(data, y_bin) + for data in (X, X_csr): + clf2 = PassiveAggressiveClassifier( + C=1.0, loss=loss, fit_intercept=True, max_iter=2, + shuffle=False, tol=None) + clf2.fit(data, y_bin) - assert_array_almost_equal(clf1.w, clf2.coef_.ravel(), decimal=2) + assert_array_almost_equal(clf1.w, clf2.coef_.ravel(), decimal=2) def test_classifier_undefined_methods(): @@ -248,22 +249,24 @@ def test_regressor_partial_fit(): assert_true(hasattr(reg, 'standard_coef_')) -def test_regressor_correctness(): +@pytest.mark.parametrize( + 'loss', + ("epsilon_insensitive", "squared_epsilon_insensitive")) +def test_regressor_correctness(loss): y_bin = y.copy() y_bin[y != 1] = -1 - for loss in ("epsilon_insensitive", "squared_epsilon_insensitive"): - reg1 = MyPassiveAggressive( - C=1.0, loss=loss, fit_intercept=True, n_iter=2) - reg1.fit(X, y_bin) + reg1 = MyPassiveAggressive( + C=1.0, loss=loss, fit_intercept=True, n_iter=2) + reg1.fit(X, y_bin) - for data in (X, X_csr): - reg2 = PassiveAggressiveRegressor( - C=1.0, tol=None, loss=loss, fit_intercept=True, max_iter=2, - shuffle=False) - reg2.fit(data, y_bin) + for data in (X, X_csr): + reg2 = PassiveAggressiveRegressor( + C=1.0, tol=None, loss=loss, fit_intercept=True, max_iter=2, + shuffle=False) + reg2.fit(data, y_bin) - assert_array_almost_equal(reg1.w, reg2.coef_.ravel(), decimal=2) + assert_array_almost_equal(reg1.w, reg2.coef_.ravel(), decimal=2) def test_regressor_undefined_methods(): diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index a2f2a135b3ae4..2f574b88ba7b5 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -3,6 +3,8 @@ from scipy import linalg from itertools import product +import pytest + from sklearn.utils.testing import assert_true from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_almost_equal @@ -57,41 +59,42 @@ SPARSE_FILTER = lambda X: sp.csr_matrix(X) -def test_ridge(): +@pytest.mark.parametrize('solver', + ("svd", "sparse_cg", "cholesky", "lsqr", "sag")) +def test_ridge(solver): # Ridge regression convergence test using score # TODO: for this test to be robust, we should use a dataset instead # of np.random. rng = np.random.RandomState(0) alpha = 1.0 - for solver in ("svd", "sparse_cg", "cholesky", "lsqr", "sag"): - # With more samples than features - n_samples, n_features = 6, 5 - y = rng.randn(n_samples) - X = rng.randn(n_samples, n_features) + # With more samples than features + n_samples, n_features = 6, 5 + y = rng.randn(n_samples) + X = rng.randn(n_samples, n_features) - ridge = Ridge(alpha=alpha, solver=solver) - ridge.fit(X, y) - assert_equal(ridge.coef_.shape, (X.shape[1], )) - assert_greater(ridge.score(X, y), 0.47) + ridge = Ridge(alpha=alpha, solver=solver) + ridge.fit(X, y) + assert_equal(ridge.coef_.shape, (X.shape[1], )) + assert_greater(ridge.score(X, y), 0.47) - if solver in ("cholesky", "sag"): - # Currently the only solvers to support sample_weight. - ridge.fit(X, y, sample_weight=np.ones(n_samples)) - assert_greater(ridge.score(X, y), 0.47) + if solver in ("cholesky", "sag"): + # Currently the only solvers to support sample_weight. 
+ ridge.fit(X, y, sample_weight=np.ones(n_samples)) + assert_greater(ridge.score(X, y), 0.47) - # With more features than samples - n_samples, n_features = 5, 10 - y = rng.randn(n_samples) - X = rng.randn(n_samples, n_features) - ridge = Ridge(alpha=alpha, solver=solver) - ridge.fit(X, y) - assert_greater(ridge.score(X, y), .9) + # With more features than samples + n_samples, n_features = 5, 10 + y = rng.randn(n_samples) + X = rng.randn(n_samples, n_features) + ridge = Ridge(alpha=alpha, solver=solver) + ridge.fit(X, y) + assert_greater(ridge.score(X, y), .9) - if solver in ("cholesky", "sag"): - # Currently the only solvers to support sample_weight. - ridge.fit(X, y, sample_weight=np.ones(n_samples)) - assert_greater(ridge.score(X, y), 0.9) + if solver in ("cholesky", "sag"): + # Currently the only solvers to support sample_weight. + ridge.fit(X, y, sample_weight=np.ones(n_samples)) + assert_greater(ridge.score(X, y), 0.9) def test_primal_dual_relationship(): @@ -153,6 +156,8 @@ def test_ridge_regression_convergence_fail(): def test_ridge_sample_weights(): # TODO: loop over sparse data as well + # Note: parametrizing this test with pytest results in failed + # assertions, meaning that it is not extremely robust rng = np.random.RandomState(0) param_grid = product((1.0, 1e-2), (True, False), @@ -483,15 +488,13 @@ def check_dense_sparse(test_func): assert_array_almost_equal(ret_dense, ret_sparse, decimal=3) -def test_dense_sparse(): - for test_func in (_test_ridge_loo, - _test_ridge_cv, - _test_ridge_cv_normalize, - _test_ridge_diabetes, - _test_multi_ridge_diabetes, - _test_ridge_classifiers, - _test_tolerance): - yield check_dense_sparse, test_func +@pytest.mark.parametrize( + 'test_func', + (_test_ridge_loo, _test_ridge_cv, _test_ridge_cv_normalize, + _test_ridge_diabetes, _test_multi_ridge_diabetes, + _test_ridge_classifiers, _test_tolerance)) +def test_dense_sparse(test_func): + check_dense_sparse(test_func) def test_ridge_cv_sparse_svd(): @@ -543,33 +546,33 @@ def test_class_weights(): assert_array_almost_equal(reg.intercept_, rega.intercept_) -def test_class_weight_vs_sample_weight(): +@pytest.mark.parametrize('reg', (RidgeClassifier, RidgeClassifierCV)) +def test_class_weight_vs_sample_weight(reg): """Check class_weights resemble sample_weights behavior.""" - for reg in (RidgeClassifier, RidgeClassifierCV): - - # Iris is balanced, so no effect expected for using 'balanced' weights - reg1 = reg() - reg1.fit(iris.data, iris.target) - reg2 = reg(class_weight='balanced') - reg2.fit(iris.data, iris.target) - assert_almost_equal(reg1.coef_, reg2.coef_) - - # Inflate importance of class 1, check against user-defined weights - sample_weight = np.ones(iris.target.shape) - sample_weight[iris.target == 1] *= 100 - class_weight = {0: 1., 1: 100., 2: 1.} - reg1 = reg() - reg1.fit(iris.data, iris.target, sample_weight) - reg2 = reg(class_weight=class_weight) - reg2.fit(iris.data, iris.target) - assert_almost_equal(reg1.coef_, reg2.coef_) - - # Check that sample_weight and class_weight are multiplicative - reg1 = reg() - reg1.fit(iris.data, iris.target, sample_weight ** 2) - reg2 = reg(class_weight=class_weight) - reg2.fit(iris.data, iris.target, sample_weight) - assert_almost_equal(reg1.coef_, reg2.coef_) + + # Iris is balanced, so no effect expected for using 'balanced' weights + reg1 = reg() + reg1.fit(iris.data, iris.target) + reg2 = reg(class_weight='balanced') + reg2.fit(iris.data, iris.target) + assert_almost_equal(reg1.coef_, reg2.coef_) + + # Inflate importance of class 1, check against
user-defined weights + sample_weight = np.ones(iris.target.shape) + sample_weight[iris.target == 1] *= 100 + class_weight = {0: 1., 1: 100., 2: 1.} + reg1 = reg() + reg1.fit(iris.data, iris.target, sample_weight) + reg2 = reg(class_weight=class_weight) + reg2.fit(iris.data, iris.target) + assert_almost_equal(reg1.coef_, reg2.coef_) + + # Check that sample_weight and class_weight are multiplicative + reg1 = reg() + reg1.fit(iris.data, iris.target, sample_weight ** 2) + reg2 = reg(class_weight=class_weight) + reg2.fit(iris.data, iris.target, sample_weight) + assert_almost_equal(reg1.coef_, reg2.coef_) def test_class_weights_cv(): diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py index 9f372f706ca71..18bc073139650 100644 --- a/sklearn/linear_model/tests/test_sgd.py +++ b/sklearn/linear_model/tests/test_sgd.py @@ -1174,16 +1174,16 @@ def test_numerical_stability_large_gradient(): assert_true(np.isfinite(model.coef_).all()) -def test_large_regularization(): +@pytest.mark.parametrize('penalty', ['l2', 'l1', 'elasticnet']) +def test_large_regularization(penalty): # Non regression tests for numerical stability issues caused by large # regularization parameters - for penalty in ['l2', 'l1', 'elasticnet']: - model = SGDClassifier(alpha=1e5, learning_rate='constant', eta0=0.1, - penalty=penalty, shuffle=False, - tol=None, max_iter=6) - with np.errstate(all='raise'): - model.fit(iris.data, iris.target) - assert_array_almost_equal(model.coef_, np.zeros_like(model.coef_)) + model = SGDClassifier(alpha=1e5, learning_rate='constant', eta0=0.1, + penalty=penalty, shuffle=False, + tol=None, max_iter=6) + with np.errstate(all='raise'): + model.fit(iris.data, iris.target) + assert_array_almost_equal(model.coef_, np.zeros_like(model.coef_)) def test_tol_parameter(): diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py index 6b1d87bb18bf5..cc692ae0d0cd0 100644 --- a/sklearn/manifold/tests/test_t_sne.py +++ b/sklearn/manifold/tests/test_t_sne.py @@ -3,6 +3,8 @@ import numpy as np import scipy.sparse as sp +import pytest + from sklearn.neighbors import BallTree from sklearn.neighbors import NearestNeighbors from sklearn.utils.testing import assert_less_equal @@ -596,35 +598,35 @@ def test_no_sparse_on_barnes_hut(): tsne.fit_transform, X_csr) -def test_64bit(): +@pytest.mark.parametrize('method', ['barnes_hut', 'exact']) +@pytest.mark.parametrize('dt', [np.float32, np.float64]) +def test_64bit(method, dt): # Ensure 64bit arrays are handled correctly. 
random_state = check_random_state(0) - methods = ['barnes_hut', 'exact'] - for method in methods: - for dt in [np.float32, np.float64]: - X = random_state.randn(50, 2).astype(dt) - tsne = TSNE(n_components=2, perplexity=2, learning_rate=100.0, - random_state=0, method=method, verbose=0) - X_embedded = tsne.fit_transform(X) - effective_type = X_embedded.dtype - # tsne cython code is only single precision, so the output will - # always be single precision, irrespectively of the input dtype - assert effective_type == np.float32 + X = random_state.randn(50, 2).astype(dt) + tsne = TSNE(n_components=2, perplexity=2, learning_rate=100.0, + random_state=0, method=method, verbose=0) + X_embedded = tsne.fit_transform(X) + effective_type = X_embedded.dtype + # tsne cython code is only single precision, so the output will + # always be single precision, irrespectively of the input dtype + assert effective_type == np.float32 -def test_kl_divergence_not_nan(): + +@pytest.mark.parametrize('method', ['barnes_hut', 'exact']) +def test_kl_divergence_not_nan(method): # Ensure kl_divergence_ is computed at last iteration # even though n_iter % n_iter_check != 0, i.e. 1003 % 50 != 0 random_state = check_random_state(0) - methods = ['barnes_hut', 'exact'] - for method in methods: - X = random_state.randn(50, 2) - tsne = TSNE(n_components=2, perplexity=2, learning_rate=100.0, - random_state=0, method=method, verbose=0, n_iter=1003) - tsne.fit_transform(X) - assert not np.isnan(tsne.kl_divergence_) + X = random_state.randn(50, 2) + tsne = TSNE(n_components=2, perplexity=2, learning_rate=100.0, + random_state=0, method=method, verbose=0, n_iter=1003) + tsne.fit_transform(X) + + assert not np.isnan(tsne.kl_divergence_) def test_barnes_hut_angle(): @@ -807,9 +809,9 @@ def assert_uniform_grid(Y, try_name=None): assert_less(largest_to_mean, 2, msg=try_name) -def test_uniform_grid(): - for method in ['barnes_hut', 'exact']: - yield check_uniform_grid, method +@pytest.mark.parametrize('method', ['barnes_hut', 'exact']) +def test_uniform_grid(method): + check_uniform_grid(method) def test_bh_match_exact(): diff --git a/sklearn/metrics/cluster/tests/test_common.py b/sklearn/metrics/cluster/tests/test_common.py index 71534380fe6ec..a7e54d22cc7c8 100644 --- a/sklearn/metrics/cluster/tests/test_common.py +++ b/sklearn/metrics/cluster/tests/test_common.py @@ -101,10 +101,7 @@ def test_non_symmetry(metric_name, y1, y2): assert metric(y1, y2) != pytest.approx(metric(y2, y1)) -@pytest.mark.parametrize( - "metric_name", - [name for name in NORMALIZED_METRICS] -) +@pytest.mark.parametrize("metric_name", NORMALIZED_METRICS) def test_normalized_output(metric_name): upper_bound_1 = [0, 0, 0, 1, 1, 1] upper_bound_2 = [0, 0, 0, 1, 1, 1] @@ -126,7 +123,7 @@ def test_normalized_output(metric_name): # that is when 0 and 1 exchanged. 
@pytest.mark.parametrize( "metric_name", - [name for name in dict(SUPERVISED_METRICS, **UNSUPERVISED_METRICS)] + dict(SUPERVISED_METRICS, **UNSUPERVISED_METRICS) ) def test_permute_labels(metric_name): y_label = np.array([0, 0, 0, 1, 1, 0, 1]) @@ -147,7 +144,7 @@ def test_permute_labels(metric_name): # For all clustering metrics Input parameters can be both @pytest.mark.parametrize( "metric_name", - [name for name in dict(SUPERVISED_METRICS, **UNSUPERVISED_METRICS)] + dict(SUPERVISED_METRICS, **UNSUPERVISED_METRICS) ) # in the form of arrays lists, positive, negetive or string def test_format_invariance(metric_name): diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 1705510cc1ea4..923f60994dac1 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -6,6 +6,8 @@ from itertools import product import warnings +import pytest + from sklearn import datasets from sklearn import svm @@ -520,7 +522,8 @@ def test_matthews_corrcoef_multiclass(): assert_almost_equal(mcc, 0.) -def test_matthews_corrcoef_overflow(): +@pytest.mark.parametrize('n_points', [100, 10000, 1000000]) +def test_matthews_corrcoef_overflow(n_points): # https://github.com/scikit-learn/scikit-learn/issues/9622 rng = np.random.RandomState(20170906) @@ -543,16 +546,15 @@ def random_ys(n_points): # binary y_pred = (x_pred > 0.5) return y_true, y_pred - for n_points in [100, 10000, 1000000]: - arr = np.repeat([0., 1.], n_points) # binary - assert_almost_equal(matthews_corrcoef(arr, arr), 1.0) - arr = np.repeat([0., 1., 2.], n_points) # multiclass - assert_almost_equal(matthews_corrcoef(arr, arr), 1.0) + arr = np.repeat([0., 1.], n_points) # binary + assert_almost_equal(matthews_corrcoef(arr, arr), 1.0) + arr = np.repeat([0., 1., 2.], n_points) # multiclass + assert_almost_equal(matthews_corrcoef(arr, arr), 1.0) - y_true, y_pred = random_ys(n_points) - assert_almost_equal(matthews_corrcoef(y_true, y_true), 1.0) - assert_almost_equal(matthews_corrcoef(y_true, y_pred), - mcc_safe(y_true, y_pred)) + y_true, y_pred = random_ys(n_points) + assert_almost_equal(matthews_corrcoef(y_true, y_true), 1.0) + assert_almost_equal(matthews_corrcoef(y_true, y_pred), + mcc_safe(y_true, y_pred)) def test_precision_recall_f1_score_multiclass(): @@ -610,18 +612,19 @@ def test_precision_recall_f1_score_multiclass(): assert_array_equal(s, [24, 20, 31]) -def test_precision_refcall_f1_score_multilabel_unordered_labels(): +@pytest.mark.parametrize('average', + ['samples', 'micro', 'macro', 'weighted', None]) +def test_precision_refcall_f1_score_multilabel_unordered_labels(average): # test that labels need not be sorted in the multilabel case y_true = np.array([[1, 1, 0, 0]]) y_pred = np.array([[0, 0, 1, 1]]) - for average in ['samples', 'micro', 'macro', 'weighted', None]: - p, r, f, s = precision_recall_fscore_support( - y_true, y_pred, labels=[3, 0, 1, 2], warn_for=[], average=average) - assert_array_equal(p, 0) - assert_array_equal(r, 0) - assert_array_equal(f, 0) - if average is None: - assert_array_equal(s, [0, 1, 1, 0]) + p, r, f, s = precision_recall_fscore_support( + y_true, y_pred, labels=[3, 0, 1, 2], warn_for=[], average=average) + assert_array_equal(p, 0) + assert_array_equal(r, 0) + assert_array_equal(f, 0) + if average is None: + assert_array_equal(s, [0, 1, 1, 0]) def test_precision_recall_f1_score_binary_averaged(): @@ -1207,10 +1210,33 @@ def test_precision_recall_f1_score_with_an_empty_prediction(): 0.333, 2) -def 
test_precision_recall_f1_no_labels(): +@pytest.mark.parametrize('beta', [1]) +@pytest.mark.parametrize('average', ["macro", "micro", "weighted", "samples"]) +def test_precision_recall_f1_no_labels(beta, average): + y_true = np.zeros((20, 3)) + y_pred = np.zeros_like(y_true) + + p, r, f, s = assert_warns(UndefinedMetricWarning, + precision_recall_fscore_support, + y_true, y_pred, average=average, + beta=beta) + assert_almost_equal(p, 0) + assert_almost_equal(r, 0) + assert_almost_equal(f, 0) + assert_equal(s, None) + + fbeta = assert_warns(UndefinedMetricWarning, fbeta_score, + y_true, y_pred, + beta=beta, average=average) + assert_almost_equal(fbeta, 0) + + +def test_precision_recall_f1_no_labels_average_none(): y_true = np.zeros((20, 3)) y_pred = np.zeros_like(y_true) + beta = 1 + # tp = [0, 0, 0] # fn = [0, 0, 0] # fp = [0, 0, 0] @@ -1219,33 +1245,17 @@ def test_precision_recall_f1_no_labels(): # |y_i| = [0, 0, 0] # |y_hat_i| = [0, 0, 0] - for beta in [1]: - p, r, f, s = assert_warns(UndefinedMetricWarning, - precision_recall_fscore_support, - y_true, y_pred, average=None, beta=beta) - assert_array_almost_equal(p, [0, 0, 0], 2) - assert_array_almost_equal(r, [0, 0, 0], 2) - assert_array_almost_equal(f, [0, 0, 0], 2) - assert_array_almost_equal(s, [0, 0, 0], 2) - - fbeta = assert_warns(UndefinedMetricWarning, fbeta_score, - y_true, y_pred, beta=beta, average=None) - assert_array_almost_equal(fbeta, [0, 0, 0], 2) - - for average in ["macro", "micro", "weighted", "samples"]: - p, r, f, s = assert_warns(UndefinedMetricWarning, - precision_recall_fscore_support, - y_true, y_pred, average=average, - beta=beta) - assert_almost_equal(p, 0) - assert_almost_equal(r, 0) - assert_almost_equal(f, 0) - assert_equal(s, None) - - fbeta = assert_warns(UndefinedMetricWarning, fbeta_score, - y_true, y_pred, - beta=beta, average=average) - assert_almost_equal(fbeta, 0) + p, r, f, s = assert_warns(UndefinedMetricWarning, + precision_recall_fscore_support, + y_true, y_pred, average=None, beta=beta) + assert_array_almost_equal(p, [0, 0, 0], 2) + assert_array_almost_equal(r, [0, 0, 0], 2) + assert_array_almost_equal(f, [0, 0, 0], 2) + assert_array_almost_equal(s, [0, 0, 0], 2) + + fbeta = assert_warns(UndefinedMetricWarning, fbeta_score, + y_true, y_pred, beta=beta, average=None) + assert_array_almost_equal(fbeta, [0, 0, 0], 2) def test_prf_warnings(): diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 680b78c3dd43d..f835fdd507761 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -2,10 +2,13 @@ from functools import partial from itertools import product +from itertools import chain import numpy as np import scipy.sparse as sp +import pytest + from sklearn.datasets import make_multilabel_classification from sklearn.preprocessing import LabelBinarizer from sklearn.utils.multiclass import type_of_target @@ -193,7 +196,7 @@ # is already written. 
# Those metrics don't support binary inputs -METRIC_UNDEFINED_BINARY = [ +METRIC_UNDEFINED_BINARY = { "samples_f0.5_score", "samples_f1_score", "samples_f2_score", @@ -209,10 +212,10 @@ "label_ranking_loss", "label_ranking_average_precision_score", -] +} # Those metrics don't support multiclass inputs -METRIC_UNDEFINED_MULTICLASS = [ +METRIC_UNDEFINED_MULTICLASS = { "brier_score_loss", "balanced_accuracy_score", @@ -229,24 +232,24 @@ "f1_score", "f2_score", "f0.5_score", -] +} # Metric undefined with "binary" or "multiclass" input -METRIC_UNDEFINED_BINARY_MULTICLASS = set(METRIC_UNDEFINED_BINARY).union( - set(METRIC_UNDEFINED_MULTICLASS)) +METRIC_UNDEFINED_BINARY_MULTICLASS = METRIC_UNDEFINED_BINARY.union( + METRIC_UNDEFINED_MULTICLASS) # Metrics with an "average" argument -METRICS_WITH_AVERAGING = [ +METRICS_WITH_AVERAGING = { "precision_score", "recall_score", "f1_score", "f2_score", "f0.5_score" -] +} # Threshold-based metrics with an "average" argument -THRESHOLDED_METRICS_WITH_AVERAGING = [ +THRESHOLDED_METRICS_WITH_AVERAGING = { "roc_auc_score", "average_precision_score", "partial_roc_auc", -] +} # Metrics with a "pos_label" argument -METRICS_WITH_POS_LABEL = [ +METRICS_WITH_POS_LABEL = { "roc_curve", "brier_score_loss", @@ -262,12 +265,12 @@ "macro_f0.5_score", "macro_f1_score", "macro_f2_score", "macro_precision_score", "macro_recall_score", -] +} # Metrics with a "labels" argument # TODO: Handle multi_class metrics that has a labels argument as well as a # decision function argument. e.g hinge_loss -METRICS_WITH_LABELS = [ +METRICS_WITH_LABELS = { "confusion_matrix", "hamming_loss", @@ -284,17 +287,17 @@ "macro_precision_score", "macro_recall_score", "cohen_kappa_score", -] +} # Metrics with a "normalize" option -METRICS_WITH_NORMALIZE_OPTION = [ +METRICS_WITH_NORMALIZE_OPTION = { "accuracy_score", "jaccard_similarity_score", "zero_one_loss", -] +} # Threshold-based metrics with "multilabel-indicator" format support -THRESHOLDED_MULTILABEL_METRICS = [ +THRESHOLDED_MULTILABEL_METRICS = { "log_loss", "unnormalized_log_loss", @@ -307,10 +310,10 @@ "coverage_error", "label_ranking_loss", "label_ranking_average_precision_score", -] +} # Classification metrics with "multilabel-indicator" format -MULTILABELS_METRICS = [ +MULTILABELS_METRICS = { "accuracy_score", "unnormalized_accuracy_score", "hamming_loss", "jaccard_similarity_score", "unnormalized_jaccard_similarity_score", @@ -327,17 +330,17 @@ "samples_f0.5_score", "samples_f1_score", "samples_f2_score", "samples_precision_score", "samples_recall_score", -] +} # Regression metrics with "multioutput-continuous" format support -MULTIOUTPUT_METRICS = [ +MULTIOUTPUT_METRICS = { "mean_absolute_error", "mean_squared_error", "r2_score", "explained_variance_score" -] +} # Symmetric with respect to their input arguments y_true and y_pred # metric(y_true, y_pred) == metric(y_pred, y_true). -SYMMETRIC_METRICS = [ +SYMMETRIC_METRICS = { "accuracy_score", "unnormalized_accuracy_score", "hamming_loss", "jaccard_similarity_score", "unnormalized_jaccard_similarity_score", @@ -353,11 +356,11 @@ "median_absolute_error", "cohen_kappa_score", -] +} # Asymmetric with respect to their input arguments y_true and y_pred # metric(y_true, y_pred) != metric(y_pred, y_true). 
-NOT_SYMMETRIC_METRICS = [ +NOT_SYMMETRIC_METRICS = { "balanced_accuracy_score", "explained_variance_score", "r2_score", @@ -370,18 +373,18 @@ "macro_f0.5_score", "macro_f2_score", "macro_precision_score", "macro_recall_score", "log_loss", "hinge_loss" -] +} # No Sample weight support -METRICS_WITHOUT_SAMPLE_WEIGHT = [ +METRICS_WITHOUT_SAMPLE_WEIGHT = { "confusion_matrix", # Left this one here because the tests in this file do # not work for confusion_matrix, as its output is a # matrix instead of a number. Testing of # confusion_matrix with sample_weight is in # test_classification.py "median_absolute_error", -] +} @ignore_warnings @@ -392,13 +395,13 @@ def test_symmetry(): y_pred = random_state.randint(0, 2, size=(20, )) # We shouldn't forget any metrics - assert_equal(set(SYMMETRIC_METRICS).union( - NOT_SYMMETRIC_METRICS, THRESHOLDED_METRICS, + assert_equal(SYMMETRIC_METRICS.union( + NOT_SYMMETRIC_METRICS, set(THRESHOLDED_METRICS), METRIC_UNDEFINED_BINARY_MULTICLASS), set(ALL_METRICS)) assert_equal( - set(SYMMETRIC_METRICS).intersection(set(NOT_SYMMETRIC_METRICS)), + SYMMETRIC_METRICS.intersection(NOT_SYMMETRIC_METRICS), set([])) # Symmetric metric @@ -415,17 +418,17 @@ def test_symmetry(): msg="%s seems to be symmetric" % name) -@ignore_warnings -def test_sample_order_invariance(): +@pytest.mark.parametrize( + 'name', + set(ALL_METRICS) - METRIC_UNDEFINED_BINARY_MULTICLASS) +def test_sample_order_invariance(name): random_state = check_random_state(0) y_true = random_state.randint(0, 2, size=(20, )) y_pred = random_state.randint(0, 2, size=(20, )) y_true_shuffle, y_pred_shuffle = shuffle(y_true, y_pred, random_state=0) - for name, metric in ALL_METRICS.items(): - if name in METRIC_UNDEFINED_BINARY_MULTICLASS: - continue - + with ignore_warnings(): + metric = ALL_METRICS[name] assert_almost_equal(metric(y_true, y_pred), metric(y_true_shuffle, y_pred_shuffle), err_msg="%s is not sample order invariant" @@ -472,8 +475,10 @@ def test_sample_order_invariance_multilabel_and_multioutput(): % name) -@ignore_warnings -def test_format_invariance_with_1d_vectors(): +@pytest.mark.parametrize( + 'name', + set(ALL_METRICS) - METRIC_UNDEFINED_BINARY_MULTICLASS) +def test_format_invariance_with_1d_vectors(name): random_state = check_random_state(0) y1 = random_state.randint(0, 2, size=(20, )) y2 = random_state.randint(0, 2, size=(20, )) @@ -489,9 +494,8 @@ def test_format_invariance_with_1d_vectors(): y1_row = np.reshape(y1_1d, (1, -1)) y2_row = np.reshape(y2_1d, (1, -1)) - for name, metric in ALL_METRICS.items(): - if name in METRIC_UNDEFINED_BINARY_MULTICLASS: - continue + with ignore_warnings(): + metric = ALL_METRICS[name] measure = metric(y1, y2) @@ -546,14 +550,16 @@ def test_format_invariance_with_1d_vectors(): # NB: We do not test for y1_row, y2_row as these may be # interpreted as multilabel or multioutput data. 
- if (name not in (MULTIOUTPUT_METRICS + THRESHOLDED_MULTILABEL_METRICS + + if (name not in (MULTIOUTPUT_METRICS | THRESHOLDED_MULTILABEL_METRICS | MULTILABELS_METRICS)): assert_raises(ValueError, metric, y1_row, y2_row) -@ignore_warnings -def test_invariance_string_vs_numbers_labels(): - # Ensure that classification metrics with string labels +@pytest.mark.parametrize( + 'name', + set(CLASSIFICATION_METRICS) - METRIC_UNDEFINED_BINARY_MULTICLASS) +def test_classification_invariance_string_vs_numbers_labels(name): + # Ensure that classification metrics with string labels are invariant random_state = check_random_state(0) y1 = random_state.randint(0, 2, size=(20, )) y2 = random_state.randint(0, 2, size=(20, )) @@ -564,10 +570,8 @@ def test_invariance_string_vs_numbers_labels(): pos_label_str = "spam" labels_str = ["eggs", "spam"] - for name, metric in CLASSIFICATION_METRICS.items(): - if name in METRIC_UNDEFINED_BINARY_MULTICLASS: - continue - + with ignore_warnings(): + metric = CLASSIFICATION_METRICS[name] measure_with_number = metric(y1, y2) # Ugly, but handle case with a pos_label and label @@ -600,7 +604,20 @@ def test_invariance_string_vs_numbers_labels(): err_msg="{0} failed string vs number " "invariance test".format(name)) - for name, metric in THRESHOLDED_METRICS.items(): + +@pytest.mark.parametrize('name', THRESHOLDED_METRICS) +def test_thresholded_invariance_string_vs_numbers_labels(name): + # Ensure that thresholded metrics with string labels are invariant + random_state = check_random_state(0) + y1 = random_state.randint(0, 2, size=(20, )) + y2 = random_state.randint(0, 2, size=(20, )) + + y1_str = np.array(["eggs", "spam"])[y1] + + pos_label_str = "spam" + + with ignore_warnings(): + metric = THRESHOLDED_METRICS[name] if name not in METRIC_UNDEFINED_BINARY: # Ugly, but handle case with a pos_label and label metric_str = metric @@ -623,28 +640,30 @@ def test_invariance_string_vs_numbers_labels(): assert_raises(ValueError, metric, y1_str.astype('O'), y2) -def test_inf_nan_input(): - invalids =[([0, 1], [np.inf, np.inf]), - ([0, 1], [np.nan, np.nan]), - ([0, 1], [np.nan, np.inf])] +invalids = [([0, 1], [np.inf, np.inf]), + ([0, 1], [np.nan, np.nan]), + ([0, 1], [np.nan, np.inf])] + + +@pytest.mark.parametrize( + 'metric', + chain(THRESHOLDED_METRICS.values(), REGRESSION_METRICS.values())) +def test_regression_thresholded_inf_nan_input(metric): - METRICS = dict() - METRICS.update(THRESHOLDED_METRICS) - METRICS.update(REGRESSION_METRICS) + for y_true, y_score in invalids: + assert_raise_message(ValueError, + "contains NaN, infinity", + metric, y_true, y_score) - for metric in METRICS.values(): - for y_true, y_score in invalids: - assert_raise_message(ValueError, - "contains NaN, infinity", - metric, y_true, y_score) +@pytest.mark.parametrize('metric', CLASSIFICATION_METRICS.values()) +def test_classification_inf_nan_input(metric): # Classification metrics all raise a mixed input exception - for metric in CLASSIFICATION_METRICS.values(): - for y_true, y_score in invalids: - assert_raise_message(ValueError, - "Classification metrics can't handle a mix " - "of binary and continuous targets", - metric, y_true, y_score) + for y_true, y_score in invalids: + assert_raise_message(ValueError, + "Classification metrics can't handle a mix " + "of binary and continuous targets", + metric, y_true, y_score) @ignore_warnings @@ -667,45 +686,47 @@ def check_single_sample_multioutput(name): metric(np.array([[i, j]]), np.array([[k, l]])) -def test_single_sample(): - for name in ALL_METRICS: - if 
(name in METRIC_UNDEFINED_BINARY_MULTICLASS or - name in THRESHOLDED_METRICS): - # Those metrics are not always defined with one sample - # or in multiclass classification - continue +@pytest.mark.parametrize( + 'name', + (set(ALL_METRICS) + # Those metrics are not always defined with one sample + # or in multiclass classification + - METRIC_UNDEFINED_BINARY_MULTICLASS + - set(THRESHOLDED_METRICS))) +def test_single_sample(name): + check_single_sample(name) - yield check_single_sample, name - for name in MULTIOUTPUT_METRICS + MULTILABELS_METRICS: - yield check_single_sample_multioutput, name +@pytest.mark.parametrize('name', MULTIOUTPUT_METRICS | MULTILABELS_METRICS) +def test_single_sample_multioutput(name): + check_single_sample_multioutput(name) -def test_multioutput_number_of_output_differ(): +@pytest.mark.parametrize('name', MULTIOUTPUT_METRICS) +def test_multioutput_number_of_output_differ(name): y_true = np.array([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]]) y_pred = np.array([[0, 0], [1, 0], [0, 0]]) - for name in MULTIOUTPUT_METRICS: - metric = ALL_METRICS[name] - assert_raises(ValueError, metric, y_true, y_pred) + metric = ALL_METRICS[name] + assert_raises(ValueError, metric, y_true, y_pred) -def test_multioutput_regression_invariance_to_dimension_shuffling(): +@pytest.mark.parametrize('name', MULTIOUTPUT_METRICS) +def test_multioutput_regression_invariance_to_dimension_shuffling(name): # test invariance to dimension shuffling random_state = check_random_state(0) y_true = random_state.uniform(0, 2, size=(20, 5)) y_pred = random_state.uniform(0, 2, size=(20, 5)) - for name in MULTIOUTPUT_METRICS: - metric = ALL_METRICS[name] - error = metric(y_true, y_pred) + metric = ALL_METRICS[name] + error = metric(y_true, y_pred) - for _ in range(3): - perm = random_state.permutation(y_true.shape[1]) - assert_almost_equal(metric(y_true[:, perm], y_pred[:, perm]), - error, - err_msg="%s is not dimension shuffling " - "invariant" % name) + for _ in range(3): + perm = random_state.permutation(y_true.shape[1]) + assert_almost_equal(metric(y_true[:, perm], y_pred[:, perm]), + error, + err_msg="%s is not dimension shuffling " + "invariant" % name) @ignore_warnings @@ -747,7 +768,8 @@ def test_multilabel_representation_invariance(): "formats." 
% name) -def test_raise_value_error_multilabel_sequences(): +@pytest.mark.parametrize('name', MULTILABELS_METRICS) +def test_raise_value_error_multilabel_sequences(name): # make sure the multilabel-sequence format raises ValueError multilabel_sequences = [ [[0, 1]], @@ -757,41 +779,41 @@ def test_raise_value_error_multilabel_sequences(): [()], np.array([[], [1, 2]], dtype='object')] - for name in MULTILABELS_METRICS: - metric = ALL_METRICS[name] - for seq in multilabel_sequences: - assert_raises(ValueError, metric, seq, seq) + metric = ALL_METRICS[name] + for seq in multilabel_sequences: + assert_raises(ValueError, metric, seq, seq) -def test_normalize_option_binary_classification(n_samples=20): +@pytest.mark.parametrize('name', METRICS_WITH_NORMALIZE_OPTION) +def test_normalize_option_binary_classification(name): # Test in the binary case + n_samples = 20 random_state = check_random_state(0) y_true = random_state.randint(0, 2, size=(n_samples, )) y_pred = random_state.randint(0, 2, size=(n_samples, )) - for name in METRICS_WITH_NORMALIZE_OPTION: - metrics = ALL_METRICS[name] - measure = metrics(y_true, y_pred, normalize=True) - assert_greater(measure, 0, - msg="We failed to test correctly the normalize option") - assert_almost_equal(metrics(y_true, y_pred, normalize=False) - / n_samples, measure) + metrics = ALL_METRICS[name] + measure = metrics(y_true, y_pred, normalize=True) + assert_greater(measure, 0, + msg="We failed to test correctly the normalize option") + assert_almost_equal(metrics(y_true, y_pred, normalize=False) + / n_samples, measure) -def test_normalize_option_multiclass_classification(): +@pytest.mark.parametrize('name', METRICS_WITH_NORMALIZE_OPTION) +def test_normalize_option_multiclass_classification(name): # Test in the multiclass case random_state = check_random_state(0) y_true = random_state.randint(0, 4, size=(20, )) y_pred = random_state.randint(0, 4, size=(20, )) n_samples = y_true.shape[0] - for name in METRICS_WITH_NORMALIZE_OPTION: - metrics = ALL_METRICS[name] - measure = metrics(y_true, y_pred, normalize=True) - assert_greater(measure, 0, - msg="We failed to test correctly the normalize option") - assert_almost_equal(metrics(y_true, y_pred, normalize=False) - / n_samples, measure) + metrics = ALL_METRICS[name] + measure = metrics(y_true, y_pred, normalize=True) + assert_greater(measure, 0, + msg="We failed to test correctly the normalize option") + assert_almost_equal(metrics(y_true, y_pred, normalize=False) + / n_samples, measure) def test_normalize_option_multilabel_classification(): @@ -886,7 +908,9 @@ def check_averaging(name, y_true, y_true_binarize, y_pred, y_pred_binarize, raise ValueError("Metric is not recorded as having an average option") -def test_averaging_multiclass(n_samples=50, n_classes=3): +@pytest.mark.parametrize('name', METRICS_WITH_AVERAGING) +def test_averaging_multiclass(name): + n_samples, n_classes = 50, 3 random_state = check_random_state(0) y_true = random_state.randint(0, n_classes, size=(n_samples, )) y_pred = random_state.randint(0, n_classes, size=(n_samples, )) @@ -896,12 +920,14 @@ def test_averaging_multiclass(n_samples=50, n_classes=3): y_true_binarize = lb.transform(y_true) y_pred_binarize = lb.transform(y_pred) - for name in METRICS_WITH_AVERAGING: - yield (check_averaging, name, y_true, y_true_binarize, - y_pred, y_pred_binarize, y_score) + check_averaging(name, y_true, y_true_binarize, + y_pred, y_pred_binarize, y_score) -def test_averaging_multilabel(n_classes=5, n_samples=40): +@pytest.mark.parametrize( + 'name', 
METRICS_WITH_AVERAGING | THRESHOLDED_METRICS_WITH_AVERAGING) +def test_averaging_multilabel(name): + n_samples, n_classes = 40, 5 _, y = make_multilabel_classification(n_features=1, n_classes=n_classes, random_state=5, n_samples=n_samples, allow_unlabeled=False) @@ -911,22 +937,27 @@ def test_averaging_multilabel(n_classes=5, n_samples=40): y_true_binarize = y_true y_pred_binarize = y_pred - for name in METRICS_WITH_AVERAGING + THRESHOLDED_METRICS_WITH_AVERAGING: - yield (check_averaging, name, y_true, y_true_binarize, - y_pred, y_pred_binarize, y_score) + check_averaging(name, y_true, y_true_binarize, + y_pred, y_pred_binarize, y_score) -def test_averaging_multilabel_all_zeroes(): +@pytest.mark.parametrize('name', METRICS_WITH_AVERAGING) +def test_averaging_multilabel_all_zeroes(name): y_true = np.zeros((20, 3)) y_pred = np.zeros((20, 3)) y_score = np.zeros((20, 3)) y_true_binarize = y_true y_pred_binarize = y_pred - for name in METRICS_WITH_AVERAGING: - yield (check_averaging, name, y_true, y_true_binarize, - y_pred, y_pred_binarize, y_score) + check_averaging(name, y_true, y_true_binarize, + y_pred, y_pred_binarize, y_score) + +def test_averaging_binary_multilabel_all_zeroes(): + y_true = np.zeros((20, 3)) + y_pred = np.zeros((20, 3)) + y_true_binarize = y_true + y_pred_binarize = y_pred # Test _average_binary_score for weight.sum() == 0 binary_metric = (lambda y_true, y_score, average="macro": _average_binary_score( @@ -935,16 +966,16 @@ def test_averaging_multilabel_all_zeroes(): y_pred_binarize, is_multilabel=True) -def test_averaging_multilabel_all_ones(): +@pytest.mark.parametrize('name', METRICS_WITH_AVERAGING) +def test_averaging_multilabel_all_ones(name): y_true = np.ones((20, 3)) y_pred = np.ones((20, 3)) y_score = np.ones((20, 3)) y_true_binarize = y_true y_pred_binarize = y_pred - for name in METRICS_WITH_AVERAGING: - yield (check_averaging, name, y_true, y_true_binarize, - y_pred, y_pred_binarize, y_score) + check_averaging(name, y_true, y_true_binarize, + y_pred, y_pred_binarize, y_score) @ignore_warnings @@ -1022,54 +1053,64 @@ def check_sample_weight_invariance(name, metric, y1, y2): sample_weight])) -def test_sample_weight_invariance(n_samples=50): +@pytest.mark.parametrize( + 'name', + (set(ALL_METRICS).intersection(set(REGRESSION_METRICS)) + - METRICS_WITHOUT_SAMPLE_WEIGHT)) +def test_regression_sample_weight_invariance(name): + n_samples = 50 random_state = check_random_state(0) # regression y_true = random_state.random_sample(size=(n_samples,)) y_pred = random_state.random_sample(size=(n_samples,)) - for name in ALL_METRICS: - if name not in REGRESSION_METRICS: - continue - if name in METRICS_WITHOUT_SAMPLE_WEIGHT: - continue - metric = ALL_METRICS[name] - yield check_sample_weight_invariance, name, metric, y_true, y_pred + metric = ALL_METRICS[name] + check_sample_weight_invariance(name, metric, y_true, y_pred) + +@pytest.mark.parametrize( + 'name', + (set(ALL_METRICS) - set(REGRESSION_METRICS) + - METRICS_WITHOUT_SAMPLE_WEIGHT - METRIC_UNDEFINED_BINARY)) +def test_binary_sample_weight_invariance(name): # binary + n_samples = 50 random_state = check_random_state(0) y_true = random_state.randint(0, 2, size=(n_samples, )) y_pred = random_state.randint(0, 2, size=(n_samples, )) y_score = random_state.random_sample(size=(n_samples,)) - for name in ALL_METRICS: - if name in REGRESSION_METRICS: - continue - if (name in METRICS_WITHOUT_SAMPLE_WEIGHT or - name in METRIC_UNDEFINED_BINARY): - continue - metric = ALL_METRICS[name] - if name in THRESHOLDED_METRICS: - yield 
check_sample_weight_invariance, name, metric, y_true, y_score - else: - yield check_sample_weight_invariance, name, metric, y_true, y_pred + metric = ALL_METRICS[name] + if name in THRESHOLDED_METRICS: + check_sample_weight_invariance(name, metric, y_true, y_score) + else: + check_sample_weight_invariance(name, metric, y_true, y_pred) + +@pytest.mark.parametrize( + 'name', + (set(ALL_METRICS) - set(REGRESSION_METRICS) + - METRICS_WITHOUT_SAMPLE_WEIGHT + - METRIC_UNDEFINED_BINARY_MULTICLASS)) +def test_multiclass_sample_weight_invariance(name): # multiclass + n_samples = 50 random_state = check_random_state(0) y_true = random_state.randint(0, 5, size=(n_samples, )) y_pred = random_state.randint(0, 5, size=(n_samples, )) y_score = random_state.random_sample(size=(n_samples, 5)) - for name in ALL_METRICS: - if name in REGRESSION_METRICS: - continue - if (name in METRICS_WITHOUT_SAMPLE_WEIGHT or - name in METRIC_UNDEFINED_BINARY_MULTICLASS): - continue - metric = ALL_METRICS[name] - if name in THRESHOLDED_METRICS: - yield check_sample_weight_invariance, name, metric, y_true, y_score - else: - yield check_sample_weight_invariance, name, metric, y_true, y_pred + metric = ALL_METRICS[name] + if name in THRESHOLDED_METRICS: + check_sample_weight_invariance(name, metric, y_true, y_score) + else: + check_sample_weight_invariance(name, metric, y_true, y_pred) + +@pytest.mark.parametrize( + 'name', + (MULTILABELS_METRICS | THRESHOLDED_MULTILABEL_METRICS | + MULTIOUTPUT_METRICS) - METRICS_WITHOUT_SAMPLE_WEIGHT) +def test_multilabel_sample_weight_invariance(name): # multilabel indicator + random_state = check_random_state(0) _, ya = make_multilabel_classification(n_features=1, n_classes=20, random_state=0, n_samples=100, allow_unlabeled=False) @@ -1080,18 +1121,11 @@ def test_sample_weight_invariance(n_samples=50): y_pred = np.vstack([ya, ya]) y_score = random_state.randint(1, 4, size=y_true.shape) - for name in (MULTILABELS_METRICS + THRESHOLDED_MULTILABEL_METRICS + - MULTIOUTPUT_METRICS): - if name in METRICS_WITHOUT_SAMPLE_WEIGHT: - continue - - metric = ALL_METRICS[name] - if name in THRESHOLDED_METRICS: - yield (check_sample_weight_invariance, name, metric, - y_true, y_score) - else: - yield (check_sample_weight_invariance, name, metric, - y_true, y_pred) + metric = ALL_METRICS[name] + if name in THRESHOLDED_METRICS: + check_sample_weight_invariance(name, metric, y_true, y_score) + else: + check_sample_weight_invariance(name, metric, y_true, y_pred) @ignore_warnings diff --git a/sklearn/metrics/tests/test_pairwise.py b/sklearn/metrics/tests/test_pairwise.py index 0ef089c7a3619..e63219a817bed 100644 --- a/sklearn/metrics/tests/test_pairwise.py +++ b/sklearn/metrics/tests/test_pairwise.py @@ -2,11 +2,12 @@ import numpy as np from numpy import linalg -import pytest from scipy.sparse import dok_matrix, csr_matrix, issparse from scipy.spatial.distance import cosine, cityblock, minkowski, wminkowski +import pytest + from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_allclose @@ -129,52 +130,52 @@ def test_pairwise_distances(): assert_raises(ValueError, pairwise_distances, X, Y, metric="blah") -# ignore conversion to boolean in pairwise_distances -@ignore_warnings(category=DataConversionWarning) -def test_pairwise_boolean_distance(): +@pytest.mark.parametrize('metric', PAIRWISE_BOOLEAN_FUNCTIONS) +def test_pairwise_boolean_distance(metric): # test that we convert to boolean arrays for boolean distances rng = 
np.random.RandomState(0) X = rng.randn(5, 4) Y = X.copy() Y[0, 0] = 1 - Y[0, 0] - for metric in PAIRWISE_BOOLEAN_FUNCTIONS: + # ignore conversion to boolean in pairwise_distances + with ignore_warnings(category=DataConversionWarning): for Z in [Y, None]: res = pairwise_distances(X, Z, metric=metric) res[np.isnan(res)] = 0 assert_true(np.sum(res != 0) == 0) -def test_pairwise_precomputed(): - for func in [pairwise_distances, pairwise_kernels]: - # Test correct shape - assert_raises_regexp(ValueError, '.* shape .*', - func, np.zeros((5, 3)), metric='precomputed') - # with two args - assert_raises_regexp(ValueError, '.* shape .*', - func, np.zeros((5, 3)), np.zeros((4, 4)), - metric='precomputed') - # even if shape[1] agrees (although thus second arg is spurious) - assert_raises_regexp(ValueError, '.* shape .*', - func, np.zeros((5, 3)), np.zeros((4, 3)), - metric='precomputed') - - # Test not copied (if appropriate dtype) - S = np.zeros((5, 5)) - S2 = func(S, metric="precomputed") - assert_true(S is S2) - # with two args - S = np.zeros((5, 3)) - S2 = func(S, np.zeros((3, 3)), metric="precomputed") - assert_true(S is S2) - - # Test always returns float dtype - S = func(np.array([[1]], dtype='int'), metric='precomputed') - assert_equal('f', S.dtype.kind) - - # Test converts list to array-like - S = func([[1.]], metric='precomputed') - assert_true(isinstance(S, np.ndarray)) +@pytest.mark.parametrize('func', [pairwise_distances, pairwise_kernels]) +def test_pairwise_precomputed(func): + # Test correct shape + assert_raises_regexp(ValueError, '.* shape .*', + func, np.zeros((5, 3)), metric='precomputed') + # with two args + assert_raises_regexp(ValueError, '.* shape .*', + func, np.zeros((5, 3)), np.zeros((4, 4)), + metric='precomputed') + # even if shape[1] agrees (although thus second arg is spurious) + assert_raises_regexp(ValueError, '.* shape .*', + func, np.zeros((5, 3)), np.zeros((4, 3)), + metric='precomputed') + + # Test not copied (if appropriate dtype) + S = np.zeros((5, 5)) + S2 = func(S, metric="precomputed") + assert_true(S is S2) + # with two args + S = np.zeros((5, 3)) + S2 = func(S, np.zeros((3, 3)), metric="precomputed") + assert_true(S is S2) + + # Test always returns float dtype + S = func(np.array([[1]], dtype='int'), metric='precomputed') + assert_equal('f', S.dtype.kind) + + # Test converts list to array-like + S = func([[1.]], metric='precomputed') + assert_true(isinstance(S, np.ndarray)) def check_pairwise_parallel(func, metric, kwds): @@ -202,16 +203,24 @@ def check_pairwise_parallel(func, metric, kwds): assert_array_almost_equal(S, S2) -def test_pairwise_parallel(): - wminkowski_kwds = {'w': np.arange(1, 5).astype('double'), 'p': 1} - metrics = [(pairwise_distances, 'euclidean', {}), - (pairwise_distances, wminkowski, wminkowski_kwds), - (pairwise_distances, 'wminkowski', wminkowski_kwds), - (pairwise_kernels, 'polynomial', {'degree': 1}), - (pairwise_kernels, callable_rbf_kernel, {'gamma': .1}), - ] - for func, metric, kwds in metrics: - yield check_pairwise_parallel, func, metric, kwds +_wminkowski_kwds = {'w': np.arange(1, 5).astype('double'), 'p': 1} + + +def callable_rbf_kernel(x, y, **kwds): + # Callable version of pairwise.rbf_kernel. 
+ K = rbf_kernel(np.atleast_2d(x), np.atleast_2d(y), **kwds) + return K + + +@pytest.mark.parametrize( + 'func, metric, kwds', + [(pairwise_distances, 'euclidean', {}), + (pairwise_distances, wminkowski, _wminkowski_kwds), + (pairwise_distances, 'wminkowski', _wminkowski_kwds), + (pairwise_kernels, 'polynomial', {'degree': 1}), + (pairwise_kernels, callable_rbf_kernel, {'gamma': .1})]) +def test_pairwise_parallel(func, metric, kwds): + check_pairwise_parallel(func, metric, kwds) def test_pairwise_callable_nonstrict_metric(): @@ -221,47 +230,51 @@ def test_pairwise_callable_nonstrict_metric(): assert_equal(pairwise_distances([[1.]], metric=lambda x, y: 5)[0, 0], 5) -def callable_rbf_kernel(x, y, **kwds): - # Callable version of pairwise.rbf_kernel. - K = rbf_kernel(np.atleast_2d(x), np.atleast_2d(y), **kwds) - return K +# Test with all metrics that should be in PAIRWISE_KERNEL_FUNCTIONS. +@pytest.mark.parametrize( + 'metric', + ["rbf", "laplacian", "sigmoid", "polynomial", "linear", + "chi2", "additive_chi2"]) +def test_pairwise_kernels(metric): + # Test the pairwise_kernels helper function. + + rng = np.random.RandomState(0) + X = rng.random_sample((5, 4)) + Y = rng.random_sample((2, 4)) + function = PAIRWISE_KERNEL_FUNCTIONS[metric] + # Test with Y=None + K1 = pairwise_kernels(X, metric=metric) + K2 = function(X) + assert_array_almost_equal(K1, K2) + # Test with Y=Y + K1 = pairwise_kernels(X, Y=Y, metric=metric) + K2 = function(X, Y=Y) + assert_array_almost_equal(K1, K2) + # Test with tuples as X and Y + X_tuples = tuple([tuple([v for v in row]) for row in X]) + Y_tuples = tuple([tuple([v for v in row]) for row in Y]) + K2 = pairwise_kernels(X_tuples, Y_tuples, metric=metric) + assert_array_almost_equal(K1, K2) + # Test with sparse X and Y + X_sparse = csr_matrix(X) + Y_sparse = csr_matrix(Y) + if metric in ["chi2", "additive_chi2"]: + # these don't support sparse matrices yet + assert_raises(ValueError, pairwise_kernels, + X_sparse, Y=Y_sparse, metric=metric) + return + K1 = pairwise_kernels(X_sparse, Y=Y_sparse, metric=metric) + assert_array_almost_equal(K1, K2) -def test_pairwise_kernels(): # Test the pairwise_kernels helper function. +def test_pairwise_kernels_callable(): + # Test the pairwise_kernels helper function + # with a callable function, with given keywords. rng = np.random.RandomState(0) X = rng.random_sample((5, 4)) Y = rng.random_sample((2, 4)) - # Test with all metrics that should be in PAIRWISE_KERNEL_FUNCTIONS. 
- test_metrics = ["rbf", "laplacian", "sigmoid", "polynomial", "linear", - "chi2", "additive_chi2"] - for metric in test_metrics: - function = PAIRWISE_KERNEL_FUNCTIONS[metric] - # Test with Y=None - K1 = pairwise_kernels(X, metric=metric) - K2 = function(X) - assert_array_almost_equal(K1, K2) - # Test with Y=Y - K1 = pairwise_kernels(X, Y=Y, metric=metric) - K2 = function(X, Y=Y) - assert_array_almost_equal(K1, K2) - # Test with tuples as X and Y - X_tuples = tuple([tuple([v for v in row]) for row in X]) - Y_tuples = tuple([tuple([v for v in row]) for row in Y]) - K2 = pairwise_kernels(X_tuples, Y_tuples, metric=metric) - assert_array_almost_equal(K1, K2) - # Test with sparse X and Y - X_sparse = csr_matrix(X) - Y_sparse = csr_matrix(Y) - if metric in ["chi2", "additive_chi2"]: - # these don't support sparse matrices yet - assert_raises(ValueError, pairwise_kernels, - X_sparse, Y=Y_sparse, metric=metric) - continue - K1 = pairwise_kernels(X_sparse, Y=Y_sparse, metric=metric) - assert_array_almost_equal(K1, K2) - # Test with a callable function, with given keywords. metric = callable_rbf_kernel kwds = {'gamma': 0.1} K1 = pairwise_kernels(X, Y=Y, metric=metric, **kwds) @@ -286,27 +299,37 @@ def test_pairwise_kernels_filter_param(): assert_raises(TypeError, pairwise_kernels, X, Y, "rbf", **params) -def test_paired_distances(): +@pytest.mark.parametrize('metric, func', iteritems(PAIRED_DISTANCES)) +def test_paired_distances(metric, func): # Test the pairwise_distance helper function. rng = np.random.RandomState(0) # Euclidean distance should be equivalent to calling the function. X = rng.random_sample((5, 4)) # Euclidean distance, with Y != X. Y = rng.random_sample((5, 4)) - for metric, func in iteritems(PAIRED_DISTANCES): - S = paired_distances(X, Y, metric=metric) - S2 = func(X, Y) - assert_array_almost_equal(S, S2) - S3 = func(csr_matrix(X), csr_matrix(Y)) - assert_array_almost_equal(S, S3) - if metric in PAIRWISE_DISTANCE_FUNCTIONS: - # Check the pairwise_distances implementation - # gives the same value - distances = PAIRWISE_DISTANCE_FUNCTIONS[metric](X, Y) - distances = np.diag(distances) - assert_array_almost_equal(distances, S) - - # Check the callable implementation + + S = paired_distances(X, Y, metric=metric) + S2 = func(X, Y) + assert_array_almost_equal(S, S2) + S3 = func(csr_matrix(X), csr_matrix(Y)) + assert_array_almost_equal(S, S3) + if metric in PAIRWISE_DISTANCE_FUNCTIONS: + # Check the pairwise_distances implementation + # gives the same value + distances = PAIRWISE_DISTANCE_FUNCTIONS[metric](X, Y) + distances = np.diag(distances) + assert_array_almost_equal(distances, S) + + +def test_paired_distances_callable(): + # Test the pairwise_distance helper function + # with the callable implementation + rng = np.random.RandomState(0) + # Euclidean distance should be equivalent to calling the function. + X = rng.random_sample((5, 4)) + # Euclidean distance, with Y != X. 
+ Y = rng.random_sample((5, 4)) + S = paired_distances(X, Y, metric='manhattan') S2 = paired_distances(X, Y, metric=lambda x, y: np.abs(x - y).sum(axis=0)) assert_array_almost_equal(S, S2) @@ -637,25 +660,29 @@ def test_chi_square_kernel(): csr_matrix(X), csr_matrix(Y)) -def test_kernel_symmetry(): +@pytest.mark.parametrize( + 'kernel', + (linear_kernel, polynomial_kernel, rbf_kernel, + laplacian_kernel, sigmoid_kernel, cosine_similarity)) +def test_kernel_symmetry(kernel): # Valid kernels should be symmetric rng = np.random.RandomState(0) X = rng.random_sample((5, 4)) - for kernel in (linear_kernel, polynomial_kernel, rbf_kernel, - laplacian_kernel, sigmoid_kernel, cosine_similarity): - K = kernel(X, X) - assert_array_almost_equal(K, K.T, 15) + K = kernel(X, X) + assert_array_almost_equal(K, K.T, 15) -def test_kernel_sparse(): +@pytest.mark.parametrize( + 'kernel', + (linear_kernel, polynomial_kernel, rbf_kernel, + laplacian_kernel, sigmoid_kernel, cosine_similarity)) +def test_kernel_sparse(kernel): rng = np.random.RandomState(0) X = rng.random_sample((5, 4)) X_sparse = csr_matrix(X) - for kernel in (linear_kernel, polynomial_kernel, rbf_kernel, - laplacian_kernel, sigmoid_kernel, cosine_similarity): - K = kernel(X, X) - K2 = kernel(X_sparse, X_sparse) - assert_array_almost_equal(K, K2) + K = kernel(X, X) + K2 = kernel(X_sparse, X_sparse) + assert_array_almost_equal(K, K2) def test_linear_kernel(): diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index 07c35c609358d..28b79e9b8474c 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -2,7 +2,6 @@ import pytest import numpy as np -from itertools import product import warnings from scipy.sparse import csr_matrix @@ -177,19 +176,19 @@ def _partial_roc(y_true, y_predict, max_fpr): return 0.5 * (1 + (partial_auc - min_area) / (max_area - min_area)) -def test_roc_curve(): +@pytest.mark.parametrize('drop', [True, False]) +def test_roc_curve(drop): # Test Area under Receiver Operating Characteristic (ROC) curve y_true, _, probas_pred = make_prediction(binary=True) expected_auc = _auc(y_true, probas_pred) - for drop in [True, False]: - fpr, tpr, thresholds = roc_curve(y_true, probas_pred, - drop_intermediate=drop) - roc_auc = auc(fpr, tpr) - assert_array_almost_equal(roc_auc, expected_auc, decimal=2) - assert_almost_equal(roc_auc, roc_auc_score(y_true, probas_pred)) - assert_equal(fpr.shape, tpr.shape) - assert_equal(fpr.shape, thresholds.shape) + fpr, tpr, thresholds = roc_curve(y_true, probas_pred, + drop_intermediate=drop) + roc_auc = auc(fpr, tpr) + assert_array_almost_equal(roc_auc, expected_auc, decimal=2) + assert_almost_equal(roc_auc, roc_auc_score(y_true, probas_pred)) + assert_equal(fpr.shape, tpr.shape) + assert_equal(fpr.shape, thresholds.shape) def test_roc_curve_end_points(): @@ -923,18 +922,29 @@ def check_alternative_lrap_implementation(lrap_score, n_classes=5, assert_almost_equal(score_lrap, score_my_lrap) -def test_label_ranking_avp(): - for fn in [label_ranking_average_precision_score, _my_lrap]: - yield check_lrap_toy, fn - yield check_lrap_without_tie_and_increasing_score, fn - yield check_lrap_only_ties, fn - yield check_zero_or_all_relevant_labels, fn - yield check_lrap_error_raised, label_ranking_average_precision_score +@pytest.mark.parametrize( + 'check', + (check_lrap_toy, + check_lrap_without_tie_and_increasing_score, + check_lrap_only_ties, + check_zero_or_all_relevant_labels)) +@pytest.mark.parametrize( + 'func', + 
(label_ranking_average_precision_score, _my_lrap)) +def test_label_ranking_avp(check, func): + check(func) + + +def test_lrap_error_raised(): + check_lrap_error_raised(label_ranking_average_precision_score) + + +@pytest.mark.parametrize('n_samples', (1, 2, 8, 20)) +@pytest.mark.parametrize('n_classes', (2, 5, 10)) +@pytest.mark.parametrize('random_state', range(1)) +def test_alternative_lrap_implementation(n_samples, n_classes, random_state): - for n_samples, n_classes, random_state in product((1, 2, 8, 20), - (2, 5, 10), - range(1)): - yield (check_alternative_lrap_implementation, + check_alternative_lrap_implementation( label_ranking_average_precision_score, n_classes, n_samples, random_state) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 6af6418635d59..8bb3c3c137dcc 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -6,6 +6,8 @@ import numpy as np +import pytest + from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_equal @@ -491,12 +493,12 @@ def check_scorer_memmap(scorer_name): assert isinstance(score, numbers.Number), scorer_name -def test_scorer_memmap_input(): +@pytest.mark.parametrize('name', SCORERS) +def test_scorer_memmap_input(name): # Non-regression test for #6147: some score functions would # return singleton memmap when computed on memmap data instead of scalar # float values. - for name in SCORERS.keys(): - yield check_scorer_memmap, name + check_scorer_memmap(name) def test_deprecated_names(): diff --git a/sklearn/mixture/tests/test_gmm.py b/sklearn/mixture/tests/test_gmm.py index 137703adfcad4..134c0493cf558 100644 --- a/sklearn/mixture/tests/test_gmm.py +++ b/sklearn/mixture/tests/test_gmm.py @@ -8,6 +8,8 @@ import copy import sys +import pytest + import numpy as np from numpy.testing import assert_array_equal, assert_array_almost_equal @@ -160,7 +162,6 @@ def test_GMM_attributes(): assert_raises(ValueError, g._set_covars, []) assert_raises(ValueError, g._set_covars, np.zeros((n_components - 2, n_features))) - assert_raises(ValueError, mixture.GMM, n_components=20, covariance_type='badcovariance_type') @@ -496,10 +497,11 @@ def check_positive_definite_covars(covariance_type): assert_greater(np.linalg.det(c), 0) -def test_positive_definite_covars(): +@pytest.mark.parametrize('covariance_type', + ["full", "tied", "diag", "spherical"]) +def test_positive_definite_covars(covariance_type): # Check positive definiteness for all covariance types - for covariance_type in ["full", "tied", "diag", "spherical"]: - yield check_positive_definite_covars, covariance_type + check_positive_definite_covars(covariance_type) # This function tests the deprecated old GMM class diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index 2929916619769..a537b9f53518a 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ b/sklearn/model_selection/tests/test_validation.py @@ -387,8 +387,8 @@ def test_cross_validate(): scores = (train_mse_scores, test_mse_scores, train_r2_scores, test_r2_scores, fitted_estimators) - yield check_cross_validate_single_metric, est, X, y, scores - yield check_cross_validate_multi_metric, est, X, y, scores + check_cross_validate_single_metric(est, X, y, scores) + check_cross_validate_multi_metric(est, X, y, scores) def test_cross_validate_return_train_score_warn(): diff --git 
a/sklearn/neighbors/tests/test_ball_tree.py b/sklearn/neighbors/tests/test_ball_tree.py index a91e4ac4edd27..de0d166fb8891 100644 --- a/sklearn/neighbors/tests/test_ball_tree.py +++ b/sklearn/neighbors/tests/test_ball_tree.py @@ -1,12 +1,15 @@ import pickle +import itertools + import numpy as np +import pytest from numpy.testing import assert_array_almost_equal from sklearn.neighbors.ball_tree import (BallTree, NeighborsHeap, simultaneous_sort, kernel_norm, nodeheap_sort, DTYPE, ITYPE) from sklearn.neighbors.dist_metrics import DistanceMetric from sklearn.utils import check_random_state -from sklearn.utils.testing import SkipTest, assert_allclose +from sklearn.utils.testing import assert_allclose rng = np.random.RandomState(10) V_mahalanobis = rng.rand(3, 3) @@ -42,60 +45,44 @@ def brute_force_neighbors(X, Y, k, metric, **kwargs): return dist, ind -def test_ball_tree_query(): +@pytest.mark.parametrize('metric', METRICS) +@pytest.mark.parametrize('k', (1, 3, 5)) +@pytest.mark.parametrize('dualtree', (True, False)) +@pytest.mark.parametrize('breadth_first', (True, False)) +def test_ball_tree_query(metric, k, dualtree, breadth_first): rng = check_random_state(0) X = rng.random_sample((40, DIMENSION)) Y = rng.random_sample((10, DIMENSION)) - def check_neighbors(dualtree, breadth_first, k, metric, kwargs): - bt = BallTree(X, leaf_size=1, metric=metric, **kwargs) - dist1, ind1 = bt.query(Y, k, dualtree=dualtree, - breadth_first=breadth_first) - dist2, ind2 = brute_force_neighbors(X, Y, k, metric, **kwargs) + kwargs = METRICS[metric] - # don't check indices here: if there are any duplicate distances, - # the indices may not match. Distances should not have this problem. - assert_array_almost_equal(dist1, dist2) + bt = BallTree(X, leaf_size=1, metric=metric, **kwargs) + dist1, ind1 = bt.query(Y, k, dualtree=dualtree, + breadth_first=breadth_first) + dist2, ind2 = brute_force_neighbors(X, Y, k, metric, **kwargs) - for (metric, kwargs) in METRICS.items(): - for k in (1, 3, 5): - for dualtree in (True, False): - for breadth_first in (True, False): - yield (check_neighbors, - dualtree, breadth_first, - k, metric, kwargs) + # don't check indices here: if there are any duplicate distances, + # the indices may not match. Distances should not have this problem. 
+ assert_array_almost_equal(dist1, dist2) -def test_ball_tree_query_boolean_metrics(): +@pytest.mark.parametrize('metric', + itertools.chain(BOOLEAN_METRICS, DISCRETE_METRICS)) +def test_ball_tree_query_metrics(metric): rng = check_random_state(0) - X = rng.random_sample((40, 10)).round(0) - Y = rng.random_sample((10, 10)).round(0) - k = 5 - - def check_neighbors(metric): - bt = BallTree(X, leaf_size=1, metric=metric) - dist1, ind1 = bt.query(Y, k) - dist2, ind2 = brute_force_neighbors(X, Y, k, metric) - assert_array_almost_equal(dist1, dist2) + if metric in BOOLEAN_METRICS: + X = rng.random_sample((40, 10)).round(0) + Y = rng.random_sample((10, 10)).round(0) + elif metric in DISCRETE_METRICS: + X = (4 * rng.random_sample((40, 10))).round(0) + Y = (4 * rng.random_sample((10, 10))).round(0) - for metric in BOOLEAN_METRICS: - yield check_neighbors, metric - - -def test_ball_tree_query_discrete_metrics(): - rng = check_random_state(0) - X = (4 * rng.random_sample((40, 10))).round(0) - Y = (4 * rng.random_sample((10, 10))).round(0) k = 5 - def check_neighbors(metric): - bt = BallTree(X, leaf_size=1, metric=metric) - dist1, ind1 = bt.query(Y, k) - dist2, ind2 = brute_force_neighbors(X, Y, k, metric) - assert_array_almost_equal(dist1, dist2) - - for metric in DISCRETE_METRICS: - yield check_neighbors, metric + bt = BallTree(X, leaf_size=1, metric=metric) + dist1, ind1 = bt.query(Y, k) + dist2, ind2 = brute_force_neighbors(X, Y, k, metric) + assert_array_almost_equal(dist1, dist2) def test_ball_tree_query_radius(n_samples=100, n_features=10): @@ -157,7 +144,21 @@ def compute_kernel_slow(Y, X, kernel, h): raise ValueError('kernel not recognized') -def check_results(kernel, h, atol, rtol, breadth_first, bt, Y, dens_true): +@pytest.mark.parametrize("kernel", ['gaussian', 'tophat', 'epanechnikov', + 'exponential', 'linear', 'cosine']) +@pytest.mark.parametrize("h", [0.01, 0.1, 1]) +@pytest.mark.parametrize("rtol", [0, 1E-5]) +@pytest.mark.parametrize("atol", [1E-6, 1E-2]) +@pytest.mark.parametrize("breadth_first", [True, False]) +def test_ball_tree_kde(kernel, h, rtol, atol, breadth_first, n_samples=100, + n_features=3): + np.random.seed(0) + X = np.random.random((n_samples, n_features)) + Y = np.random.random((n_samples, n_features)) + bt = BallTree(X, leaf_size=10) + + dens_true = compute_kernel_slow(Y, X, kernel, h) + dens = bt.kernel_density(Y, h, atol=atol, rtol=rtol, kernel=kernel, breadth_first=breadth_first) @@ -165,24 +166,6 @@ def check_results(kernel, h, atol, rtol, breadth_first, bt, Y, dens_true): atol=atol, rtol=max(rtol, 1e-7)) -def test_ball_tree_kde(n_samples=100, n_features=3): - rng = check_random_state(0) - X = rng.random_sample((n_samples, n_features)) - Y = rng.random_sample((n_samples, n_features)) - bt = BallTree(X, leaf_size=10) - - for kernel in ['gaussian', 'tophat', 'epanechnikov', - 'exponential', 'linear', 'cosine']: - for h in [0.01, 0.1, 1]: - dens_true = compute_kernel_slow(Y, X, kernel, h) - - for rtol in [0, 1E-5]: - for atol in [1E-6, 1E-2]: - for breadth_first in (True, False): - yield (check_results, kernel, h, atol, rtol, - breadth_first, bt, Y, dens_true) - - def test_gaussian_kde(n_samples=1000): # Compare gaussian KDE results to scipy.stats.gaussian_kde from scipy.stats import gaussian_kde @@ -215,7 +198,7 @@ def check_two_point(r, dualtree): assert_array_almost_equal(counts, counts_true) for dualtree in (True, False): - yield check_two_point, r, dualtree + check_two_point(r, dualtree) def test_ball_tree_pickle(): @@ -246,7 +229,7 @@ def 
check_pickle_protocol(protocol): assert_array_almost_equal(dist1_pyfunc, dist2_pyfunc) for protocol in (0, 1, 2): - yield check_pickle_protocol, protocol + check_pickle_protocol(protocol) def test_neighbors_heap(n_pts=5, n_nbrs=10): diff --git a/sklearn/neighbors/tests/test_dist_metrics.py b/sklearn/neighbors/tests/test_dist_metrics.py index 23b7656cb313b..f4d6dc3e74c5e 100644 --- a/sklearn/neighbors/tests/test_dist_metrics.py +++ b/sklearn/neighbors/tests/test_dist_metrics.py @@ -4,6 +4,8 @@ import numpy as np from numpy.testing import assert_array_almost_equal +import pytest + from scipy.spatial.distance import cdist from sklearn.neighbors.dist_metrics import DistanceMetric from sklearn.neighbors import BallTree @@ -15,107 +17,117 @@ def dist_func(x1, x2, p): return np.sum((x1 - x2) ** p) ** (1. / p) -class TestMetrics(object): - n1 = 20 - n2 = 25 - d = 4 - zero_frac = 0.5 - rseed = 0 - dtype = np.float64 - rng = check_random_state(rseed) - X1 = rng.random_sample((n1, d)).astype(dtype) - X2 = rng.random_sample((n2, d)).astype(dtype) - - # make boolean arrays: ones and zeros - X1_bool = X1.round(0) - X2_bool = X2.round(0) - - V = rng.random_sample((d, d)) - VI = np.dot(V, V.T) - - metrics = {'euclidean': {}, - 'cityblock': {}, - 'minkowski': dict(p=(1, 1.5, 2, 3)), - 'chebyshev': {}, - 'seuclidean': dict(V=(rng.random_sample(d),)), - 'wminkowski': dict(p=(1, 1.5, 3), - w=(rng.random_sample(d),)), - 'mahalanobis': dict(VI=(VI,)), - 'hamming': {}, - 'canberra': {}, - 'braycurtis': {}} - - bool_metrics = ['matching', 'jaccard', 'dice', - 'kulsinski', 'rogerstanimoto', 'russellrao', - 'sokalmichener', 'sokalsneath'] - - def test_cdist(self): - for metric, argdict in self.metrics.items(): - keys = argdict.keys() - for vals in itertools.product(*argdict.values()): - kwargs = dict(zip(keys, vals)) - D_true = cdist(self.X1, self.X2, metric, **kwargs) - yield self.check_cdist, metric, kwargs, D_true - - for metric in self.bool_metrics: - D_true = cdist(self.X1_bool, self.X2_bool, metric) - yield self.check_cdist_bool, metric, D_true - - def check_cdist(self, metric, kwargs, D_true): - dm = DistanceMetric.get_metric(metric, **kwargs) - D12 = dm.pairwise(self.X1, self.X2) - assert_array_almost_equal(D12, D_true) - - def check_cdist_bool(self, metric, D_true): - dm = DistanceMetric.get_metric(metric) - D12 = dm.pairwise(self.X1_bool, self.X2_bool) - assert_array_almost_equal(D12, D_true) - - def test_pdist(self): - for metric, argdict in self.metrics.items(): - keys = argdict.keys() - for vals in itertools.product(*argdict.values()): - kwargs = dict(zip(keys, vals)) - D_true = cdist(self.X1, self.X1, metric, **kwargs) - yield self.check_pdist, metric, kwargs, D_true - - for metric in self.bool_metrics: - D_true = cdist(self.X1_bool, self.X1_bool, metric) - yield self.check_pdist_bool, metric, D_true - - def check_pdist(self, metric, kwargs, D_true): - dm = DistanceMetric.get_metric(metric, **kwargs) - D12 = dm.pairwise(self.X1) - assert_array_almost_equal(D12, D_true) - - def check_pdist_bool(self, metric, D_true): - dm = DistanceMetric.get_metric(metric) - D12 = dm.pairwise(self.X1_bool) - assert_array_almost_equal(D12, D_true) - - def test_pickle(self): - for metric, argdict in self.metrics.items(): - keys = argdict.keys() - for vals in itertools.product(*argdict.values()): - kwargs = dict(zip(keys, vals)) - yield self.check_pickle, metric, kwargs - - for metric in self.bool_metrics: - yield self.check_pickle_bool, metric - - def check_pickle_bool(self, metric): - dm = 
DistanceMetric.get_metric(metric) - D1 = dm.pairwise(self.X1_bool) - dm2 = pickle.loads(pickle.dumps(dm)) - D2 = dm2.pairwise(self.X1_bool) - assert_array_almost_equal(D1, D2) - - def check_pickle(self, metric, kwargs): - dm = DistanceMetric.get_metric(metric, **kwargs) - D1 = dm.pairwise(self.X1) - dm2 = pickle.loads(pickle.dumps(dm)) - D2 = dm2.pairwise(self.X1) - assert_array_almost_equal(D1, D2) +rng = check_random_state(0) +d = 4 +n1 = 20 +n2 = 25 +X1 = rng.random_sample((n1, d)).astype('float64') +X2 = rng.random_sample((n2, d)).astype('float64') + +# make boolean arrays: ones and zeros +X1_bool = X1.round(0) +X2_bool = X2.round(0) + +V = rng.random_sample((d, d)) +VI = np.dot(V, V.T) + +BOOL_METRICS = ['matching', 'jaccard', 'dice', + 'kulsinski', 'rogerstanimoto', 'russellrao', + 'sokalmichener', 'sokalsneath'] + +METRICS_DEFAULT_PARAMS = {'euclidean': {}, + 'cityblock': {}, + 'minkowski': dict(p=(1, 1.5, 2, 3)), + 'chebyshev': {}, + 'seuclidean': dict(V=(rng.random_sample(d),)), + 'wminkowski': dict(p=(1, 1.5, 3), + w=(rng.random_sample(d),)), + 'mahalanobis': dict(VI=(VI,)), + 'hamming': {}, + 'canberra': {}, + 'braycurtis': {}} + + +@pytest.mark.parametrize('metric', METRICS_DEFAULT_PARAMS) +def test_cdist(metric): + argdict = METRICS_DEFAULT_PARAMS[metric] + keys = argdict.keys() + for vals in itertools.product(*argdict.values()): + kwargs = dict(zip(keys, vals)) + D_true = cdist(X1, X2, metric, **kwargs) + check_cdist(metric, kwargs, D_true) + + +@pytest.mark.parametrize('metric', BOOL_METRICS) +def test_cdist_bool_metric(metric): + D_true = cdist(X1_bool, X2_bool, metric) + check_cdist_bool(metric, D_true) + + +def check_cdist(metric, kwargs, D_true): + dm = DistanceMetric.get_metric(metric, **kwargs) + D12 = dm.pairwise(X1, X2) + assert_array_almost_equal(D12, D_true) + + +def check_cdist_bool(metric, D_true): + dm = DistanceMetric.get_metric(metric) + D12 = dm.pairwise(X1_bool, X2_bool) + assert_array_almost_equal(D12, D_true) + + +@pytest.mark.parametrize('metric', METRICS_DEFAULT_PARAMS) +def test_pdist(metric): + argdict = METRICS_DEFAULT_PARAMS[metric] + keys = argdict.keys() + for vals in itertools.product(*argdict.values()): + kwargs = dict(zip(keys, vals)) + D_true = cdist(X1, X1, metric, **kwargs) + check_pdist(metric, kwargs, D_true) + + +@pytest.mark.parametrize('metric', BOOL_METRICS) +def test_pdist_bool_metrics(metric): + D_true = cdist(X1_bool, X1_bool, metric) + check_pdist_bool(metric, D_true) + + +def check_pdist(metric, kwargs, D_true): + dm = DistanceMetric.get_metric(metric, **kwargs) + D12 = dm.pairwise(X1) + assert_array_almost_equal(D12, D_true) + + +def check_pdist_bool(metric, D_true): + dm = DistanceMetric.get_metric(metric) + D12 = dm.pairwise(X1_bool) + assert_array_almost_equal(D12, D_true) + + +@pytest.mark.parametrize('metric', METRICS_DEFAULT_PARAMS) +def test_pickle(metric): + argdict = METRICS_DEFAULT_PARAMS[metric] + keys = argdict.keys() + for vals in itertools.product(*argdict.values()): + kwargs = dict(zip(keys, vals)) + check_pickle(metric, kwargs) + + +@pytest.mark.parametrize('metric', BOOL_METRICS) +def test_pickle_bool_metrics(metric): + dm = DistanceMetric.get_metric(metric) + D1 = dm.pairwise(X1_bool) + dm2 = pickle.loads(pickle.dumps(dm)) + D2 = dm2.pairwise(X1_bool) + assert_array_almost_equal(D1, D2) + + +def check_pickle(metric, kwargs): + dm = DistanceMetric.get_metric(metric, **kwargs) + D1 = dm.pairwise(X1) + dm2 = pickle.loads(pickle.dumps(dm)) + D2 = dm2.pairwise(X1) + assert_array_almost_equal(D1, D2) def 
test_haversine_metric(): diff --git a/sklearn/neighbors/tests/test_kd_tree.py b/sklearn/neighbors/tests/test_kd_tree.py index e1b7cb1965987..46cddc711e769 100644 --- a/sklearn/neighbors/tests/test_kd_tree.py +++ b/sklearn/neighbors/tests/test_kd_tree.py @@ -1,5 +1,8 @@ import numpy as np from numpy.testing import assert_array_almost_equal + +import pytest + from sklearn.neighbors.kd_tree import (KDTree, NeighborsHeap, simultaneous_sort, kernel_norm, nodeheap_sort, DTYPE, ITYPE) @@ -37,18 +40,17 @@ def check_neighbors(dualtree, breadth_first, k, metric, X, Y, kwargs): assert_array_almost_equal(dist1, dist2) -def test_kd_tree_query(): +@pytest.mark.parametrize('metric', METRICS) +@pytest.mark.parametrize('k', (1, 3, 5)) +@pytest.mark.parametrize('dualtree', (True, False)) +@pytest.mark.parametrize('breadth_first', (True, False)) +def test_kd_tree_query(metric, k, dualtree, breadth_first): rng = check_random_state(0) X = rng.random_sample((40, DIMENSION)) Y = rng.random_sample((10, DIMENSION)) - for (metric, kwargs) in METRICS.items(): - for k in (1, 3, 5): - for dualtree in (True, False): - for breadth_first in (True, False): - yield (check_neighbors, - dualtree, breadth_first, - k, metric, X, Y, kwargs) + kwargs = METRICS[metric] + check_neighbors(dualtree, breadth_first, k, metric, X, Y, kwargs) def test_kd_tree_query_radius(n_samples=100, n_features=10): @@ -118,22 +120,24 @@ def check_results(kernel, h, atol, rtol, breadth_first, Y, kdt, dens_true): rtol=max(rtol, 1e-7)) -def test_kd_tree_kde(n_samples=100, n_features=3): +@pytest.mark.parametrize('kernel', + ['gaussian', 'tophat', 'epanechnikov', + 'exponential', 'linear', 'cosine']) +@pytest.mark.parametrize('h', [0.01, 0.1, 1]) +def test_kd_tree_kde(kernel, h): + n_samples, n_features = (100, 3) rng = check_random_state(0) X = rng.random_sample((n_samples, n_features)) Y = rng.random_sample((n_samples, n_features)) kdt = KDTree(X, leaf_size=10) - for kernel in ['gaussian', 'tophat', 'epanechnikov', - 'exponential', 'linear', 'cosine']: - for h in [0.01, 0.1, 1]: - dens_true = compute_kernel_slow(Y, X, kernel, h) + dens_true = compute_kernel_slow(Y, X, kernel, h) - for rtol in [0, 1E-5]: - for atol in [1E-6, 1E-2]: - for breadth_first in (True, False): - yield (check_results, kernel, h, atol, rtol, - breadth_first, Y, kdt, dens_true) + for rtol in [0, 1E-5]: + for atol in [1E-6, 1E-2]: + for breadth_first in (True, False): + check_results(kernel, h, atol, rtol, + breadth_first, Y, kdt, dens_true) def test_gaussian_kde(n_samples=1000): @@ -153,7 +157,9 @@ def test_gaussian_kde(n_samples=1000): assert_array_almost_equal(dens_kdt, dens_gkde, decimal=3) -def test_kd_tree_two_point(n_samples=100, n_features=3): +@pytest.mark.parametrize('dualtree', (True, False)) +def test_kd_tree_two_point(dualtree): + n_samples, n_features = (100, 3) rng = check_random_state(0) X = rng.random_sample((n_samples, n_features)) Y = rng.random_sample((n_samples, n_features)) @@ -163,15 +169,12 @@ def test_kd_tree_two_point(n_samples=100, n_features=3): D = DistanceMetric.get_metric("euclidean").pairwise(Y, X) counts_true = [(D <= ri).sum() for ri in r] - def check_two_point(r, dualtree): - counts = kdt.two_point_correlation(Y, r=r, dualtree=dualtree) - assert_array_almost_equal(counts, counts_true) - - for dualtree in (True, False): - yield check_two_point, r, dualtree + counts = kdt.two_point_correlation(Y, r=r, dualtree=dualtree) + assert_array_almost_equal(counts, counts_true) -def test_kd_tree_pickle(): +@pytest.mark.parametrize('protocol', (0, 1, 2)) +def 
test_kd_tree_pickle(protocol): import pickle rng = check_random_state(0) X = rng.random_sample((10, 3)) @@ -185,8 +188,7 @@ def check_pickle_protocol(protocol): assert_array_almost_equal(ind1, ind2) assert_array_almost_equal(dist1, dist2) - for protocol in (0, 1, 2): - yield check_pickle_protocol, protocol + check_pickle_protocol(protocol) def test_neighbors_heap(n_pts=5, n_nbrs=10): diff --git a/sklearn/neighbors/tests/test_kde.py b/sklearn/neighbors/tests/test_kde.py index 60f294a3df0a9..caffb662608e0 100644 --- a/sklearn/neighbors/tests/test_kde.py +++ b/sklearn/neighbors/tests/test_kde.py @@ -1,4 +1,7 @@ import numpy as np + +import pytest + from sklearn.utils.testing import (assert_allclose, assert_raises, assert_equal) from sklearn.neighbors import KernelDensity, KDTree, NearestNeighbors @@ -40,21 +43,25 @@ def check_results(kernel, bandwidth, atol, rtol, X, Y, dens_true): atol=atol, rtol=max(1E-7, rtol)) -def test_kernel_density(n_samples=100, n_features=3): +@pytest.mark.parametrize( + 'kernel', + ['gaussian', 'tophat', 'epanechnikov', + 'exponential', 'linear', 'cosine']) +@pytest.mark.parametrize('bandwidth', [0.01, 0.1, 1]) +def test_kernel_density(kernel, bandwidth): + n_samples, n_features = (100, 3) + rng = np.random.RandomState(0) X = rng.randn(n_samples, n_features) Y = rng.randn(n_samples, n_features) - for kernel in ['gaussian', 'tophat', 'epanechnikov', - 'exponential', 'linear', 'cosine']: - for bandwidth in [0.01, 0.1, 1]: - dens_true = compute_kernel_slow(Y, X, kernel, bandwidth) + dens_true = compute_kernel_slow(Y, X, kernel, bandwidth) - for rtol in [0, 1E-5]: - for atol in [1E-6, 1E-2]: - for breadth_first in (True, False): - yield (check_results, kernel, bandwidth, atol, rtol, - X, Y, dens_true) + for rtol in [0, 1E-5]: + for atol in [1E-6, 1E-2]: + for breadth_first in (True, False): + check_results(kernel, bandwidth, atol, rtol, + X, Y, dens_true) def test_kernel_density_sampling(n_samples=100, n_features=3): @@ -91,23 +98,24 @@ def test_kernel_density_sampling(n_samples=100, n_features=3): assert_equal(kde.sample().shape, (1, 1)) -def test_kde_algorithm_metric_choice(): +@pytest.mark.parametrize('algorithm', ['auto', 'ball_tree', 'kd_tree']) +@pytest.mark.parametrize('metric', + ['euclidean', 'minkowski', 'manhattan', + 'chebyshev', 'haversine']) +def test_kde_algorithm_metric_choice(algorithm, metric): # Smoke test for various metrics and algorithms rng = np.random.RandomState(0) X = rng.randn(10, 2) # 2 features required for haversine dist. 
Y = rng.randn(10, 2) - for algorithm in ['auto', 'ball_tree', 'kd_tree']: - for metric in ['euclidean', 'minkowski', 'manhattan', - 'chebyshev', 'haversine']: - if algorithm == 'kd_tree' and metric not in KDTree.valid_metrics: - assert_raises(ValueError, KernelDensity, - algorithm=algorithm, metric=metric) - else: - kde = KernelDensity(algorithm=algorithm, metric=metric) - kde.fit(X) - y_dens = kde.score_samples(Y) - assert_equal(y_dens.shape, Y.shape[:1]) + if algorithm == 'kd_tree' and metric not in KDTree.valid_metrics: + assert_raises(ValueError, KernelDensity, + algorithm=algorithm, metric=metric) + else: + kde = KernelDensity(algorithm=algorithm, metric=metric) + kde.fit(X) + y_dens = kde.score_samples(Y) + assert_equal(y_dens.shape, Y.shape[:1]) def test_kde_score(n_samples=100, n_features=3): diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index a95a906ad3cbd..e1acaa4c6f139 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -4,6 +4,8 @@ from scipy.sparse import (bsr_matrix, coo_matrix, csc_matrix, csr_matrix, dok_matrix, lil_matrix, issparse) +import pytest + from sklearn import metrics from sklearn import neighbors, datasets from sklearn.exceptions import DataConversionWarning @@ -1260,63 +1262,57 @@ def test_include_self_neighbors_graph(): assert_array_equal(rng_not_self, [[0., 1.], [1., 0.]]) -def test_same_knn_parallel(): +@pytest.mark.parametrize('algorithm', ALGORITHMS) +def test_same_knn_parallel(algorithm): X, y = datasets.make_classification(n_samples=30, n_features=5, n_redundant=0, random_state=0) X_train, X_test, y_train, y_test = train_test_split(X, y) - def check_same_knn_parallel(algorithm): - clf = neighbors.KNeighborsClassifier(n_neighbors=3, - algorithm=algorithm) - clf.fit(X_train, y_train) - y = clf.predict(X_test) - dist, ind = clf.kneighbors(X_test) - graph = clf.kneighbors_graph(X_test, mode='distance').toarray() - - clf.set_params(n_jobs=3) - clf.fit(X_train, y_train) - y_parallel = clf.predict(X_test) - dist_parallel, ind_parallel = clf.kneighbors(X_test) - graph_parallel = \ - clf.kneighbors_graph(X_test, mode='distance').toarray() - - assert_array_equal(y, y_parallel) - assert_array_almost_equal(dist, dist_parallel) - assert_array_equal(ind, ind_parallel) - assert_array_almost_equal(graph, graph_parallel) + clf = neighbors.KNeighborsClassifier(n_neighbors=3, + algorithm=algorithm) + clf.fit(X_train, y_train) + y = clf.predict(X_test) + dist, ind = clf.kneighbors(X_test) + graph = clf.kneighbors_graph(X_test, mode='distance').toarray() - for algorithm in ALGORITHMS: - yield check_same_knn_parallel, algorithm + clf.set_params(n_jobs=3) + clf.fit(X_train, y_train) + y_parallel = clf.predict(X_test) + dist_parallel, ind_parallel = clf.kneighbors(X_test) + graph_parallel = \ + clf.kneighbors_graph(X_test, mode='distance').toarray() + + assert_array_equal(y, y_parallel) + assert_array_almost_equal(dist, dist_parallel) + assert_array_equal(ind, ind_parallel) + assert_array_almost_equal(graph, graph_parallel) -def test_same_radius_neighbors_parallel(): +@pytest.mark.parametrize('algorithm', ALGORITHMS) +def test_same_radius_neighbors_parallel(algorithm): X, y = datasets.make_classification(n_samples=30, n_features=5, n_redundant=0, random_state=0) X_train, X_test, y_train, y_test = train_test_split(X, y) - def check_same_radius_neighbors_parallel(algorithm): - clf = neighbors.RadiusNeighborsClassifier(radius=10, - algorithm=algorithm) - clf.fit(X_train, 
y_train) - y = clf.predict(X_test) - dist, ind = clf.radius_neighbors(X_test) - graph = clf.radius_neighbors_graph(X_test, mode='distance').toarray() - - clf.set_params(n_jobs=3) - clf.fit(X_train, y_train) - y_parallel = clf.predict(X_test) - dist_parallel, ind_parallel = clf.radius_neighbors(X_test) - graph_parallel = \ - clf.radius_neighbors_graph(X_test, mode='distance').toarray() - - assert_array_equal(y, y_parallel) - for i in range(len(dist)): - assert_array_almost_equal(dist[i], dist_parallel[i]) - assert_array_equal(ind[i], ind_parallel[i]) - assert_array_almost_equal(graph, graph_parallel) - - for algorithm in ALGORITHMS: - yield check_same_radius_neighbors_parallel, algorithm + clf = neighbors.RadiusNeighborsClassifier(radius=10, + algorithm=algorithm) + clf.fit(X_train, y_train) + y = clf.predict(X_test) + dist, ind = clf.radius_neighbors(X_test) + graph = clf.radius_neighbors_graph(X_test, mode='distance').toarray() + + clf.set_params(n_jobs=3) + clf.fit(X_train, y_train) + y_parallel = clf.predict(X_test) + dist_parallel, ind_parallel = clf.radius_neighbors(X_test) + graph_parallel = \ + clf.radius_neighbors_graph(X_test, mode='distance').toarray() + + assert_array_equal(y, y_parallel) + for i in range(len(dist)): + assert_array_almost_equal(dist[i], dist_parallel[i]) + assert_array_equal(ind[i], ind_parallel[i]) + assert_array_almost_equal(graph, graph_parallel) def test_dtype_convert(): diff --git a/sklearn/neighbors/tests/test_quad_tree.py b/sklearn/neighbors/tests/test_quad_tree.py index 6cfa4bcc562e2..156bfc232a55d 100644 --- a/sklearn/neighbors/tests/test_quad_tree.py +++ b/sklearn/neighbors/tests/test_quad_tree.py @@ -1,5 +1,8 @@ import pickle import numpy as np + +import pytest + from sklearn.neighbors.quad_tree import _QuadTree from sklearn.utils import check_random_state @@ -58,50 +61,43 @@ def test_quadtree_similar_point(): tree._check_coherence() -def test_quad_tree_pickle(): +@pytest.mark.parametrize('n_dimensions', (2, 3)) +@pytest.mark.parametrize('protocol', (0, 1, 2)) +def test_quad_tree_pickle(n_dimensions, protocol): rng = check_random_state(0) - for n_dimensions in (2, 3): - X = rng.random_sample((10, n_dimensions)) - - tree = _QuadTree(n_dimensions=n_dimensions, verbose=0) - tree.build_tree(X) + X = rng.random_sample((10, n_dimensions)) - def check_pickle_protocol(protocol): - s = pickle.dumps(tree, protocol=protocol) - bt2 = pickle.loads(s) + tree = _QuadTree(n_dimensions=n_dimensions, verbose=0) + tree.build_tree(X) - for x in X: - cell_x_tree = tree.get_cell(x) - cell_x_bt2 = bt2.get_cell(x) - assert cell_x_tree == cell_x_bt2 + s = pickle.dumps(tree, protocol=protocol) + bt2 = pickle.loads(s) - for protocol in (0, 1, 2): - yield check_pickle_protocol, protocol + for x in X: + cell_x_tree = tree.get_cell(x) + cell_x_bt2 = bt2.get_cell(x) + assert cell_x_tree == cell_x_bt2 -def test_qt_insert_duplicate(): +@pytest.mark.parametrize('n_dimensions', (2, 3)) +def test_qt_insert_duplicate(n_dimensions): rng = check_random_state(0) - def check_insert_duplicate(n_dimensions=2): - - X = rng.random_sample((10, n_dimensions)) - Xd = np.r_[X, X[:5]] - tree = _QuadTree(n_dimensions=n_dimensions, verbose=0) - tree.build_tree(Xd) - - cumulative_size = tree.cumulative_size - leafs = tree.leafs + X = rng.random_sample((10, n_dimensions)) + Xd = np.r_[X, X[:5]] + tree = _QuadTree(n_dimensions=n_dimensions, verbose=0) + tree.build_tree(Xd) - # Assert that the first 5 are indeed duplicated and that the next - # ones are single point leaf - for i, x in enumerate(X): - 
cell_id = tree.get_cell(x) - assert leafs[cell_id] - assert cumulative_size[cell_id] == 1 + (i < 5) + cumulative_size = tree.cumulative_size + leafs = tree.leafs - for n_dimensions in (2, 3): - yield check_insert_duplicate, n_dimensions + # Assert that the first 5 are indeed duplicated and that the next + # ones are single point leaf + for i, x in enumerate(X): + cell_id = tree.get_cell(x) + assert leafs[cell_id] + assert cumulative_size[cell_id] == 1 + (i < 5) def test_summarize(): diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py index 14788b14b5218..faa0cc3ce275b 100644 --- a/sklearn/preprocessing/tests/test_label.py +++ b/sklearn/preprocessing/tests/test_label.py @@ -482,7 +482,7 @@ def test_label_binarize_binary(): neg_label = -1 expected = np.array([[2, -1], [-1, 2], [2, -1]])[:, 1].reshape((-1, 1)) - yield check_binarized_results, y, classes, pos_label, neg_label, expected + check_binarized_results(y, classes, pos_label, neg_label, expected) # Binary case where sparse_output = True will not result in a ValueError y = [0, 1, 0] @@ -491,7 +491,7 @@ def test_label_binarize_binary(): neg_label = 0 expected = np.array([[3, 0], [0, 3], [3, 0]])[:, 1].reshape((-1, 1)) - yield check_binarized_results, y, classes, pos_label, neg_label, expected + check_binarized_results(y, classes, pos_label, neg_label, expected) def test_label_binarize_multiclass(): @@ -501,7 +501,7 @@ def test_label_binarize_multiclass(): neg_label = 0 expected = 2 * np.eye(3) - yield check_binarized_results, y, classes, pos_label, neg_label, expected + check_binarized_results(y, classes, pos_label, neg_label, expected) assert_raises(ValueError, label_binarize, y, classes, neg_label=-1, pos_label=pos_label, sparse_output=True) @@ -518,8 +518,8 @@ def test_label_binarize_multilabel(): dok_matrix, lil_matrix]] for y in [y_ind] + y_sparse: - yield (check_binarized_results, y, classes, pos_label, neg_label, - expected) + check_binarized_results(y, classes, pos_label, neg_label, + expected) assert_raises(ValueError, label_binarize, y, classes, neg_label=-1, pos_label=pos_label, sparse_output=True) diff --git a/sklearn/svm/tests/test_bounds.py b/sklearn/svm/tests/test_bounds.py index e46dbb92df44a..d02c53b05d8b7 100644 --- a/sklearn/svm/tests/test_bounds.py +++ b/sklearn/svm/tests/test_bounds.py @@ -1,6 +1,8 @@ import numpy as np from scipy import sparse as sp +import pytest + from sklearn.svm.bounds import l1_min_c from sklearn.svm import LinearSVC from sklearn.linear_model.logistic import LogisticRegression @@ -16,25 +18,24 @@ Y2 = [2, 1, 0, 0] -def test_l1_min_c(): - losses = ['squared_hinge', 'log'] +@pytest.mark.parametrize('loss', ['squared_hinge', 'log']) +@pytest.mark.parametrize('X_label', ['sparse', 'dense']) +@pytest.mark.parametrize('Y_label', ['two-classes', 'multi-class']) +@pytest.mark.parametrize('intercept_label', ['no-intercept', 'fit-intercept']) +def test_l1_min_c(loss, X_label, Y_label, intercept_label): Xs = {'sparse': sparse_X, 'dense': dense_X} Ys = {'two-classes': Y1, 'multi-class': Y2} intercepts = {'no-intercept': {'fit_intercept': False}, 'fit-intercept': {'fit_intercept': True, 'intercept_scaling': 10}} - for loss in losses: - for X_label, X in Xs.items(): - for Y_label, Y in Ys.items(): - for intercept_label, intercept_params in intercepts.items(): - check = lambda: check_l1_min_c(X, Y, loss, - **intercept_params) - check.description = ('Test l1_min_c loss=%r %s %s %s' % - (loss, X_label, Y_label, - intercept_label)) - yield check + X = Xs[X_label] + 
Y = Ys[Y_label] + intercept_params = intercepts[intercept_label] + check_l1_min_c(X, Y, loss, **intercept_params) + +def test_l1_min_c_l2_loss(): # loss='l2' should raise ValueError assert_raise_message(ValueError, "loss type not in", l1_min_c, dense_X, Y1, "l2") diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index 2f15163d09dda..6b090ce4684f9 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -4,6 +4,7 @@ from io import BytesIO import numpy as np import scipy.sparse +import pytest from sklearn.datasets import load_digits, load_iris @@ -177,51 +178,56 @@ def test_discrete_prior(): clf.class_log_prior_, 8) -def test_mnnb(): +@pytest.mark.parametrize('kind', ('dense', 'sparse')) +def test_mnnb(kind): # Test Multinomial Naive Bayes classification. # This checks that MultinomialNB implements fit and predict and returns # correct values for a simple toy dataset. - for X in [X2, scipy.sparse.csr_matrix(X2)]: - # Check the ability to predict the learning set. - clf = MultinomialNB() - assert_raises(ValueError, clf.fit, -X, y2) - y_pred = clf.fit(X, y2).predict(X) + if kind == 'dense': + X = X2 + elif kind == 'sparse': + X = scipy.sparse.csr_matrix(X2) - assert_array_equal(y_pred, y2) + # Check the ability to predict the learning set. + clf = MultinomialNB() + assert_raises(ValueError, clf.fit, -X, y2) + y_pred = clf.fit(X, y2).predict(X) + + assert_array_equal(y_pred, y2) - # Verify that np.log(clf.predict_proba(X)) gives the same results as - # clf.predict_log_proba(X) - y_pred_proba = clf.predict_proba(X) - y_pred_log_proba = clf.predict_log_proba(X) - assert_array_almost_equal(np.log(y_pred_proba), y_pred_log_proba, 8) + # Verify that np.log(clf.predict_proba(X)) gives the same results as + # clf.predict_log_proba(X) + y_pred_proba = clf.predict_proba(X) + y_pred_log_proba = clf.predict_log_proba(X) + assert_array_almost_equal(np.log(y_pred_proba), y_pred_log_proba, 8) - # Check that incremental fitting yields the same results - clf2 = MultinomialNB() - clf2.partial_fit(X[:2], y2[:2], classes=np.unique(y2)) - clf2.partial_fit(X[2:5], y2[2:5]) - clf2.partial_fit(X[5:], y2[5:]) + # Check that incremental fitting yields the same results + clf2 = MultinomialNB() + clf2.partial_fit(X[:2], y2[:2], classes=np.unique(y2)) + clf2.partial_fit(X[2:5], y2[2:5]) + clf2.partial_fit(X[5:], y2[5:]) - y_pred2 = clf2.predict(X) - assert_array_equal(y_pred2, y2) + y_pred2 = clf2.predict(X) + assert_array_equal(y_pred2, y2) - y_pred_proba2 = clf2.predict_proba(X) - y_pred_log_proba2 = clf2.predict_log_proba(X) - assert_array_almost_equal(np.log(y_pred_proba2), y_pred_log_proba2, 8) - assert_array_almost_equal(y_pred_proba2, y_pred_proba) - assert_array_almost_equal(y_pred_log_proba2, y_pred_log_proba) + y_pred_proba2 = clf2.predict_proba(X) + y_pred_log_proba2 = clf2.predict_log_proba(X) + assert_array_almost_equal(np.log(y_pred_proba2), y_pred_log_proba2, 8) + assert_array_almost_equal(y_pred_proba2, y_pred_proba) + assert_array_almost_equal(y_pred_log_proba2, y_pred_log_proba) - # Partial fit on the whole data at once should be the same as fit too - clf3 = MultinomialNB() - clf3.partial_fit(X, y2, classes=np.unique(y2)) + # Partial fit on the whole data at once should be the same as fit too + clf3 = MultinomialNB() + clf3.partial_fit(X, y2, classes=np.unique(y2)) - y_pred3 = clf3.predict(X) - assert_array_equal(y_pred3, y2) - y_pred_proba3 = clf3.predict_proba(X) - y_pred_log_proba3 = clf3.predict_log_proba(X) - 
assert_array_almost_equal(np.log(y_pred_proba3), y_pred_log_proba3, 8) - assert_array_almost_equal(y_pred_proba3, y_pred_proba) - assert_array_almost_equal(y_pred_log_proba3, y_pred_log_proba) + y_pred3 = clf3.predict(X) + assert_array_equal(y_pred3, y2) + y_pred_proba3 = clf3.predict_proba(X) + y_pred_log_proba3 = clf3.predict_log_proba(X) + assert_array_almost_equal(np.log(y_pred_proba3), y_pred_log_proba3, 8) + assert_array_almost_equal(y_pred_proba3, y_pred_proba) + assert_array_almost_equal(y_pred_log_proba3, y_pred_log_proba) def check_partial_fit(cls): @@ -240,9 +246,9 @@ def check_partial_fit(cls): assert_array_equal(clf1.feature_count_, clf3.feature_count_) -def test_discretenb_partial_fit(): - for cls in [MultinomialNB, BernoulliNB]: - yield check_partial_fit, cls +@pytest.mark.parametrize("cls", [MultinomialNB, BernoulliNB]) +def test_discretenb_partial_fit(cls): + check_partial_fit(cls) def test_gnb_partial_fit(): @@ -259,62 +265,63 @@ def test_gnb_partial_fit(): assert_array_almost_equal(clf.class_prior_, clf_pf2.class_prior_) -def test_discretenb_pickle(): +@pytest.mark.parametrize('cls', [BernoulliNB, MultinomialNB, GaussianNB]) +def test_discretenb_pickle(cls): # Test picklability of discrete naive Bayes classifiers - for cls in [BernoulliNB, MultinomialNB, GaussianNB]: - clf = cls().fit(X2, y2) - y_pred = clf.predict(X2) + clf = cls().fit(X2, y2) + y_pred = clf.predict(X2) - store = BytesIO() - pickle.dump(clf, store) - clf = pickle.load(BytesIO(store.getvalue())) + store = BytesIO() + pickle.dump(clf, store) + clf = pickle.load(BytesIO(store.getvalue())) - assert_array_equal(y_pred, clf.predict(X2)) + assert_array_equal(y_pred, clf.predict(X2)) - if cls is not GaussianNB: - # TODO re-enable me when partial_fit is implemented for GaussianNB + if cls is not GaussianNB: + # TODO re-enable me when partial_fit is implemented for GaussianNB - # Test pickling of estimator trained with partial_fit - clf2 = cls().partial_fit(X2[:3], y2[:3], classes=np.unique(y2)) - clf2.partial_fit(X2[3:], y2[3:]) - store = BytesIO() - pickle.dump(clf2, store) - clf2 = pickle.load(BytesIO(store.getvalue())) - assert_array_equal(y_pred, clf2.predict(X2)) + # Test pickling of estimator trained with partial_fit + clf2 = cls().partial_fit(X2[:3], y2[:3], classes=np.unique(y2)) + clf2.partial_fit(X2[3:], y2[3:]) + store = BytesIO() + pickle.dump(clf2, store) + clf2 = pickle.load(BytesIO(store.getvalue())) + assert_array_equal(y_pred, clf2.predict(X2)) -def test_input_check_fit(): +@pytest.mark.parametrize('cls', [BernoulliNB, MultinomialNB, GaussianNB]) +def test_input_check_fit(cls): # Test input checks for the fit method - for cls in [BernoulliNB, MultinomialNB, GaussianNB]: - # check shape consistency for number of samples at fit time - assert_raises(ValueError, cls().fit, X2, y2[:-1]) - # check shape consistency for number of input features at predict time - clf = cls().fit(X2, y2) - assert_raises(ValueError, clf.predict, X2[:, :-1]) + # check shape consistency for number of samples at fit time + assert_raises(ValueError, cls().fit, X2, y2[:-1]) + # check shape consistency for number of input features at predict time + clf = cls().fit(X2, y2) + assert_raises(ValueError, clf.predict, X2[:, :-1]) -def test_input_check_partial_fit(): - for cls in [BernoulliNB, MultinomialNB]: - # check shape consistency - assert_raises(ValueError, cls().partial_fit, X2, y2[:-1], - classes=np.unique(y2)) - # classes is required for first call to partial fit - assert_raises(ValueError, cls().partial_fit, X2, y2) 
+@pytest.mark.parametrize('cls', [BernoulliNB, MultinomialNB]) +def test_input_check_partial_fit(cls): + # check shape consistency + assert_raises(ValueError, cls().partial_fit, X2, y2[:-1], + classes=np.unique(y2)) + + # classes is required for first call to partial fit + assert_raises(ValueError, cls().partial_fit, X2, y2) - # check consistency of consecutive classes values - clf = cls() - clf.partial_fit(X2, y2, classes=np.unique(y2)) - assert_raises(ValueError, clf.partial_fit, X2, y2, - classes=np.arange(42)) + # check consistency of consecutive classes values + clf = cls() + clf.partial_fit(X2, y2, classes=np.unique(y2)) + assert_raises(ValueError, clf.partial_fit, X2, y2, + classes=np.arange(42)) - # check consistency of input shape for partial_fit - assert_raises(ValueError, clf.partial_fit, X2[:, :-1], y2) + # check consistency of input shape for partial_fit + assert_raises(ValueError, clf.partial_fit, X2[:, :-1], y2) - # check consistency of input shape for predict - assert_raises(ValueError, clf.predict, X2[:, :-1]) + # check consistency of input shape for predict + assert_raises(ValueError, clf.predict, X2[:, :-1]) def test_discretenb_predict_proba(): @@ -348,34 +355,35 @@ def test_discretenb_predict_proba(): assert_almost_equal(np.sum(np.exp(clf.intercept_)), 1) -def test_discretenb_uniform_prior(): +@pytest.mark.parametrize('cls', [BernoulliNB, MultinomialNB]) +def test_discretenb_uniform_prior(cls): # Test whether discrete NB classes fit a uniform prior # when fit_prior=False and class_prior=None - for cls in [BernoulliNB, MultinomialNB]: - clf = cls() - clf.set_params(fit_prior=False) - clf.fit([[0], [0], [1]], [0, 0, 1]) - prior = np.exp(clf.class_log_prior_) - assert_array_almost_equal(prior, np.array([.5, .5])) + clf = cls() + clf.set_params(fit_prior=False) + clf.fit([[0], [0], [1]], [0, 0, 1]) + prior = np.exp(clf.class_log_prior_) + assert_array_almost_equal(prior, np.array([.5, .5])) -def test_discretenb_provide_prior(): +@pytest.mark.parametrize('cls', [BernoulliNB, MultinomialNB]) +def test_discretenb_provide_prior(cls): # Test whether discrete NB classes use provided prior - for cls in [BernoulliNB, MultinomialNB]: - clf = cls(class_prior=[0.5, 0.5]) - clf.fit([[0], [0], [1]], [0, 0, 1]) - prior = np.exp(clf.class_log_prior_) - assert_array_almost_equal(prior, np.array([.5, .5])) + clf = cls(class_prior=[0.5, 0.5]) + clf.fit([[0], [0], [1]], [0, 0, 1]) + prior = np.exp(clf.class_log_prior_) + assert_array_almost_equal(prior, np.array([.5, .5])) - # Inconsistent number of classes with prior - assert_raises(ValueError, clf.fit, [[0], [1], [2]], [0, 1, 2]) - assert_raises(ValueError, clf.partial_fit, [[0], [1]], [0, 1], - classes=[0, 1, 1]) + # Inconsistent number of classes with prior + assert_raises(ValueError, clf.fit, [[0], [1], [2]], [0, 1, 2]) + assert_raises(ValueError, clf.partial_fit, [[0], [1]], [0, 1], + classes=[0, 1, 1]) -def test_discretenb_provide_prior_with_partial_fit(): +@pytest.mark.parametrize('cls', [BernoulliNB, MultinomialNB]) +def test_discretenb_provide_prior_with_partial_fit(cls): # Test whether discrete NB classes use provided prior # when using partial_fit @@ -383,22 +391,21 @@ def test_discretenb_provide_prior_with_partial_fit(): iris_data1, iris_data2, iris_target1, iris_target2 = train_test_split( iris.data, iris.target, test_size=0.4, random_state=415) - for cls in [BernoulliNB, MultinomialNB]: - for prior in [None, [0.3, 0.3, 0.4]]: - clf_full = cls(class_prior=prior) - clf_full.fit(iris.data, iris.target) - clf_partial = 
cls(class_prior=prior) - clf_partial.partial_fit(iris_data1, iris_target1, - classes=[0, 1, 2]) - clf_partial.partial_fit(iris_data2, iris_target2) - assert_array_almost_equal(clf_full.class_log_prior_, - clf_partial.class_log_prior_) - - -def test_sample_weight_multiclass(): - for cls in [BernoulliNB, MultinomialNB]: - # check shape consistency for number of samples at fit time - yield check_sample_weight_multiclass, cls + for prior in [None, [0.3, 0.3, 0.4]]: + clf_full = cls(class_prior=prior) + clf_full.fit(iris.data, iris.target) + clf_partial = cls(class_prior=prior) + clf_partial.partial_fit(iris_data1, iris_target1, + classes=[0, 1, 2]) + clf_partial.partial_fit(iris_data2, iris_target2) + assert_array_almost_equal(clf_full.class_log_prior_, + clf_partial.class_log_prior_) + + +@pytest.mark.parametrize('cls', [BernoulliNB, MultinomialNB]) +def test_sample_weight_multiclass(cls): + # check shape consistency for number of samples at fit time + check_sample_weight_multiclass(cls) def check_sample_weight_multiclass(cls): diff --git a/sklearn/tests/test_random_projection.py b/sklearn/tests/test_random_projection.py index dcbe97c7d6d7f..975922a341163 100644 --- a/sklearn/tests/test_random_projection.py +++ b/sklearn/tests/test_random_projection.py @@ -1,7 +1,10 @@ from __future__ import division +import functools + import numpy as np import scipy.sparse as sp +import pytest from sklearn.metrics import euclidean_distances @@ -113,21 +116,21 @@ def check_input_with_sparse_random_matrix(random_matrix): random_matrix, n_components, n_features, density=density) -def test_basic_property_of_random_matrix(): +@pytest.mark.parametrize("random_matrix", all_random_matrix) +def test_basic_property_of_random_matrix(random_matrix): # Check basic properties of random matrix generation - for random_matrix in all_random_matrix: - yield check_input_size_random_matrix, random_matrix - yield check_size_generated, random_matrix - yield check_zero_mean_and_unit_norm, random_matrix - - for random_matrix in all_sparse_random_matrix: - yield check_input_with_sparse_random_matrix, random_matrix - - random_matrix_dense = \ - lambda n_components, n_features, random_state: random_matrix( - n_components, n_features, random_state=random_state, - density=1.0) - yield check_zero_mean_and_unit_norm, random_matrix_dense + check_input_size_random_matrix(random_matrix) + check_size_generated(random_matrix) + check_zero_mean_and_unit_norm(random_matrix) + + +@pytest.mark.parametrize("random_matrix", all_sparse_random_matrix) +def test_basic_property_of_sparse_random_matrix(random_matrix): + check_input_with_sparse_random_matrix(random_matrix) + + random_matrix_dense = functools.partial(random_matrix, density=1.0) + + check_zero_mean_and_unit_norm(random_matrix_dense) def test_gaussian_random_matrix(): diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index f85493543b1ef..bb117d8a29863 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -7,6 +7,7 @@ from itertools import product import struct +import pytest import numpy as np from scipy.sparse import csc_matrix from scipy.sparse import csr_matrix @@ -701,14 +702,14 @@ def check_min_weight_fraction_leaf(name, datasets, sparse=False): name, est.min_weight_fraction_leaf)) -def test_min_weight_fraction_leaf(): - # Check on dense input - for name in ALL_TREES: - yield check_min_weight_fraction_leaf, name, "iris" +@pytest.mark.parametrize("name", ALL_TREES) +def test_min_weight_fraction_leaf_on_dense_input(name): + 
check_min_weight_fraction_leaf(name, "iris") - # Check on sparse input - for name in SPARSE_TREES: - yield check_min_weight_fraction_leaf, name, "multilabel", True + +@pytest.mark.parametrize("name", SPARSE_TREES) +def test_min_weight_fraction_leaf_on_sparse_input(name): + check_min_weight_fraction_leaf(name, "multilabel", True) def check_min_weight_fraction_leaf_with_min_samples_leaf(name, datasets, @@ -775,16 +776,15 @@ def check_min_weight_fraction_leaf_with_min_samples_leaf(name, datasets, est.min_samples_leaf)) -def test_min_weight_fraction_leaf_with_min_samples_leaf(): - # Check on dense input - for name in ALL_TREES: - yield (check_min_weight_fraction_leaf_with_min_samples_leaf, - name, "iris") +@pytest.mark.parametrize("name", ALL_TREES) +def test_min_weight_fraction_leaf_with_min_samples_leaf_on_dense_input(name): + check_min_weight_fraction_leaf_with_min_samples_leaf(name, "iris") + - # Check on sparse input - for name in SPARSE_TREES: - yield (check_min_weight_fraction_leaf_with_min_samples_leaf, - name, "multilabel", True) +@pytest.mark.parametrize("name", SPARSE_TREES) +def test_min_weight_fraction_leaf_with_min_samples_leaf_on_sparse_input(name): + check_min_weight_fraction_leaf_with_min_samples_leaf( + name, "multilabel", True) def test_min_impurity_split(): @@ -1178,9 +1178,9 @@ def check_class_weights(name): assert_almost_equal(clf1.feature_importances_, clf2.feature_importances_) -def test_class_weights(): - for name in CLF_TREES: - yield check_class_weights, name +@pytest.mark.parametrize("name", CLF_TREES) +def test_class_weights(name): + check_class_weights(name) def check_class_weight_errors(name): @@ -1202,9 +1202,9 @@ def check_class_weight_errors(name): assert_raises(ValueError, clf.fit, X, _y) -def test_class_weight_errors(): - for name in CLF_TREES: - yield check_class_weight_errors, name +@pytest.mark.parametrize("name", CLF_TREES) +def test_class_weight_errors(name): + check_class_weight_errors(name) def test_max_leaf_nodes(): @@ -1364,20 +1364,25 @@ def check_sparse_input(tree, dataset, max_depth=None): y_log_proba) -def test_sparse_input(): - for tree_type, dataset in product(SPARSE_TREES, ("clf_small", "toy", - "digits", "multilabel", - "sparse-pos", - "sparse-neg", - "sparse-mix", "zeros")): - max_depth = 3 if dataset == "digits" else None - yield (check_sparse_input, tree_type, dataset, max_depth) +@pytest.mark.parametrize("tree_type", SPARSE_TREES) +@pytest.mark.parametrize( + "dataset", + ("clf_small", "toy", "digits", "multilabel", + "sparse-pos", "sparse-neg", "sparse-mix", + "zeros") +) +def test_sparse_input(tree_type, dataset): + max_depth = 3 if dataset == "digits" else None + check_sparse_input(tree_type, dataset, max_depth) + +@pytest.mark.parametrize("tree_type", + set(SPARSE_TREES).intersection(REG_TREES)) +@pytest.mark.parametrize("dataset", ["boston", "reg_small"]) +def test_sparse_input_reg_trees(tree_type, dataset): # Due to numerical instability of MSE and too strict test, we limit the # maximal depth - for tree_type, dataset in product(SPARSE_TREES, ["boston", "reg_small"]): - if tree_type in REG_TREES: - yield (check_sparse_input, tree_type, dataset, 2) + check_sparse_input(tree_type, dataset, 2) def check_sparse_parameters(tree, dataset): @@ -1424,13 +1429,6 @@ def check_sparse_parameters(tree, dataset): assert_array_almost_equal(s.predict(X), d.predict(X)) -def test_sparse_parameters(): - for tree_type, dataset in product(SPARSE_TREES, ["sparse-pos", - "sparse-neg", - "sparse-mix", "zeros"]): - yield (check_sparse_parameters, 
tree_type, dataset) - - def check_sparse_criterion(tree, dataset): TreeEstimator = ALL_TREES[tree] X = DATASETS[dataset]["X"] @@ -1451,11 +1449,13 @@ def check_sparse_criterion(tree, dataset): assert_array_almost_equal(s.predict(X), d.predict(X)) -def test_sparse_criterion(): - for tree_type, dataset in product(SPARSE_TREES, ["sparse-pos", - "sparse-neg", - "sparse-mix", "zeros"]): - yield (check_sparse_criterion, tree_type, dataset) +@pytest.mark.parametrize("tree_type", SPARSE_TREES) +@pytest.mark.parametrize("dataset", + ["sparse-pos", "sparse-neg", "sparse-mix", "zeros"]) +@pytest.mark.parametrize("check", + [check_sparse_parameters, check_sparse_criterion]) +def test_sparse(tree_type, dataset, check): + check(tree_type, dataset) def check_explicit_sparse_zeros(tree, max_depth=3, @@ -1527,9 +1527,9 @@ def check_explicit_sparse_zeros(tree, max_depth=3, d.predict_proba(X2)) -def test_explicit_sparse_zeros(): - for tree_type in SPARSE_TREES: - yield (check_explicit_sparse_zeros, tree_type) +@pytest.mark.parametrize("tree_type", SPARSE_TREES) +def test_explicit_sparse_zeros(tree_type): + check_explicit_sparse_zeros(tree_type) @ignore_warnings @@ -1547,10 +1547,10 @@ def check_raise_error_on_1d_input(name): assert_raises(ValueError, est.predict, [X]) -@ignore_warnings -def test_1d_input(): - for name in ALL_TREES: - yield check_raise_error_on_1d_input, name +@pytest.mark.parametrize("name", ALL_TREES) +def test_1d_input(name): + with ignore_warnings(): + check_raise_error_on_1d_input(name) def _check_min_weight_leaf_split_level(TreeEstimator, X, y, sample_weight): @@ -1576,9 +1576,9 @@ def check_min_weight_leaf_split_level(name): sample_weight) -def test_min_weight_leaf_split_level(): - for name in ALL_TREES: - yield check_min_weight_leaf_split_level, name +@pytest.mark.parametrize("name", ALL_TREES) +def test_min_weight_leaf_split_level(name): + check_min_weight_leaf_split_level(name) def check_public_apply(name): @@ -1599,12 +1599,14 @@ def check_public_apply_sparse(name): est.tree_.apply(X_small32)) -def test_public_apply(): - for name in ALL_TREES: - yield (check_public_apply, name) +@pytest.mark.parametrize("name", ALL_TREES) +def test_public_apply_all_trees(name): + check_public_apply(name) - for name in SPARSE_TREES: - yield (check_public_apply_sparse, name) + +@pytest.mark.parametrize("name", SPARSE_TREES) +def test_public_apply_sparse_trees(name): + check_public_apply_sparse(name) def check_presort_sparse(est, X, y): @@ -1623,19 +1625,18 @@ def test_presort_sparse(): y = y[:, 0] for est, sparse_matrix in product(ests, sparse_matrices): - yield check_presort_sparse, est, sparse_matrix(X), y + check_presort_sparse(est, sparse_matrix(X), y) -def test_invalid_presort(): - classes = (DecisionTreeRegressor, DecisionTreeClassifier) +@pytest.mark.parametrize('cls', + (DecisionTreeRegressor, DecisionTreeClassifier)) +def test_invalid_presort(cls): allowed_presort = ('auto', True, False) invalid_presort = 'invalid' msg = ("'presort' should be in {}. 
" "Got {!r} instead.".format(allowed_presort, invalid_presort)) - for cls in classes: - est = cls(presort=invalid_presort) - assert_raise_message(ValueError, msg, - est.fit, X, y) + est = cls(presort=invalid_presort) + assert_raise_message(ValueError, msg, est.fit, X, y) def test_decision_path_hardcoded(): @@ -1674,9 +1675,9 @@ def check_decision_path(name): assert_less_equal(est.tree_.max_depth, max_depth) -def test_decision_path(): - for name in ALL_TREES: - yield (check_decision_path, name) +@pytest.mark.parametrize("name", ALL_TREES) +def test_decision_path(name): + check_decision_path(name) def check_no_sparse_y_support(name): @@ -1685,10 +1686,10 @@ def check_no_sparse_y_support(name): assert_raises(TypeError, TreeEstimator(random_state=0).fit, X, y) -def test_no_sparse_y_support(): +@pytest.mark.parametrize("name", ALL_TREES) +def test_no_sparse_y_support(name): # Currently we don't support sparse y - for name in ALL_TREES: - yield (check_no_sparse_y_support, name) + check_no_sparse_y_support(name) def test_mae(): diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index f53b814c70084..d89e2a1aa1223 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -9,6 +9,8 @@ from scipy import linalg from scipy import stats +import pytest + from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal @@ -170,9 +172,10 @@ def check_randomized_svd_low_rank(dtype): assert_almost_equal(s[:rank], sa[:rank], decimal=decimal) -def test_randomized_svd_low_rank_all_dtypes(): - for dtype in (np.int32, np.int64, np.float32, np.float64): - yield check_randomized_svd_low_rank, dtype +@pytest.mark.parametrize('dtype', + (np.int32, np.int64, np.float32, np.float64)) +def test_randomized_svd_low_rank_all_dtypes(dtype): + check_randomized_svd_low_rank(dtype) @ignore_warnings # extmath.norm is deprecated to be removed in 0.21 @@ -191,34 +194,35 @@ def test_norm_squared_norm(): squared_norm, X.astype(int)) -def test_row_norms(): +@pytest.mark.parametrize('dtype', + (np.float32, np.float64)) +def test_row_norms(dtype): X = np.random.RandomState(42).randn(100, 100) - for dtype in (np.float32, np.float64): - if dtype is np.float32: - precision = 4 - else: - precision = 5 - - X = X.astype(dtype) - sq_norm = (X ** 2).sum(axis=1) - - assert_array_almost_equal(sq_norm, row_norms(X, squared=True), + if dtype is np.float32: + precision = 4 + else: + precision = 5 + + X = X.astype(dtype) + sq_norm = (X ** 2).sum(axis=1) + + assert_array_almost_equal(sq_norm, row_norms(X, squared=True), + precision) + assert_array_almost_equal(np.sqrt(sq_norm), row_norms(X), precision) + + for csr_index_dtype in [np.int32, np.int64]: + Xcsr = sparse.csr_matrix(X, dtype=dtype) + # csr_matrix will use int32 indices by default, + # up-casting those to int64 when necessary + if csr_index_dtype is np.int64: + Xcsr.indptr = Xcsr.indptr.astype(csr_index_dtype) + Xcsr.indices = Xcsr.indices.astype(csr_index_dtype) + assert Xcsr.indices.dtype == csr_index_dtype + assert Xcsr.indptr.dtype == csr_index_dtype + assert_array_almost_equal(sq_norm, row_norms(Xcsr, squared=True), + precision) + assert_array_almost_equal(np.sqrt(sq_norm), row_norms(Xcsr), precision) - assert_array_almost_equal(np.sqrt(sq_norm), row_norms(X), precision) - - for csr_index_dtype in [np.int32, np.int64]: - Xcsr = sparse.csr_matrix(X, dtype=dtype) - # csr_matrix will use int32 indices by default, - # up-casting 
those to int64 when necessary - if csr_index_dtype is np.int64: - Xcsr.indptr = Xcsr.indptr.astype(csr_index_dtype) - Xcsr.indices = Xcsr.indices.astype(csr_index_dtype) - assert Xcsr.indices.dtype == csr_index_dtype - assert Xcsr.indptr.dtype == csr_index_dtype - assert_array_almost_equal(sq_norm, row_norms(Xcsr, squared=True), - precision) - assert_array_almost_equal(np.sqrt(sq_norm), row_norms(Xcsr), - precision) def test_randomized_svd_low_rank_with_noise(): diff --git a/sklearn/utils/tests/test_stats.py b/sklearn/utils/tests/test_stats.py index fbd05031c87b3..36e3bf72b609b 100644 --- a/sklearn/utils/tests/test_stats.py +++ b/sklearn/utils/tests/test_stats.py @@ -1,3 +1,4 @@ +import pytest from sklearn.utils.testing import assert_array_equal, ignore_warnings from sklearn.utils.stats import rankdata @@ -13,12 +14,10 @@ ) -@ignore_warnings # Test deprecated backport to be removed in 0.21 -def test_cases(): +@pytest.mark.parametrize("values, method, expected", _cases) +def test_cases_rankdata(values, method, expected): - def check_case(values, method, expected): + # Test deprecated backport to be removed in 0.21 + with ignore_warnings(): r = rankdata(values, method=method) assert_array_equal(r, expected) - - for values, method, expected in _cases: - yield check_case, values, method, expected
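
The hunks above all apply the same mechanical transformation: a nose-style generator test that yields a check function plus its arguments becomes a flat test function decorated with pytest.mark.parametrize, with one parameter per loop variable and stacked decorators when the original looped over a cartesian product (as in the test_sparse hunk of test_tree.py). The sketch below is only an illustration of that pattern, not part of the patch; the names ESTIMATORS, DATASETS and check_fit_idempotent are hypothetical stand-ins for the real fixtures and check_* helpers.

    import pytest

    ESTIMATORS = ["tree", "forest"]   # hypothetical parameter values
    DATASETS = ["dense", "sparse"]    # hypothetical parameter values

    def check_fit_idempotent(estimator, dataset):
        # stand-in for the real check_* helpers invoked by the tests above
        assert isinstance(estimator, str) and isinstance(dataset, str)

    # Old nose style: a single generator "test" yielding every combination.
    # def test_fit_idempotent():
    #     for estimator, dataset in itertools.product(ESTIMATORS, DATASETS):
    #         yield check_fit_idempotent, estimator, dataset

    # New pytest style: stacked parametrize decorators enumerate the same
    # cartesian product, but each combination is collected as its own test.
    @pytest.mark.parametrize("dataset", DATASETS)
    @pytest.mark.parametrize("estimator", ESTIMATORS)
    def test_fit_idempotent(estimator, dataset):
        check_fit_idempotent(estimator, dataset)

With parametrization each case gets its own test id (e.g. test_fit_idempotent[tree-dense]), so a failure reports the exact parameter combination and can be re-run in isolation, which the yield-based generator tests removed here could not do under pytest.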