
MAINT More test runtime optimizations #14136


Merged: 6 commits, Jun 22, 2019
6 changes: 6 additions & 0 deletions sklearn/feature_extraction/tests/test_image.py
@@ -63,6 +63,8 @@ def test_connect_regions():
# Newer versions of scipy have face in misc
from scipy import misc
face = misc.face(gray=True)
# subsample by 4 to reduce run time
face = face[::4, ::4]
for thr in (50, 150):
mask = face > thr
graph = img_to_graph(face, mask)
@@ -77,6 +79,10 @@ def test_connect_regions_with_grid():
# Newer versions of scipy have face in misc
from scipy import misc
face = misc.face(gray=True)

# subsample by 4 to reduce run time
face = face[::4, ::4]

mask = face > 50
graph = grid_to_graph(*face.shape, mask=mask)
assert_equal(ndimage.label(mask)[1], connected_components(graph)[0])
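
Side note on the change above: slicing with a step of 4 along both axes is plain NumPy striding, so the pixel count drops by roughly 16x before any graph is built. A minimal sketch, using a zeros array as a stand-in for scipy's 768 x 1024 grayscale face image:

# Minimal sketch (not part of the PR) of the subsampling used above.
# Keeping every 4th pixel along each axis cuts the pixel count ~16x,
# which shrinks the graphs built by img_to_graph / grid_to_graph.
import numpy as np

face = np.zeros((768, 1024))  # stand-in for misc.face(gray=True)
small = face[::4, ::4]
print(face.shape, small.shape)  # (768, 1024) (192, 256)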
34 changes: 17 additions & 17 deletions sklearn/feature_selection/tests/test_feature_select.py
@@ -6,6 +6,8 @@
import numpy as np
from scipy import stats, sparse

import pytest

from sklearn.utils.testing import assert_equal
from sklearn.utils.testing import assert_almost_equal
from sklearn.utils.testing import assert_raises
@@ -16,8 +18,6 @@
from sklearn.utils.testing import assert_warns
from sklearn.utils.testing import ignore_warnings
from sklearn.utils.testing import assert_warns_message
from sklearn.utils.testing import assert_greater
from sklearn.utils.testing import assert_greater_equal
from sklearn.utils import safe_mask

from sklearn.datasets.samples_generator import (make_classification,
@@ -408,7 +408,9 @@ def test_boundary_case_ch2():
assert_array_equal(support_fwe, np.array([True, False]))


def test_select_fdr_regression():
@pytest.mark.parametrize("alpha", [0.001, 0.01, 0.1])
@pytest.mark.parametrize("n_informative", [1, 5, 10])
def test_select_fdr_regression(alpha, n_informative):
# Test that fdr heuristic actually has low FDR.
def single_fdr(alpha, n_informative, random_state):
X, y = make_regression(n_samples=150, n_features=20,
@@ -434,20 +436,18 @@ def single_fdr(alpha, n_informative, random_state):
(num_true_positives + num_false_positives))
return false_discovery_rate

for alpha in [0.001, 0.01, 0.1]:
for n_informative in [1, 5, 10]:
# As per Benjamini-Hochberg, the expected false discovery rate
# should be lower than alpha:
# FDR = E(FP / (TP + FP)) <= alpha
false_discovery_rate = np.mean([single_fdr(alpha, n_informative,
random_state) for
random_state in range(100)])
assert_greater_equal(alpha, false_discovery_rate)

# Make sure that the empirical false discovery rate increases
# with alpha:
if false_discovery_rate != 0:
assert_greater(false_discovery_rate, alpha / 10)
# As per Benjamini-Hochberg, the expected false discovery rate
# should be lower than alpha:
# FDR = E(FP / (TP + FP)) <= alpha
false_discovery_rate = np.mean([single_fdr(alpha, n_informative,
random_state) for
random_state in range(100)])
assert alpha >= false_discovery_rate

# Make sure that the empirical false discovery rate increases
# with alpha:
if false_discovery_rate != 0:
assert false_discovery_rate > alpha / 10


def test_select_fwe_regression():
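
The conversion of test_select_fdr_regression above, from nested for-loops to stacked pytest.mark.parametrize decorators, is a general pattern. A hedged sketch with an illustrative test body (not the PR's code):

# Sketch of the loop-to-parametrize pattern; the body is illustrative.
import pytest

@pytest.mark.parametrize("alpha", [0.001, 0.01, 0.1])
@pytest.mark.parametrize("n_informative", [1, 5, 10])
def test_sketch(alpha, n_informative):
    # pytest expands stacked decorators into the full 3 x 3 cross
    # product, so each (alpha, n_informative) pair is collected,
    # timed, and reported as its own test case.
    assert 0 < alpha < 1 and n_informative >= 1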
4 changes: 2 additions & 2 deletions sklearn/gaussian_process/tests/test_gpc.py
@@ -113,12 +113,12 @@ def test_random_starts():
@pytest.mark.parametrize('kernel', non_fixed_kernels)
def test_custom_optimizer(kernel):
# Test that GPC can use externally defined optimizers.
# Define a dummy optimizer that simply tests 50 random hyperparameters
# Define a dummy optimizer that simply tests 10 random hyperparameters
def optimizer(obj_func, initial_theta, bounds):
rng = np.random.RandomState(0)
theta_opt, func_min = \
initial_theta, obj_func(initial_theta, eval_gradient=False)
for _ in range(50):
for _ in range(10):
theta = np.atleast_1d(rng.uniform(np.maximum(-2, bounds[:, 0]),
np.minimum(1, bounds[:, 1])))
f = obj_func(theta, eval_gradient=False)
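
For context, the optimizer callable exercised above follows the contract documented for GaussianProcessClassifier: it is called as optimizer(obj_func, initial_theta, bounds) and must return (theta_opt, func_min). A hedged usage sketch, with an illustrative dataset and a deliberately trivial optimizer:

# Hedged sketch of plugging a custom optimizer into GPC.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.gaussian_process import GaussianProcessClassifier

def optimizer(obj_func, initial_theta, bounds):
    # Trivial "optimizer": evaluate the objective once and keep the
    # initial hyperparameters (obj_func returns the negative
    # log-marginal likelihood when eval_gradient=False).
    return initial_theta, obj_func(initial_theta, eval_gradient=False)

X, y = make_classification(n_samples=30, random_state=0)
gpc = GaussianProcessClassifier(optimizer=optimizer, random_state=0)
gpc.fit(X, y)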
12 changes: 6 additions & 6 deletions sklearn/impute/tests/test_impute.py
@@ -796,7 +796,7 @@ def test_iterative_imputer_no_missing():

def test_iterative_imputer_rank_one():
rng = np.random.RandomState(0)
d = 100
d = 50
A = rng.rand(d, 1)
B = rng.rand(1, d)
X = np.dot(A, B)
@@ -808,7 +808,7 @@ def test_iterative_imputer_rank_one():
verbose=1,
random_state=rng)
X_filled = imputer.fit_transform(X_missing)
assert_allclose(X_filled, X, atol=0.01)
assert_allclose(X_filled, X, atol=0.02)


@pytest.mark.parametrize(
@@ -817,8 +817,8 @@ def test_iterative_imputer_rank_one():
)
def test_iterative_imputer_transform_recovery(rank):
rng = np.random.RandomState(0)
n = 100
d = 100
n = 70
d = 70
A = rng.rand(n, rank)
B = rng.rand(rank, d)
X_filled = np.dot(A, B)
@@ -832,7 +832,7 @@ def test_iterative_imputer_transform_recovery(rank):
X_test_filled = X_filled[n:]
X_test = X_missing[n:]

imputer = IterativeImputer(max_iter=10,
imputer = IterativeImputer(max_iter=5,
verbose=1,
random_state=rng).fit(X_train)
X_test_est = imputer.transform(X_test)
@@ -890,7 +890,7 @@ def test_iterative_imputer_early_stopping():
X_missing[nan_mask] = np.nan

imputer = IterativeImputer(max_iter=100,
tol=1e-3,
tol=1e-2,
sample_posterior=False,
verbose=1,
random_state=rng)
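
A note on why the looser atol above is reasonable: a rank-one matrix is highly redundant, so recovery stays easy even at smaller d, and the size reduction mainly buys runtime. A sketch of the underlying idea, with illustrative sizes and masking fraction (not the test's values):

# Sketch: a rank-one matrix is so redundant that IterativeImputer can
# reconstruct masked entries from the rest. Illustrative sizes only.
import numpy as np
from sklearn.experimental import enable_iterative_imputer  # noqa: F401
from sklearn.impute import IterativeImputer

rng = np.random.RandomState(0)
d = 20
X = rng.rand(d, 1) @ rng.rand(1, d)       # rank-one matrix
X_missing = X.copy()
X_missing[rng.rand(d, d) < 0.3] = np.nan  # mask ~30% of entries
X_filled = IterativeImputer(max_iter=10, random_state=0).fit_transform(X_missing)
print(np.abs(X_filled - X).max())         # small reconstruction error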
11 changes: 7 additions & 4 deletions sklearn/inspection/tests/test_partial_dependence.py
@@ -37,12 +37,15 @@


# (X, y), n_targets <-- as expected in the output of partial_dep()
binary_classification_data = (make_classification(random_state=0), 1)
multiclass_classification_data = (make_classification(n_classes=3,
binary_classification_data = (make_classification(n_samples=50,
random_state=0), 1)
multiclass_classification_data = (make_classification(n_samples=50,
n_classes=3,
n_clusters_per_class=1,
random_state=0), 3)
regression_data = (make_regression(random_state=0), 1)
multioutput_regression_data = (make_regression(n_targets=2, random_state=0), 2)
regression_data = (make_regression(n_samples=50, random_state=0), 1)
multioutput_regression_data = (make_regression(n_samples=50, n_targets=2,
random_state=0), 2)


@pytest.mark.parametrize('Estimator, method, data', [
61 changes: 29 additions & 32 deletions sklearn/manifold/tests/test_t_sne.py
@@ -126,7 +126,7 @@ def test_binary_search_neighbors():
# Binary perplexity search approximation.
# Should be approximately equal to the slow method when we use
# all points as neighbors.
n_samples = 500
n_samples = 200
desired_perplexity = 25.0
random_state = check_random_state(0)
distances = random_state.randn(n_samples, 2).astype(np.float32)
@@ -239,21 +239,18 @@ def test_trustworthiness():
assert_almost_equal(trustworthiness(X, X_embedded, n_neighbors=1), 0.2)


def test_preserve_trustworthiness_approximately():
@pytest.mark.parametrize("method", ['exact', 'barnes_hut'])
@pytest.mark.parametrize("init", ('random', 'pca'))
def test_preserve_trustworthiness_approximately(method, init):
# Nearest neighbors should be preserved approximately.
random_state = check_random_state(0)
n_components = 2
methods = ['exact', 'barnes_hut']
X = random_state.randn(50, n_components).astype(np.float32)
for init in ('random', 'pca'):
for method in methods:
tsne = TSNE(n_components=n_components, init=init, random_state=0,
method=method)
X_embedded = tsne.fit_transform(X)
t = trustworthiness(X, X_embedded, n_neighbors=1)
assert_greater(t, 0.85, msg='Trustworthiness={:0.3f} < 0.85 '
'for method={} and '
'init={}'.format(t, method, init))
tsne = TSNE(n_components=n_components, init=init, random_state=0,
method=method, n_iter=700)
X_embedded = tsne.fit_transform(X)
t = trustworthiness(X, X_embedded, n_neighbors=1)
assert t > 0.85


def test_optimization_minimizes_kl_divergence():
@@ -273,11 +270,11 @@ def test_fit_csr_matrix():
def test_fit_csr_matrix():
# X can be a sparse matrix.
random_state = check_random_state(0)
X = random_state.randn(100, 2)
X[(np.random.randint(0, 100, 50), np.random.randint(0, 2, 50))] = 0.0
X = random_state.randn(50, 2)
X[(np.random.randint(0, 50, 25), np.random.randint(0, 2, 25))] = 0.0
X_csr = sp.csr_matrix(X)
tsne = TSNE(n_components=2, perplexity=10, learning_rate=100.0,
random_state=0, method='exact')
random_state=0, method='exact', n_iter=500)
X_embedded = tsne.fit_transform(X_csr)
assert_almost_equal(trustworthiness(X_csr, X_embedded, n_neighbors=1), 1.0,
decimal=1)
@@ -287,11 +284,11 @@ def test_preserve_trustworthiness_approximately_with_precomputed_distances():
# Nearest neighbors should be preserved approximately.
random_state = check_random_state(0)
for i in range(3):
X = random_state.randn(100, 2)
X = random_state.randn(80, 2)
D = squareform(pdist(X), "sqeuclidean")
tsne = TSNE(n_components=2, perplexity=2, learning_rate=100.0,
early_exaggeration=2.0, metric="precomputed",
random_state=i, verbose=0)
random_state=i, verbose=0, n_iter=500)
X_embedded = tsne.fit_transform(D)
t = trustworthiness(D, X_embedded, n_neighbors=1, metric="precomputed")
assert t > .95
@@ -420,11 +417,11 @@ def test_early_exaggeration_used():
for method in methods:
tsne = TSNE(n_components=n_components, perplexity=1,
learning_rate=100.0, init="pca", random_state=0,
method=method, early_exaggeration=1.0)
method=method, early_exaggeration=1.0, n_iter=250)
X_embedded1 = tsne.fit_transform(X)
tsne = TSNE(n_components=n_components, perplexity=1,
learning_rate=100.0, init="pca", random_state=0,
method=method, early_exaggeration=10.0)
method=method, early_exaggeration=10.0, n_iter=250)
X_embedded2 = tsne.fit_transform(X)

assert not np.allclose(X_embedded1, X_embedded2)
@@ -586,9 +583,10 @@ def test_64bit(method, dt):
# Ensure 64bit arrays are handled correctly.
random_state = check_random_state(0)

X = random_state.randn(50, 2).astype(dt, copy=False)
X = random_state.randn(10, 2).astype(dt, copy=False)
tsne = TSNE(n_components=2, perplexity=2, learning_rate=100.0,
random_state=0, method=method, verbose=0)
random_state=0, method=method, verbose=0,
n_iter=300)
X_embedded = tsne.fit_transform(X)
effective_type = X_embedded.dtype

@@ -605,7 +603,7 @@ def test_kl_divergence_not_nan(method):

X = random_state.randn(50, 2)
tsne = TSNE(n_components=2, perplexity=2, learning_rate=100.0,
random_state=0, method=method, verbose=0, n_iter=1003)
random_state=0, method=method, verbose=0, n_iter=503)
Member Author:
We just want to be sure that tsne.kl_divergence_ is computed when n_iter % n_iter_check != 0; cf. the comment in the code above. (A short sketch of this follows the file's diff.)

tsne.fit_transform(X)

assert not np.isnan(tsne.kl_divergence_)
@@ -722,9 +720,10 @@ def test_min_grad_norm():
def test_accessible_kl_divergence():
# Ensures that the accessible kl_divergence matches the computed value
random_state = check_random_state(0)
X = random_state.randn(100, 2)
X = random_state.randn(50, 2)
tsne = TSNE(n_iter_without_progress=2, verbose=2,
random_state=0, method='exact')
random_state=0, method='exact',
n_iter=500)

old_stdout = sys.stdout
sys.stdout = StringIO()
@@ -746,7 +745,8 @@ def test_accessible_kl_divergence():
assert_almost_equal(tsne.kl_divergence_, float(error), decimal=5)


def check_uniform_grid(method, seeds=[0, 1, 2], n_iter=1000):
@pytest.mark.parametrize('method', ['barnes_hut', 'exact'])
def test_uniform_grid(method):
"""Make sure that TSNE can approximately recover a uniform 2D grid

Due to ties in distances between point in X_2d_grid, this test is platform
@@ -758,6 +758,8 @@ def check_uniform_grid(method, seeds=[0, 1, 2], n_iter=1000):
we re-run t-SNE from the final point when the convergence is not good
enough.
"""
seeds = [0, 1, 2]
n_iter = 500
for seed in seeds:
tsne = TSNE(n_components=2, init='random', random_state=seed,
perplexity=20, n_iter=n_iter, method=method)
@@ -791,11 +793,6 @@ def assert_uniform_grid(Y, try_name=None):
assert_less(largest_to_mean, 2, msg=try_name)


@pytest.mark.parametrize('method', ['barnes_hut', 'exact'])
def test_uniform_grid(method):
check_uniform_grid(method)


def test_bh_match_exact():
# check that the ``barnes_hut`` method match the exact one when
# ``angle = 0`` and ``perplexity > n_samples / 3``
@@ -829,8 +826,8 @@ def test_tsne_with_different_distance_metrics():
for metric, dist_func in zip(metrics, dist_funcs):
X_transformed_tsne = TSNE(
metric=metric, n_components=n_components_embedding,
random_state=0).fit_transform(X)
random_state=0, n_iter=300).fit_transform(X)
X_transformed_tsne_precomputed = TSNE(
metric='precomputed', n_components=n_components_embedding,
random_state=0).fit_transform(dist_func(X))
random_state=0, n_iter=300).fit_transform(dist_func(X))
assert_array_equal(X_transformed_tsne, X_transformed_tsne_precomputed)
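
On the reviewer comment above about n_iter=503: TSNE's gradient descent only re-evaluates and stores the error every n_iter_check iterations, assumed here to be 50 per the code comment the author cites, so an n_iter that is not a multiple of that interval forces kl_divergence_ to be filled in after the final iteration. A minimal illustration under that assumption:

# Assuming n_iter_check = 50 (the interval the review comment above
# refers to): 503 is deliberately not a multiple of it, so the last
# gradient-descent iteration is not an error-check iteration and
# kl_divergence_ must still be computed at the end of fitting.
n_iter, n_iter_check = 503, 50
assert n_iter % n_iter_check != 0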
2 changes: 1 addition & 1 deletion sklearn/metrics/tests/test_classification.py
@@ -678,7 +678,7 @@ def test_matthews_corrcoef_multiclass():
assert_almost_equal(mcc, 0.)


@pytest.mark.parametrize('n_points', [100, 10000, 1000000])
Member Author:
The last case was taking 6s on my laptop, which is too long for a single test, and it doesn't seem to add much coverage compared to the other two.

@pytest.mark.parametrize('n_points', [100, 10000])
def test_matthews_corrcoef_overflow(n_points):
# https://github.com/scikit-learn/scikit-learn/issues/9622
rng = np.random.RandomState(20170906)
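
For context, the property checked above (see scikit-learn issue #9622) is that matthews_corrcoef stays near zero for random labels without overflowing integer intermediates as n_points grows. A hedged sketch at a small, illustrative size:

# Sketch of the property being tested: random labels give MCC ~ 0,
# and the computation must not overflow for large sample counts.
# The size here is small and illustrative.
import numpy as np
from sklearn.metrics import matthews_corrcoef

rng = np.random.RandomState(20170906)
y_true = rng.randint(0, 2, size=10000)
y_pred = rng.randint(0, 2, size=10000)
print(matthews_corrcoef(y_true, y_pred))  # close to 0.0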
8 changes: 4 additions & 4 deletions sklearn/metrics/tests/test_common.py
@@ -1197,11 +1197,11 @@ def test_multiclass_sample_weight_invariance(name):
def test_multilabel_sample_weight_invariance(name):
# multilabel indicator
random_state = check_random_state(0)
_, ya = make_multilabel_classification(n_features=1, n_classes=20,
random_state=0, n_samples=100,
_, ya = make_multilabel_classification(n_features=1, n_classes=10,
random_state=0, n_samples=50,
allow_unlabeled=False)
_, yb = make_multilabel_classification(n_features=1, n_classes=20,
random_state=1, n_samples=100,
_, yb = make_multilabel_classification(n_features=1, n_classes=10,
random_state=1, n_samples=50,
allow_unlabeled=False)
y_true = np.vstack([ya, yb])
y_pred = np.vstack([ya, ya])
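
The invariance exercised above, unchanged by the smaller dataset, is that repeating a sample k times must score the same as giving it sample_weight=k. A minimal sketch with accuracy_score standing in for the many metrics the test sweeps:

# Sketch of sample-weight invariance: repeating a sample k times
# should score the same as weighting it by k.
import numpy as np
from sklearn.metrics import accuracy_score

y_true = np.array([0, 1, 1])
y_pred = np.array([0, 1, 0])
repeated = accuracy_score(np.repeat(y_true, [1, 1, 2]),
                          np.repeat(y_pred, [1, 1, 2]))
weighted = accuracy_score(y_true, y_pred, sample_weight=[1, 1, 2])
assert repeated == weighted  # both 0.5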
10 changes: 5 additions & 5 deletions sklearn/metrics/tests/test_pairwise.py
@@ -558,7 +558,7 @@ def test_pairwise_distances_chunked():
# Test the pairwise_distance helper function.
rng = np.random.RandomState(0)
# Euclidean distance should be equivalent to calling the function.
X = rng.random_sample((400, 4))
X = rng.random_sample((200, 4))
check_pairwise_distances_chunked(X, None, working_memory=1,
metric='euclidean')
# Test small amounts of memory
@@ -569,7 +569,7 @@ def test_pairwise_distances_chunked():
check_pairwise_distances_chunked(X.tolist(), None, working_memory=1,
metric='euclidean')
# Euclidean distance, with Y != X.
Y = rng.random_sample((200, 4))
Y = rng.random_sample((100, 4))
check_pairwise_distances_chunked(X, Y, working_memory=1,
metric='euclidean')
check_pairwise_distances_chunked(X.tolist(), Y.tolist(), working_memory=1,
@@ -1103,9 +1103,9 @@ def test_pairwise_distances_data_derived_params(n_jobs, metric, dist_function,
y_is_x):
# check that pairwise_distances give the same result in sequential and
# parallel, when metric has data-derived parameters.
with config_context(working_memory=1): # to have more than 1 chunk
with config_context(working_memory=0.1): # to have more than 1 chunk
rng = np.random.RandomState(0)
X = rng.random_sample((1000, 10))
X = rng.random_sample((100, 10))

if y_is_x:
Y = X
@@ -1115,7 +1115,7 @@ def test_pairwise_distances_data_derived_params(n_jobs, metric, dist_function,
else:
params = {'VI': np.linalg.inv(np.cov(X.T)).T}
else:
Y = rng.random_sample((1000, 10))
Y = rng.random_sample((100, 10))
expected_dist_default_params = cdist(X, Y, metric=metric)
if metric == "seuclidean":
params = {'V': np.var(np.vstack([X, Y]), axis=0, ddof=1)}
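
On the working_memory change above: pairwise_distances_chunked sizes each yielded block of the distance matrix to fit the working-memory budget (in MiB), so shrinking the budget keeps the test covering the multi-chunk path even with the smaller X. A hedged sketch with an illustrative budget and shapes:

# Sketch: pairwise_distances_chunked yields the distance matrix in row
# blocks sized to the working_memory budget (MiB), so a tiny budget
# forces multiple chunks even for a small X.
import numpy as np
from sklearn import config_context
from sklearn.metrics import pairwise_distances_chunked

X = np.random.RandomState(0).random_sample((100, 10))
with config_context(working_memory=0.01):  # ~10 kB budget
    n_chunks = sum(1 for _ in pairwise_distances_chunked(X))
print(n_chunks)  # several chunks rather than one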