From 61a19660ea333b5f0ad355a8c2db86630ed31df3 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Thu, 3 Mar 2022 17:21:22 +0100 Subject: [PATCH 01/10] TST Adapt test_mutual_info.py to test implementations on 32bit datasets --- .../tests/test_mutual_info.py | 56 +++++++++++-------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/sklearn/feature_selection/tests/test_mutual_info.py b/sklearn/feature_selection/tests/test_mutual_info.py index bb98dfaee4db9..25938da87800b 100644 --- a/sklearn/feature_selection/tests/test_mutual_info.py +++ b/sklearn/feature_selection/tests/test_mutual_info.py @@ -7,12 +7,15 @@ from sklearn.feature_selection._mutual_info import _compute_mi from sklearn.feature_selection import mutual_info_regression, mutual_info_classif +DTYPES = (np.float64, np.float32) -def test_compute_mi_dd(): + +@pytest.mark.parametrize("dtype", DTYPES) +def test_compute_mi_dd(dtype): # In discrete case computations are straightforward and can be done # by hand on given vectors. - x = np.array([0, 1, 1, 0, 0]) - y = np.array([1, 0, 0, 0, 1]) + x = np.array([0, 1, 1, 0, 0], dtype=dtype) + y = np.array([1, 0, 0, 0, 1], dtype=dtype) H_x = H_y = -(3 / 5) * np.log(3 / 5) - (2 / 5) * np.log(2 / 5) H_xy = -1 / 5 * np.log(1 / 5) - 2 / 5 * np.log(2 / 5) - 2 / 5 * np.log(2 / 5) @@ -21,7 +24,8 @@ def test_compute_mi_dd(): assert_almost_equal(_compute_mi(x, y, True, True), I_xy) -def test_compute_mi_cc(): +@pytest.mark.parametrize("dtype", DTYPES) +def test_compute_mi_cc(dtype): # For two continuous variables a good approach is to test on bivariate # normal distribution, where mutual information is known. @@ -43,7 +47,7 @@ def test_compute_mi_cc(): I_theory = np.log(sigma_1) + np.log(sigma_2) - 0.5 * np.log(np.linalg.det(cov)) rng = check_random_state(0) - Z = rng.multivariate_normal(mean, cov, size=1000) + Z = rng.multivariate_normal(mean, cov, size=1000).astype(dtype) x, y = Z[:, 0], Z[:, 1] @@ -54,7 +58,8 @@ def test_compute_mi_cc(): assert_almost_equal(I_computed, I_theory, 1) -def test_compute_mi_cd(): +@pytest.mark.parametrize("dtype", DTYPES) +def test_compute_mi_cd(dtype): # To test define a joint distribution as follows: # p(x, y) = p(x) p(y | x) # X ~ Bernoulli(p) @@ -76,10 +81,10 @@ def test_compute_mi_cd(): for p in [0.3, 0.5, 0.7]: x = rng.uniform(size=n_samples) > p - y = np.empty(n_samples) + y = np.empty(n_samples, dtype) mask = x == 0 - y[mask] = rng.uniform(-1, 1, size=np.sum(mask)) - y[~mask] = rng.uniform(0, 2, size=np.sum(~mask)) + y[mask] = rng.uniform(-1, 1, size=np.sum(mask)).astype(dtype) + y[~mask] = rng.uniform(0, 2, size=np.sum(~mask)).astype(dtype) I_theory = -0.5 * ( (1 - p) * np.log(0.5 * (1 - p)) + p * np.log(0.5 * p) + np.log(0.5) @@ -91,15 +96,16 @@ def test_compute_mi_cd(): assert_almost_equal(I_computed, I_theory, 1) -def test_compute_mi_cd_unique_label(): +@pytest.mark.parametrize("dtype", DTYPES) +def test_compute_mi_cd_unique_label(dtype): # Test that adding unique label doesn't change MI. n_samples = 100 x = np.random.uniform(size=n_samples) > 0.5 - y = np.empty(n_samples) + y = np.empty(n_samples, dtype) mask = x == 0 - y[mask] = np.random.uniform(-1, 1, size=np.sum(mask)) - y[~mask] = np.random.uniform(0, 2, size=np.sum(~mask)) + y[mask] = np.random.uniform(-1, 1, size=np.sum(mask)).astype(dtype) + y[~mask] = np.random.uniform(0, 2, size=np.sum(~mask)).astype(dtype) mi_1 = _compute_mi(x, y, True, False) @@ -111,9 +117,10 @@ def test_compute_mi_cd_unique_label(): # We are going test that feature ordering by MI matches our expectations. 
-def test_mutual_info_classif_discrete(): - X = np.array([[0, 0, 0], [1, 1, 0], [2, 0, 1], [2, 0, 1], [2, 0, 1]]) - y = np.array([0, 1, 2, 2, 1]) +@pytest.mark.parametrize("dtype", DTYPES) +def test_mutual_info_classif_discrete(dtype): + X = np.array([[0, 0, 0], [1, 1, 0], [2, 0, 1], [2, 0, 1], [2, 0, 1]], dtype=dtype) + y = np.array([0, 1, 2, 2, 1], dtype=dtype) # Here X[:, 0] is the most informative feature, and X[:, 1] is weakly # informative. @@ -121,7 +128,8 @@ def test_mutual_info_classif_discrete(): assert_array_equal(np.argsort(-mi), np.array([0, 2, 1])) -def test_mutual_info_regression(): +@pytest.mark.parametrize("dtype", DTYPES) +def test_mutual_info_regression(dtype): # We generate sample from multivariate normal distribution, using # transformation from initially uncorrelated variables. The zero # variables after transformation is selected as the target vector, @@ -132,7 +140,7 @@ def test_mutual_info_regression(): mean = np.zeros(4) rng = check_random_state(0) - Z = rng.multivariate_normal(mean, cov, size=1000) + Z = rng.multivariate_normal(mean, cov, size=1000).astype(dtype) X = Z[:, 1:] y = Z[:, 0] @@ -140,11 +148,12 @@ def test_mutual_info_regression(): assert_array_equal(np.argsort(-mi), np.array([1, 2, 0])) -def test_mutual_info_classif_mixed(): +@pytest.mark.parametrize("dtype", DTYPES) +def test_mutual_info_classif_mixed(dtype): # Here the target is discrete and there are two continuous and one # discrete feature. The idea of this test is clear from the code. rng = check_random_state(0) - X = rng.rand(1000, 3) + X = rng.rand(1000, 3).astype(dtype) X[:, 1] += X[:, 0] y = ((0.5 * X[:, 0] + X[:, 2]) > 0.5).astype(int) X[:, 2] = X[:, 2] > 0.5 @@ -164,9 +173,10 @@ def test_mutual_info_classif_mixed(): assert mi_nn[2] == mi[2] -def test_mutual_info_options(): - X = np.array([[0, 0, 0], [1, 1, 0], [2, 0, 1], [2, 0, 1], [2, 0, 1]], dtype=float) - y = np.array([0, 1, 2, 2, 1], dtype=float) +@pytest.mark.parametrize("dtype", DTYPES) +def test_mutual_info_options(dtype): + X = np.array([[0, 0, 0], [1, 1, 0], [2, 0, 1], [2, 0, 1], [2, 0, 1]], dtype=dtype) + y = np.array([0, 1, 2, 2, 1], dtype=dtype) X_csr = csr_matrix(X) for mutual_info in (mutual_info_regression, mutual_info_classif): From 38b1c6589c0dd8842fde28800a4a4d4ce22ccac7 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Wed, 9 Mar 2022 11:59:00 +0100 Subject: [PATCH 02/10] Apply comments from review Co-authored-by: Olivier Grisel --- .../tests/test_mutual_info.py | 32 ++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/sklearn/feature_selection/tests/test_mutual_info.py b/sklearn/feature_selection/tests/test_mutual_info.py index 25938da87800b..d4251776bf69d 100644 --- a/sklearn/feature_selection/tests/test_mutual_info.py +++ b/sklearn/feature_selection/tests/test_mutual_info.py @@ -10,18 +10,17 @@ DTYPES = (np.float64, np.float32) -@pytest.mark.parametrize("dtype", DTYPES) -def test_compute_mi_dd(dtype): +def test_compute_mi_dd(): # In discrete case computations are straightforward and can be done # by hand on given vectors. 
- x = np.array([0, 1, 1, 0, 0], dtype=dtype) - y = np.array([1, 0, 0, 0, 1], dtype=dtype) + x = np.array([0, 1, 1, 0, 0]) + y = np.array([1, 0, 0, 0, 1]) H_x = H_y = -(3 / 5) * np.log(3 / 5) - (2 / 5) * np.log(2 / 5) H_xy = -1 / 5 * np.log(1 / 5) - 2 / 5 * np.log(2 / 5) - 2 / 5 * np.log(2 / 5) I_xy = H_x + H_y - H_xy - assert_almost_equal(_compute_mi(x, y, True, True), I_xy) + assert_almost_equal(_compute_mi(x, y, x_discrete=True, y_discrete=True), I_xy) @pytest.mark.parametrize("dtype", DTYPES) @@ -54,7 +53,9 @@ def test_compute_mi_cc(dtype): # Theory and computed values won't be very close, assert that the # first figures after decimal point match. for n_neighbors in [3, 5, 7]: - I_computed = _compute_mi(x, y, False, False, n_neighbors) + I_computed = _compute_mi( + x, y, x_discrete=False, y_discrete=False, n_neighbors=n_neighbors + ) assert_almost_equal(I_computed, I_theory, 1) @@ -92,7 +93,9 @@ def test_compute_mi_cd(dtype): # Assert the same tolerance. for n_neighbors in [3, 5, 7]: - I_computed = _compute_mi(x, y, True, False, n_neighbors) + I_computed = _compute_mi( + x, y, x_discrete=True, y_discrete=False, n_neighbors=n_neighbors + ) assert_almost_equal(I_computed, I_theory, 1) @@ -107,11 +110,11 @@ def test_compute_mi_cd_unique_label(dtype): y[mask] = np.random.uniform(-1, 1, size=np.sum(mask)).astype(dtype) y[~mask] = np.random.uniform(0, 2, size=np.sum(~mask)).astype(dtype) - mi_1 = _compute_mi(x, y, True, False) + mi_1 = _compute_mi(x, y, x_discrete=True, y_discrete=False) x = np.hstack((x, 2)) y = np.hstack((y, 10)) - mi_2 = _compute_mi(x, y, True, False) + mi_2 = _compute_mi(x, y, x_discrete=True, y_discrete=False) assert mi_1 == mi_2 @@ -146,6 +149,9 @@ def test_mutual_info_regression(dtype): mi = mutual_info_regression(X, y, random_state=0) assert_array_equal(np.argsort(-mi), np.array([1, 2, 0])) + # XXX: should mutual_info_regression be fixed to avoid + # up-casting float32 inputs to float64? 
+ assert mi.dtype == np.float64 @pytest.mark.parametrize("dtype", DTYPES) @@ -176,10 +182,14 @@ def test_mutual_info_classif_mixed(dtype): @pytest.mark.parametrize("dtype", DTYPES) def test_mutual_info_options(dtype): X = np.array([[0, 0, 0], [1, 1, 0], [2, 0, 1], [2, 0, 1], [2, 0, 1]], dtype=dtype) - y = np.array([0, 1, 2, 2, 1], dtype=dtype) + y = np.array([0, 1, 2, 2, 1]) X_csr = csr_matrix(X) - for mutual_info in (mutual_info_regression, mutual_info_classif): + for mutual_info, y_dtype in ( + (mutual_info_regression, dtype), + (mutual_info_classif, y.dtype), + ): + y = y.astype(dtype) with pytest.raises(ValueError): mutual_info(X_csr, y, discrete_features=False) with pytest.raises(ValueError): From bba8a121f0b003e6720b8ad8355138c51c334dea Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Thu, 17 Mar 2022 19:10:48 +0100 Subject: [PATCH 03/10] TST Use global_dtype --- .../tests/test_mutual_info.py | 50 +++++++++---------- 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/sklearn/feature_selection/tests/test_mutual_info.py b/sklearn/feature_selection/tests/test_mutual_info.py index ed8df03130c44..419a1364d22ab 100644 --- a/sklearn/feature_selection/tests/test_mutual_info.py +++ b/sklearn/feature_selection/tests/test_mutual_info.py @@ -10,8 +10,6 @@ from sklearn.feature_selection._mutual_info import _compute_mi from sklearn.feature_selection import mutual_info_regression, mutual_info_classif -DTYPES = (np.float64, np.float32) - def test_compute_mi_dd(): # In discrete case computations are straightforward and can be done @@ -61,8 +59,7 @@ def test_compute_mi_cc(global_dtype): assert_allclose(I_computed, I_theory, rtol=1e-1) -@pytest.mark.parametrize("dtype", DTYPES) -def test_compute_mi_cd(dtype): +def test_compute_mi_cd(global_dtype): # To test define a joint distribution as follows: # p(x, y) = p(x) p(y | x) # X ~ Bernoulli(p) @@ -84,10 +81,10 @@ def test_compute_mi_cd(dtype): for p in [0.3, 0.5, 0.7]: x = rng.uniform(size=n_samples) > p - y = np.empty(n_samples, dtype) + y = np.empty(n_samples, global_dtype) mask = x == 0 - y[mask] = rng.uniform(-1, 1, size=np.sum(mask)).astype(dtype) - y[~mask] = rng.uniform(0, 2, size=np.sum(~mask)).astype(dtype) + y[mask] = rng.uniform(-1, 1, size=np.sum(mask)).astype(global_dtype) + y[~mask] = rng.uniform(0, 2, size=np.sum(~mask)).astype(global_dtype) I_theory = -0.5 * ( (1 - p) * np.log(0.5 * (1 - p)) + p * np.log(0.5 * p) + np.log(0.5) @@ -101,16 +98,15 @@ def test_compute_mi_cd(dtype): assert_allclose(I_computed, I_theory, rtol=1e-1) -@pytest.mark.parametrize("dtype", DTYPES) -def test_compute_mi_cd_unique_label(dtype): +def test_compute_mi_cd_unique_label(global_dtype): # Test that adding unique label doesn't change MI. n_samples = 100 x = np.random.uniform(size=n_samples) > 0.5 - y = np.empty(n_samples, dtype) + y = np.empty(n_samples, global_dtype) mask = x == 0 - y[mask] = np.random.uniform(-1, 1, size=np.sum(mask)).astype(dtype) - y[~mask] = np.random.uniform(0, 2, size=np.sum(~mask)).astype(dtype) + y[mask] = np.random.uniform(-1, 1, size=np.sum(mask)).astype(global_dtype) + y[~mask] = np.random.uniform(0, 2, size=np.sum(~mask)).astype(global_dtype) mi_1 = _compute_mi(x, y, x_discrete=True, y_discrete=False) @@ -122,10 +118,11 @@ def test_compute_mi_cd_unique_label(dtype): # We are going test that feature ordering by MI matches our expectations. 
-@pytest.mark.parametrize("dtype", DTYPES) -def test_mutual_info_classif_discrete(dtype): - X = np.array([[0, 0, 0], [1, 1, 0], [2, 0, 1], [2, 0, 1], [2, 0, 1]], dtype=dtype) - y = np.array([0, 1, 2, 2, 1], dtype=dtype) +def test_mutual_info_classif_discrete(global_dtype): + X = np.array( + [[0, 0, 0], [1, 1, 0], [2, 0, 1], [2, 0, 1], [2, 0, 1]], dtype=global_dtype + ) + y = np.array([0, 1, 2, 2, 1], dtype=global_dtype) # Here X[:, 0] is the most informative feature, and X[:, 1] is weakly # informative. @@ -133,8 +130,7 @@ def test_mutual_info_classif_discrete(dtype): assert_array_equal(np.argsort(-mi), np.array([0, 2, 1])) -@pytest.mark.parametrize("dtype", DTYPES) -def test_mutual_info_regression(dtype): +def test_mutual_info_regression(global_dtype): # We generate sample from multivariate normal distribution, using # transformation from initially uncorrelated variables. The zero # variables after transformation is selected as the target vector, @@ -145,7 +141,7 @@ def test_mutual_info_regression(dtype): mean = np.zeros(4) rng = check_random_state(0) - Z = rng.multivariate_normal(mean, cov, size=1000).astype(dtype) + Z = rng.multivariate_normal(mean, cov, size=1000).astype(global_dtype) X = Z[:, 1:] y = Z[:, 0] @@ -156,12 +152,11 @@ def test_mutual_info_regression(dtype): assert mi.dtype == np.float64 -@pytest.mark.parametrize("dtype", DTYPES) -def test_mutual_info_classif_mixed(dtype): +def test_mutual_info_classif_mixed(global_dtype): # Here the target is discrete and there are two continuous and one # discrete feature. The idea of this test is clear from the code. rng = check_random_state(0) - X = rng.rand(1000, 3).astype(dtype) + X = rng.rand(1000, 3).astype(global_dtype) X[:, 1] += X[:, 0] y = ((0.5 * X[:, 0] + X[:, 2]) > 0.5).astype(int) X[:, 2] = X[:, 2] > 0.5 @@ -181,17 +176,18 @@ def test_mutual_info_classif_mixed(dtype): assert mi_nn[2] == mi[2] -@pytest.mark.parametrize("dtype", DTYPES) -def test_mutual_info_options(dtype): - X = np.array([[0, 0, 0], [1, 1, 0], [2, 0, 1], [2, 0, 1], [2, 0, 1]], dtype=dtype) +def test_mutual_info_options(global_dtype): + X = np.array( + [[0, 0, 0], [1, 1, 0], [2, 0, 1], [2, 0, 1], [2, 0, 1]], dtype=global_dtype + ) y = np.array([0, 1, 2, 2, 1]) X_csr = csr_matrix(X) for mutual_info, y_dtype in ( - (mutual_info_regression, dtype), + (mutual_info_regression, global_dtype), (mutual_info_classif, y.dtype), ): - y = y.astype(dtype) + y = y.astype(global_dtype) with pytest.raises(ValueError): mutual_info(X_csr, y, discrete_features=False) with pytest.raises(ValueError): From 33cc917dd7217e3dc93689af19f038874617b72c Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Fri, 18 Mar 2022 15:51:21 +0100 Subject: [PATCH 04/10] TST Review comments --- sklearn/feature_selection/tests/test_mutual_info.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sklearn/feature_selection/tests/test_mutual_info.py b/sklearn/feature_selection/tests/test_mutual_info.py index 419a1364d22ab..dc265c6eb62b6 100644 --- a/sklearn/feature_selection/tests/test_mutual_info.py +++ b/sklearn/feature_selection/tests/test_mutual_info.py @@ -83,7 +83,7 @@ def test_compute_mi_cd(global_dtype): y = np.empty(n_samples, global_dtype) mask = x == 0 - y[mask] = rng.uniform(-1, 1, size=np.sum(mask)).astype(global_dtype) + y[mask] = rng.uniform(-1, 1, size=np.sum(mask)) y[~mask] = rng.uniform(0, 2, size=np.sum(~mask)).astype(global_dtype) I_theory = -0.5 * ( @@ -106,7 +106,7 @@ def test_compute_mi_cd_unique_label(global_dtype): y = np.empty(n_samples, 
global_dtype) mask = x == 0 y[mask] = np.random.uniform(-1, 1, size=np.sum(mask)).astype(global_dtype) - y[~mask] = np.random.uniform(0, 2, size=np.sum(~mask)).astype(global_dtype) + y[~mask] = np.random.uniform(0, 2, size=np.sum(~mask)) mi_1 = _compute_mi(x, y, x_discrete=True, y_discrete=False) @@ -122,7 +122,7 @@ def test_mutual_info_classif_discrete(global_dtype): X = np.array( [[0, 0, 0], [1, 1, 0], [2, 0, 1], [2, 0, 1], [2, 0, 1]], dtype=global_dtype ) - y = np.array([0, 1, 2, 2, 1], dtype=global_dtype) + y = np.array([0, 1, 2, 2, 1]) # Here X[:, 0] is the most informative feature, and X[:, 1] is weakly # informative. @@ -206,8 +206,8 @@ def test_mutual_info_options(global_dtype): mi_5 = mutual_info(X, y, discrete_features=[True, False, True], random_state=0) mi_6 = mutual_info(X, y, discrete_features=[0, 2], random_state=0) - assert_array_equal(mi_1, mi_2) - assert_array_equal(mi_3, mi_4) - assert_array_equal(mi_5, mi_6) + assert_allclose(mi_1, mi_2) + assert_allclose(mi_3, mi_4) + assert_allclose(mi_5, mi_6) assert not np.allclose(mi_1, mi_3) From 61022e194d2462c58591f73b7d6a5b024132a7b6 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Wed, 23 Mar 2022 14:41:30 +0100 Subject: [PATCH 05/10] Address comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Jérémie du Boisberranger --- .../tests/test_mutual_info.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/sklearn/feature_selection/tests/test_mutual_info.py b/sklearn/feature_selection/tests/test_mutual_info.py index dc265c6eb62b6..b038218366ed9 100644 --- a/sklearn/feature_selection/tests/test_mutual_info.py +++ b/sklearn/feature_selection/tests/test_mutual_info.py @@ -84,7 +84,7 @@ def test_compute_mi_cd(global_dtype): y = np.empty(n_samples, global_dtype) mask = x == 0 y[mask] = rng.uniform(-1, 1, size=np.sum(mask)) - y[~mask] = rng.uniform(0, 2, size=np.sum(~mask)).astype(global_dtype) + y[~mask] = rng.uniform(0, 2, size=np.sum(~mask)) I_theory = -0.5 * ( (1 - p) * np.log(0.5 * (1 - p)) + p * np.log(0.5 * p) + np.log(0.5) @@ -105,7 +105,7 @@ def test_compute_mi_cd_unique_label(global_dtype): y = np.empty(n_samples, global_dtype) mask = x == 0 - y[mask] = np.random.uniform(-1, 1, size=np.sum(mask)).astype(global_dtype) + y[mask] = np.random.uniform(-1, 1, size=np.sum(mask)) y[~mask] = np.random.uniform(0, 2, size=np.sum(~mask)) mi_1 = _compute_mi(x, y, x_discrete=True, y_discrete=False) @@ -114,14 +114,12 @@ def test_compute_mi_cd_unique_label(global_dtype): y = np.hstack((y, 10)) mi_2 = _compute_mi(x, y, x_discrete=True, y_discrete=False) - assert mi_1 == mi_2 + assert_allclose(mi_1, mi_2) # We are going test that feature ordering by MI matches our expectations. 
def test_mutual_info_classif_discrete(global_dtype): - X = np.array( - [[0, 0, 0], [1, 1, 0], [2, 0, 1], [2, 0, 1], [2, 0, 1]], dtype=global_dtype - ) + X = np.array([[0, 0, 0], [1, 1, 0], [2, 0, 1], [2, 0, 1], [2, 0, 1]]) y = np.array([0, 1, 2, 2, 1]) # Here X[:, 0] is the most informative feature, and X[:, 1] is weakly @@ -180,14 +178,10 @@ def test_mutual_info_options(global_dtype): X = np.array( [[0, 0, 0], [1, 1, 0], [2, 0, 1], [2, 0, 1], [2, 0, 1]], dtype=global_dtype ) - y = np.array([0, 1, 2, 2, 1]) + y = np.array([0, 1, 2, 2, 1], dtype=global_dtype) X_csr = csr_matrix(X) - for mutual_info, y_dtype in ( - (mutual_info_regression, global_dtype), - (mutual_info_classif, y.dtype), - ): - y = y.astype(global_dtype) + for mutual_info in (mutual_info_regression, mutual_info_classif): with pytest.raises(ValueError): mutual_info(X_csr, y, discrete_features=False) with pytest.raises(ValueError): From b31cccbc652e26843b4a03aba39c602928555b13 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Thu, 3 Mar 2022 16:27:48 +0100 Subject: [PATCH 06/10] TST Adapt test_mean_shift.py to test implementations on 32bit datasets --- sklearn/cluster/tests/test_mean_shift.py | 46 +++++++++++++++--------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/sklearn/cluster/tests/test_mean_shift.py b/sklearn/cluster/tests/test_mean_shift.py index cdd1134156173..f63b2d4740c64 100644 --- a/sklearn/cluster/tests/test_mean_shift.py +++ b/sklearn/cluster/tests/test_mean_shift.py @@ -32,6 +32,8 @@ random_state=11, ) +DTYPES = (np.float64, np.float32) + def test_estimate_bandwidth(): # Test estimate_bandwidth @@ -39,10 +41,11 @@ def test_estimate_bandwidth(): assert 0.9 <= bandwidth <= 1.5 -def test_estimate_bandwidth_1sample(): +@pytest.mark.parametrize("dtype", DTYPES) +def test_estimate_bandwidth_1sample(dtype): # Test estimate_bandwidth when n_samples=1 and quantile<1, so that # n_neighbors is set to 1. 
- bandwidth = estimate_bandwidth(X, n_samples=1, quantile=0.3) + bandwidth = estimate_bandwidth(X.astype(dtype), n_samples=1, quantile=0.3) assert bandwidth == pytest.approx(0.0, abs=1e-5) @@ -50,10 +53,11 @@ def test_estimate_bandwidth_1sample(): "bandwidth, cluster_all, expected, first_cluster_label", [(1.2, True, 3, 0), (1.2, False, 4, -1)], ) -def test_mean_shift(bandwidth, cluster_all, expected, first_cluster_label): +@pytest.mark.parametrize("dtype", DTYPES) +def test_mean_shift(dtype, bandwidth, cluster_all, expected, first_cluster_label): # Test MeanShift algorithm ms = MeanShift(bandwidth=bandwidth, cluster_all=cluster_all) - labels = ms.fit(X).labels_ + labels = ms.fit(X.astype(dtype)).labels_ labels_unique = np.unique(labels) n_clusters_ = len(labels_unique) assert n_clusters_ == expected @@ -82,7 +86,8 @@ def test_estimate_bandwidth_with_sparse_matrix(): estimate_bandwidth(X) -def test_parallel(): +@pytest.mark.parametrize("dtype", DTYPES) +def test_parallel(dtype): centers = np.array([[1, 1], [-1, -1], [1, -1]]) + 10 X, _ = make_blobs( n_samples=50, @@ -93,6 +98,8 @@ def test_parallel(): random_state=11, ) + X = X.astype(dtype) + ms1 = MeanShift(n_jobs=2) ms1.fit(X) @@ -103,11 +110,13 @@ def test_parallel(): assert_array_equal(ms1.labels_, ms2.labels_) -def test_meanshift_predict(): +@pytest.mark.parametrize("dtype", DTYPES) +def test_meanshift_predict(dtype): # Test MeanShift.predict ms = MeanShift(bandwidth=1.2) - labels = ms.fit_predict(X) - labels2 = ms.predict(X) + Y = X.astype(dtype) + labels = ms.fit_predict(Y) + labels2 = ms.predict(Y) assert_array_equal(labels, labels2) @@ -128,22 +137,25 @@ def test_unfitted(): assert not hasattr(ms, "labels_") -def test_cluster_intensity_tie(): - X = np.array([[1, 1], [2, 1], [1, 0], [4, 7], [3, 5], [3, 6]]) +@pytest.mark.parametrize("dtype", DTYPES) +def test_cluster_intensity_tie(dtype): + X = np.array([[1, 1], [2, 1], [1, 0], [4, 7], [3, 5], [3, 6]], dtype=dtype) c1 = MeanShift(bandwidth=2).fit(X) - X = np.array([[4, 7], [3, 5], [3, 6], [1, 1], [2, 1], [1, 0]]) + X = np.array([[4, 7], [3, 5], [3, 6], [1, 1], [2, 1], [1, 0]], dtype=dtype) c2 = MeanShift(bandwidth=2).fit(X) assert_array_equal(c1.labels_, [1, 1, 1, 0, 0, 0]) assert_array_equal(c2.labels_, [0, 0, 0, 1, 1, 1]) -def test_bin_seeds(): +@pytest.mark.parametrize("dtype", DTYPES) +def test_bin_seeds(dtype): # Test the bin seeding technique which can be used in the mean shift # algorithm # Data is just 6 points in the plane X = np.array( - [[1.0, 1.0], [1.4, 1.4], [1.8, 1.2], [2.0, 1.0], [2.1, 1.1], [0.0, 0.0]] + [[1.0, 1.0], [1.4, 1.4], [1.8, 1.2], [2.0, 1.0], [2.1, 1.1], [0.0, 0.0]], + dtype=dtype, ) # With a bin coarseness of 1.0 and min_bin_freq of 1, 3 bins should be @@ -174,6 +186,7 @@ def test_bin_seeds(): cluster_std=0.1, random_state=0, ) + X = X.astype(dtype) test_bins = get_bin_seeds(X, 1) assert_array_equal(test_bins, [[0, 0], [1, 1]]) @@ -191,9 +204,10 @@ def test_max_iter(max_iter): assert np.allclose(c1, c2) -def test_mean_shift_zero_bandwidth(): +@pytest.mark.parametrize("dtype", DTYPES) +def test_mean_shift_zero_bandwidth(dtype): # Check that mean shift works when the estimated bandwidth is 0. - X = np.array([1, 1, 1, 2, 2, 2, 3, 3]).reshape(-1, 1) + X = np.array([1, 1, 1, 2, 2, 2, 3, 3]).reshape(-1, 1).astype(dtype) # estimate_bandwidth with default args returns 0 on this dataset bandwidth = estimate_bandwidth(X) @@ -206,7 +220,7 @@ def test_mean_shift_zero_bandwidth(): # to no binning. 
ms_binning = MeanShift(bin_seeding=True, bandwidth=None).fit(X) ms_nobinning = MeanShift(bin_seeding=False).fit(X) - expected_labels = np.array([0, 0, 0, 1, 1, 1, 2, 2]) + expected_labels = np.array([0, 0, 0, 1, 1, 1, 2, 2], dtype=dtype) assert v_measure_score(ms_binning.labels_, expected_labels) == 1 assert v_measure_score(ms_nobinning.labels_, expected_labels) == 1 From ab3466690f75baa569f0ca08cc6280c8dd55e864 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Thu, 17 Mar 2022 15:59:43 +0100 Subject: [PATCH 07/10] TST Use global_dtype --- sklearn/cluster/tests/test_mean_shift.py | 45 ++++++++++-------------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/sklearn/cluster/tests/test_mean_shift.py b/sklearn/cluster/tests/test_mean_shift.py index f63b2d4740c64..eeb86b1131319 100644 --- a/sklearn/cluster/tests/test_mean_shift.py +++ b/sklearn/cluster/tests/test_mean_shift.py @@ -32,8 +32,6 @@ random_state=11, ) -DTYPES = (np.float64, np.float32) - def test_estimate_bandwidth(): # Test estimate_bandwidth @@ -41,11 +39,10 @@ def test_estimate_bandwidth(): assert 0.9 <= bandwidth <= 1.5 -@pytest.mark.parametrize("dtype", DTYPES) -def test_estimate_bandwidth_1sample(dtype): +def test_estimate_bandwidth_1sample(global_dtype): # Test estimate_bandwidth when n_samples=1 and quantile<1, so that # n_neighbors is set to 1. - bandwidth = estimate_bandwidth(X.astype(dtype), n_samples=1, quantile=0.3) + bandwidth = estimate_bandwidth(X.astype(global_dtype), n_samples=1, quantile=0.3) assert bandwidth == pytest.approx(0.0, abs=1e-5) @@ -53,11 +50,12 @@ def test_estimate_bandwidth_1sample(dtype): "bandwidth, cluster_all, expected, first_cluster_label", [(1.2, True, 3, 0), (1.2, False, 4, -1)], ) -@pytest.mark.parametrize("dtype", DTYPES) -def test_mean_shift(dtype, bandwidth, cluster_all, expected, first_cluster_label): +def test_mean_shift( + global_dtype, bandwidth, cluster_all, expected, first_cluster_label +): # Test MeanShift algorithm ms = MeanShift(bandwidth=bandwidth, cluster_all=cluster_all) - labels = ms.fit(X.astype(dtype)).labels_ + labels = ms.fit(X.astype(global_dtype)).labels_ labels_unique = np.unique(labels) n_clusters_ = len(labels_unique) assert n_clusters_ == expected @@ -86,8 +84,7 @@ def test_estimate_bandwidth_with_sparse_matrix(): estimate_bandwidth(X) -@pytest.mark.parametrize("dtype", DTYPES) -def test_parallel(dtype): +def test_parallel(global_dtype): centers = np.array([[1, 1], [-1, -1], [1, -1]]) + 10 X, _ = make_blobs( n_samples=50, @@ -98,7 +95,7 @@ def test_parallel(dtype): random_state=11, ) - X = X.astype(dtype) + X = X.astype(global_dtype) ms1 = MeanShift(n_jobs=2) ms1.fit(X) @@ -110,11 +107,10 @@ def test_parallel(dtype): assert_array_equal(ms1.labels_, ms2.labels_) -@pytest.mark.parametrize("dtype", DTYPES) -def test_meanshift_predict(dtype): +def test_meanshift_predict(global_dtype): # Test MeanShift.predict ms = MeanShift(bandwidth=1.2) - Y = X.astype(dtype) + Y = X.astype(global_dtype) labels = ms.fit_predict(Y) labels2 = ms.predict(Y) assert_array_equal(labels, labels2) @@ -137,25 +133,23 @@ def test_unfitted(): assert not hasattr(ms, "labels_") -@pytest.mark.parametrize("dtype", DTYPES) -def test_cluster_intensity_tie(dtype): - X = np.array([[1, 1], [2, 1], [1, 0], [4, 7], [3, 5], [3, 6]], dtype=dtype) +def test_cluster_intensity_tie(global_dtype): + X = np.array([[1, 1], [2, 1], [1, 0], [4, 7], [3, 5], [3, 6]], dtype=global_dtype) c1 = MeanShift(bandwidth=2).fit(X) - X = np.array([[4, 7], [3, 5], [3, 6], [1, 1], [2, 1], [1, 0]], dtype=dtype) + X 
= np.array([[4, 7], [3, 5], [3, 6], [1, 1], [2, 1], [1, 0]], dtype=global_dtype) c2 = MeanShift(bandwidth=2).fit(X) assert_array_equal(c1.labels_, [1, 1, 1, 0, 0, 0]) assert_array_equal(c2.labels_, [0, 0, 0, 1, 1, 1]) -@pytest.mark.parametrize("dtype", DTYPES) -def test_bin_seeds(dtype): +def test_bin_seeds(global_dtype): # Test the bin seeding technique which can be used in the mean shift # algorithm # Data is just 6 points in the plane X = np.array( [[1.0, 1.0], [1.4, 1.4], [1.8, 1.2], [2.0, 1.0], [2.1, 1.1], [0.0, 0.0]], - dtype=dtype, + dtype=global_dtype, ) # With a bin coarseness of 1.0 and min_bin_freq of 1, 3 bins should be @@ -186,7 +180,7 @@ def test_bin_seeds(dtype): cluster_std=0.1, random_state=0, ) - X = X.astype(dtype) + X = X.astype(global_dtype) test_bins = get_bin_seeds(X, 1) assert_array_equal(test_bins, [[0, 0], [1, 1]]) @@ -204,10 +198,9 @@ def test_max_iter(max_iter): assert np.allclose(c1, c2) -@pytest.mark.parametrize("dtype", DTYPES) -def test_mean_shift_zero_bandwidth(dtype): +def test_mean_shift_zero_bandwidth(global_dtype): # Check that mean shift works when the estimated bandwidth is 0. - X = np.array([1, 1, 1, 2, 2, 2, 3, 3]).reshape(-1, 1).astype(dtype) + X = np.array([1, 1, 1, 2, 2, 2, 3, 3]).reshape(-1, 1).astype(global_dtype) # estimate_bandwidth with default args returns 0 on this dataset bandwidth = estimate_bandwidth(X) @@ -220,7 +213,7 @@ def test_mean_shift_zero_bandwidth(dtype): # to no binning. ms_binning = MeanShift(bin_seeding=True, bandwidth=None).fit(X) ms_nobinning = MeanShift(bin_seeding=False).fit(X) - expected_labels = np.array([0, 0, 0, 1, 1, 1, 2, 2], dtype=dtype) + expected_labels = np.array([0, 0, 0, 1, 1, 1, 2, 2], dtype=global_dtype) assert v_measure_score(ms_binning.labels_, expected_labels) == 1 assert v_measure_score(ms_nobinning.labels_, expected_labels) == 1 From 70cde82a3233c0ae3a70389e0c8e77b6ba90833b Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Thu, 17 Mar 2022 16:06:46 +0100 Subject: [PATCH 08/10] Address review comments Co-authored-by: Olivier Grisel --- sklearn/cluster/tests/test_mean_shift.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/sklearn/cluster/tests/test_mean_shift.py b/sklearn/cluster/tests/test_mean_shift.py index eeb86b1131319..860ad477e1530 100644 --- a/sklearn/cluster/tests/test_mean_shift.py +++ b/sklearn/cluster/tests/test_mean_shift.py @@ -109,10 +109,11 @@ def test_parallel(global_dtype): def test_meanshift_predict(global_dtype): # Test MeanShift.predict + global X ms = MeanShift(bandwidth=1.2) - Y = X.astype(global_dtype) - labels = ms.fit_predict(Y) - labels2 = ms.predict(Y) + X = X.astype(global_dtype) + labels = ms.fit_predict(X) + labels2 = ms.predict(X) assert_array_equal(labels, labels2) @@ -213,8 +214,8 @@ def test_mean_shift_zero_bandwidth(global_dtype): # to no binning. 
ms_binning = MeanShift(bin_seeding=True, bandwidth=None).fit(X) ms_nobinning = MeanShift(bin_seeding=False).fit(X) - expected_labels = np.array([0, 0, 0, 1, 1, 1, 2, 2], dtype=global_dtype) + expected_labels = np.array([0, 0, 0, 1, 1, 1, 2, 2]) - assert v_measure_score(ms_binning.labels_, expected_labels) == 1 - assert v_measure_score(ms_nobinning.labels_, expected_labels) == 1 + assert v_measure_score(ms_binning.labels_, expected_labels) == pytest.approx(1) + assert v_measure_score(ms_nobinning.labels_, expected_labels) == pytest.approx(1) assert_allclose(ms_binning.cluster_centers_, ms_nobinning.cluster_centers_) From 063b93508b7ea2a07d14eae2cb1862821999386d Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Wed, 23 Mar 2022 12:17:36 +0100 Subject: [PATCH 09/10] Assert review comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Jérémie du Boisberranger --- sklearn/cluster/tests/test_mean_shift.py | 39 +++++++++++-------- .../tests/test_mutual_info.py | 10 +++-- 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/sklearn/cluster/tests/test_mean_shift.py b/sklearn/cluster/tests/test_mean_shift.py index 860ad477e1530..6e2651c406e33 100644 --- a/sklearn/cluster/tests/test_mean_shift.py +++ b/sklearn/cluster/tests/test_mean_shift.py @@ -10,7 +10,6 @@ from scipy import sparse from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal from sklearn.utils._testing import assert_allclose from sklearn.cluster import MeanShift @@ -42,8 +41,12 @@ def test_estimate_bandwidth(): def test_estimate_bandwidth_1sample(global_dtype): # Test estimate_bandwidth when n_samples=1 and quantile<1, so that # n_neighbors is set to 1. - bandwidth = estimate_bandwidth(X.astype(global_dtype), n_samples=1, quantile=0.3) - assert bandwidth == pytest.approx(0.0, abs=1e-5) + bandwidth = estimate_bandwidth( + X.astype(global_dtype, copy=False), n_samples=1, quantile=0.3 + ) + + assert bandwidth.dtype == X.dtype + assert_allclose(bandwidth, 0.0, atol=1e-5) @pytest.mark.parametrize( @@ -54,14 +57,15 @@ def test_mean_shift( global_dtype, bandwidth, cluster_all, expected, first_cluster_label ): # Test MeanShift algorithm + X_ = X.astype(global_dtype, copy=False) ms = MeanShift(bandwidth=bandwidth, cluster_all=cluster_all) - labels = ms.fit(X.astype(global_dtype)).labels_ + labels = ms.fit(X_).labels_ labels_unique = np.unique(labels) n_clusters_ = len(labels_unique) assert n_clusters_ == expected assert labels_unique[0] == first_cluster_label - cluster_centers, labels_mean_shift = mean_shift(X, cluster_all=cluster_all) + cluster_centers, labels_mean_shift = mean_shift(X_, cluster_all=cluster_all) labels_mean_shift_unique = np.unique(labels_mean_shift) n_clusters_mean_shift = len(labels_mean_shift_unique) assert n_clusters_mean_shift == expected @@ -95,7 +99,7 @@ def test_parallel(global_dtype): random_state=11, ) - X = X.astype(global_dtype) + X = X.astype(global_dtype, copy=False) ms1 = MeanShift(n_jobs=2) ms1.fit(X) @@ -103,17 +107,16 @@ def test_parallel(global_dtype): ms2 = MeanShift() ms2.fit(X) - assert_array_almost_equal(ms1.cluster_centers_, ms2.cluster_centers_) + assert_allclose(ms1.cluster_centers_, ms2.cluster_centers_) assert_array_equal(ms1.labels_, ms2.labels_) def test_meanshift_predict(global_dtype): # Test MeanShift.predict - global X ms = MeanShift(bandwidth=1.2) - X = X.astype(global_dtype) - labels = ms.fit_predict(X) - labels2 = ms.predict(X) + X_ = X.astype(global_dtype, 
copy=False) + labels = ms.fit_predict(X_) + labels2 = ms.predict(X_) assert_array_equal(labels, labels2) @@ -171,7 +174,7 @@ def test_bin_seeds(global_dtype): # we bail and use the whole data here. with warnings.catch_warnings(record=True): test_bins = get_bin_seeds(X, 0.01, 1) - assert_array_almost_equal(test_bins, X) + assert_allclose(test_bins, X) # tight clusters around [0, 0] and [1, 1], only get two bins X, _ = make_blobs( @@ -181,7 +184,7 @@ def test_bin_seeds(global_dtype): cluster_std=0.1, random_state=0, ) - X = X.astype(global_dtype) + X = X.astype(global_dtype, copy=False) test_bins = get_bin_seeds(X, 1) assert_array_equal(test_bins, [[0, 0], [1, 1]]) @@ -201,7 +204,11 @@ def test_max_iter(max_iter): def test_mean_shift_zero_bandwidth(global_dtype): # Check that mean shift works when the estimated bandwidth is 0. - X = np.array([1, 1, 1, 2, 2, 2, 3, 3]).reshape(-1, 1).astype(global_dtype) + X = ( + np.array([1, 1, 1, 2, 2, 2, 3, 3]) + .reshape(-1, 1) + .astype(global_dtype, copy=False) + ) # estimate_bandwidth with default args returns 0 on this dataset bandwidth = estimate_bandwidth(X) @@ -216,6 +223,6 @@ def test_mean_shift_zero_bandwidth(global_dtype): ms_nobinning = MeanShift(bin_seeding=False).fit(X) expected_labels = np.array([0, 0, 0, 1, 1, 1, 2, 2]) - assert v_measure_score(ms_binning.labels_, expected_labels) == pytest.approx(1) - assert v_measure_score(ms_nobinning.labels_, expected_labels) == pytest.approx(1) + assert_allclose(v_measure_score(ms_binning.labels_, expected_labels), 1) + assert_allclose(v_measure_score(ms_nobinning.labels_, expected_labels), 1) assert_allclose(ms_binning.cluster_centers_, ms_nobinning.cluster_centers_) diff --git a/sklearn/feature_selection/tests/test_mutual_info.py b/sklearn/feature_selection/tests/test_mutual_info.py index b038218366ed9..af2b733efd62d 100644 --- a/sklearn/feature_selection/tests/test_mutual_info.py +++ b/sklearn/feature_selection/tests/test_mutual_info.py @@ -119,7 +119,9 @@ def test_compute_mi_cd_unique_label(global_dtype): # We are going test that feature ordering by MI matches our expectations. def test_mutual_info_classif_discrete(global_dtype): - X = np.array([[0, 0, 0], [1, 1, 0], [2, 0, 1], [2, 0, 1], [2, 0, 1]]) + X = np.array( + [[0, 0, 0], [1, 1, 0], [2, 0, 1], [2, 0, 1], [2, 0, 1]], dtype=global_dtype + ) y = np.array([0, 1, 2, 2, 1]) # Here X[:, 0] is the most informative feature, and X[:, 1] is weakly @@ -139,7 +141,7 @@ def test_mutual_info_regression(global_dtype): mean = np.zeros(4) rng = check_random_state(0) - Z = rng.multivariate_normal(mean, cov, size=1000).astype(global_dtype) + Z = rng.multivariate_normal(mean, cov, size=1000).astype(global_dtype, copy=False) X = Z[:, 1:] y = Z[:, 0] @@ -154,7 +156,7 @@ def test_mutual_info_classif_mixed(global_dtype): # Here the target is discrete and there are two continuous and one # discrete feature. The idea of this test is clear from the code. 
rng = check_random_state(0) - X = rng.rand(1000, 3).astype(global_dtype) + X = rng.rand(1000, 3).astype(global_dtype, copy=False) X[:, 1] += X[:, 0] y = ((0.5 * X[:, 0] + X[:, 2]) > 0.5).astype(int) X[:, 2] = X[:, 2] > 0.5 @@ -204,4 +206,4 @@ def test_mutual_info_options(global_dtype): assert_allclose(mi_3, mi_4) assert_allclose(mi_5, mi_6) - assert not np.allclose(mi_1, mi_3) + assert not np.allclose(mi_1, mi_3) From 8a95627de322535b9ca8820a04ebbba447e27e50 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Wed, 23 Mar 2022 18:11:14 +0100 Subject: [PATCH 10/10] TST Undo changes made to other files --- sklearn/cluster/tests/test_mean_shift.py | 59 +++++++++--------------- 1 file changed, 22 insertions(+), 37 deletions(-) diff --git a/sklearn/cluster/tests/test_mean_shift.py b/sklearn/cluster/tests/test_mean_shift.py index 6e2651c406e33..cdd1134156173 100644 --- a/sklearn/cluster/tests/test_mean_shift.py +++ b/sklearn/cluster/tests/test_mean_shift.py @@ -10,6 +10,7 @@ from scipy import sparse from sklearn.utils._testing import assert_array_equal +from sklearn.utils._testing import assert_array_almost_equal from sklearn.utils._testing import assert_allclose from sklearn.cluster import MeanShift @@ -38,34 +39,27 @@ def test_estimate_bandwidth(): assert 0.9 <= bandwidth <= 1.5 -def test_estimate_bandwidth_1sample(global_dtype): +def test_estimate_bandwidth_1sample(): # Test estimate_bandwidth when n_samples=1 and quantile<1, so that # n_neighbors is set to 1. - bandwidth = estimate_bandwidth( - X.astype(global_dtype, copy=False), n_samples=1, quantile=0.3 - ) - - assert bandwidth.dtype == X.dtype - assert_allclose(bandwidth, 0.0, atol=1e-5) + bandwidth = estimate_bandwidth(X, n_samples=1, quantile=0.3) + assert bandwidth == pytest.approx(0.0, abs=1e-5) @pytest.mark.parametrize( "bandwidth, cluster_all, expected, first_cluster_label", [(1.2, True, 3, 0), (1.2, False, 4, -1)], ) -def test_mean_shift( - global_dtype, bandwidth, cluster_all, expected, first_cluster_label -): +def test_mean_shift(bandwidth, cluster_all, expected, first_cluster_label): # Test MeanShift algorithm - X_ = X.astype(global_dtype, copy=False) ms = MeanShift(bandwidth=bandwidth, cluster_all=cluster_all) - labels = ms.fit(X_).labels_ + labels = ms.fit(X).labels_ labels_unique = np.unique(labels) n_clusters_ = len(labels_unique) assert n_clusters_ == expected assert labels_unique[0] == first_cluster_label - cluster_centers, labels_mean_shift = mean_shift(X_, cluster_all=cluster_all) + cluster_centers, labels_mean_shift = mean_shift(X, cluster_all=cluster_all) labels_mean_shift_unique = np.unique(labels_mean_shift) n_clusters_mean_shift = len(labels_mean_shift_unique) assert n_clusters_mean_shift == expected @@ -88,7 +82,7 @@ def test_estimate_bandwidth_with_sparse_matrix(): estimate_bandwidth(X) -def test_parallel(global_dtype): +def test_parallel(): centers = np.array([[1, 1], [-1, -1], [1, -1]]) + 10 X, _ = make_blobs( n_samples=50, @@ -99,24 +93,21 @@ def test_parallel(global_dtype): random_state=11, ) - X = X.astype(global_dtype, copy=False) - ms1 = MeanShift(n_jobs=2) ms1.fit(X) ms2 = MeanShift() ms2.fit(X) - assert_allclose(ms1.cluster_centers_, ms2.cluster_centers_) + assert_array_almost_equal(ms1.cluster_centers_, ms2.cluster_centers_) assert_array_equal(ms1.labels_, ms2.labels_) -def test_meanshift_predict(global_dtype): +def test_meanshift_predict(): # Test MeanShift.predict ms = MeanShift(bandwidth=1.2) - X_ = X.astype(global_dtype, copy=False) - labels = ms.fit_predict(X_) - labels2 = ms.predict(X_) + labels 
= ms.fit_predict(X) + labels2 = ms.predict(X) assert_array_equal(labels, labels2) @@ -137,23 +128,22 @@ def test_unfitted(): assert not hasattr(ms, "labels_") -def test_cluster_intensity_tie(global_dtype): - X = np.array([[1, 1], [2, 1], [1, 0], [4, 7], [3, 5], [3, 6]], dtype=global_dtype) +def test_cluster_intensity_tie(): + X = np.array([[1, 1], [2, 1], [1, 0], [4, 7], [3, 5], [3, 6]]) c1 = MeanShift(bandwidth=2).fit(X) - X = np.array([[4, 7], [3, 5], [3, 6], [1, 1], [2, 1], [1, 0]], dtype=global_dtype) + X = np.array([[4, 7], [3, 5], [3, 6], [1, 1], [2, 1], [1, 0]]) c2 = MeanShift(bandwidth=2).fit(X) assert_array_equal(c1.labels_, [1, 1, 1, 0, 0, 0]) assert_array_equal(c2.labels_, [0, 0, 0, 1, 1, 1]) -def test_bin_seeds(global_dtype): +def test_bin_seeds(): # Test the bin seeding technique which can be used in the mean shift # algorithm # Data is just 6 points in the plane X = np.array( - [[1.0, 1.0], [1.4, 1.4], [1.8, 1.2], [2.0, 1.0], [2.1, 1.1], [0.0, 0.0]], - dtype=global_dtype, + [[1.0, 1.0], [1.4, 1.4], [1.8, 1.2], [2.0, 1.0], [2.1, 1.1], [0.0, 0.0]] ) # With a bin coarseness of 1.0 and min_bin_freq of 1, 3 bins should be @@ -174,7 +164,7 @@ def test_bin_seeds(global_dtype): # we bail and use the whole data here. with warnings.catch_warnings(record=True): test_bins = get_bin_seeds(X, 0.01, 1) - assert_allclose(test_bins, X) + assert_array_almost_equal(test_bins, X) # tight clusters around [0, 0] and [1, 1], only get two bins X, _ = make_blobs( @@ -184,7 +174,6 @@ def test_bin_seeds(global_dtype): cluster_std=0.1, random_state=0, ) - X = X.astype(global_dtype, copy=False) test_bins = get_bin_seeds(X, 1) assert_array_equal(test_bins, [[0, 0], [1, 1]]) @@ -202,13 +191,9 @@ def test_max_iter(max_iter): assert np.allclose(c1, c2) -def test_mean_shift_zero_bandwidth(global_dtype): +def test_mean_shift_zero_bandwidth(): # Check that mean shift works when the estimated bandwidth is 0. - X = ( - np.array([1, 1, 1, 2, 2, 2, 3, 3]) - .reshape(-1, 1) - .astype(global_dtype, copy=False) - ) + X = np.array([1, 1, 1, 2, 2, 2, 3, 3]).reshape(-1, 1) # estimate_bandwidth with default args returns 0 on this dataset bandwidth = estimate_bandwidth(X) @@ -223,6 +208,6 @@ def test_mean_shift_zero_bandwidth(global_dtype): ms_nobinning = MeanShift(bin_seeding=False).fit(X) expected_labels = np.array([0, 0, 0, 1, 1, 1, 2, 2]) - assert_allclose(v_measure_score(ms_binning.labels_, expected_labels), 1) - assert_allclose(v_measure_score(ms_nobinning.labels_, expected_labels), 1) + assert v_measure_score(ms_binning.labels_, expected_labels) == 1 + assert v_measure_score(ms_nobinning.labels_, expected_labels) == 1 assert_allclose(ms_binning.cluster_centers_, ms_nobinning.cluster_centers_)
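
A note on the `global_dtype` fixture that patches 03 and 07 switch to: it is defined in scikit-learn's shared conftest rather than in the test files touched here, which is why these diffs only remove the local `DTYPES` tuples. The sketch below is a minimal, hypothetical equivalent; the fixture name and the way it is consumed match the diffs above, but the exact definition and the `SKLEARN_RUN_FLOAT32_TESTS` opt-in are assumptions about the real conftest, which may differ.

    # conftest.py (hypothetical sketch, not part of this series)
    import os

    import numpy as np
    import pytest

    # Run float32 variants only when explicitly requested, so the
    # default test run stays on float64 only.
    _dtypes = [np.float64]
    if os.environ.get("SKLEARN_RUN_FLOAT32_TESTS", "0") == "1":
        _dtypes.append(np.float32)

    @pytest.fixture(params=_dtypes)
    def global_dtype(request):
        # Every test requesting this fixture is collected once per dtype.
        return request.param

With such a fixture, `pytest sklearn/feature_selection/tests/test_mutual_info.py` runs the float64 variants by default and both variants when the environment variable is set, which is the behavior the series converges on.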
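The hand computation that `test_compute_mi_dd` relies on, I(X;Y) = H(X) + H(Y) - H(X,Y) in nats, can be checked independently of scikit-learn. This standalone snippet reproduces the expected value on the same vectors used in the test:

    import numpy as np

    def entropy(labels):
        # Empirical entropy (natural log) of a discrete sample.
        _, counts = np.unique(labels, return_counts=True)
        p = counts / counts.sum()
        return -np.sum(p * np.log(p))

    x = np.array([0, 1, 1, 0, 0])
    y = np.array([1, 0, 0, 0, 1])

    H_x, H_y = entropy(x), entropy(y)
    # Joint entropy via paired labels.
    H_xy = entropy([f"{a},{b}" for a, b in zip(x, y)])
    print(H_x + H_y - H_xy)  # ~0.291 nats, the I_xy the test asserts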
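Finally, the open question recorded in patch 02 (`# XXX: should mutual_info_regression be fixed to avoid up-casting float32 inputs to float64?`) is easy to observe directly. The snippet below is only an illustration of the behavior the series pins down with `assert mi.dtype == np.float64`, not a proposed fix:

    import numpy as np
    from sklearn.feature_selection import mutual_info_regression

    rng = np.random.RandomState(0)
    X = rng.rand(200, 3).astype(np.float32)
    y = (X[:, 0] + 0.1 * rng.rand(200)).astype(np.float32)

    mi = mutual_info_regression(X, y, random_state=0)
    # At the time of this series the estimate comes back as float64
    # even for float32 inputs, which is what the XXX comment questions.
    print(mi.dtype)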