diff --git a/sklearn/feature_selection/tests/test_mutual_info.py b/sklearn/feature_selection/tests/test_mutual_info.py
index 7cc25c3ddd642..af2b733efd62d 100644
--- a/sklearn/feature_selection/tests/test_mutual_info.py
+++ b/sklearn/feature_selection/tests/test_mutual_info.py
@@ -5,7 +5,6 @@
 from sklearn.utils import check_random_state
 from sklearn.utils._testing import (
     assert_array_equal,
-    assert_almost_equal,
     assert_allclose,
 )
 from sklearn.feature_selection._mutual_info import _compute_mi
@@ -22,7 +21,7 @@ def test_compute_mi_dd():
     H_xy = -1 / 5 * np.log(1 / 5) - 2 / 5 * np.log(2 / 5) - 2 / 5 * np.log(2 / 5)
     I_xy = H_x + H_y - H_xy
 
-    assert_almost_equal(_compute_mi(x, y, True, True), I_xy)
+    assert_allclose(_compute_mi(x, y, x_discrete=True, y_discrete=True), I_xy)
 
 
 def test_compute_mi_cc(global_dtype):
@@ -54,11 +53,13 @@ def test_compute_mi_cc(global_dtype):
     # Theory and computed values won't be very close
     # We here check with a large relative tolerance
     for n_neighbors in [3, 5, 7]:
-        I_computed = _compute_mi(x, y, False, False, n_neighbors)
+        I_computed = _compute_mi(
+            x, y, x_discrete=False, y_discrete=False, n_neighbors=n_neighbors
+        )
         assert_allclose(I_computed, I_theory, rtol=1e-1)
 
 
-def test_compute_mi_cd():
+def test_compute_mi_cd(global_dtype):
     # To test define a joint distribution as follows:
     # p(x, y) = p(x) p(y | x)
     # X ~ Bernoulli(p)
@@ -80,7 +81,7 @@ def test_compute_mi_cd():
     for p in [0.3, 0.5, 0.7]:
         x = rng.uniform(size=n_samples) > p
 
-        y = np.empty(n_samples)
+        y = np.empty(n_samples, global_dtype)
         mask = x == 0
         y[mask] = rng.uniform(-1, 1, size=np.sum(mask))
         y[~mask] = rng.uniform(0, 2, size=np.sum(~mask))
@@ -91,32 +92,36 @@
         # Assert the same tolerance.
         for n_neighbors in [3, 5, 7]:
-            I_computed = _compute_mi(x, y, True, False, n_neighbors)
-            assert_almost_equal(I_computed, I_theory, 1)
+            I_computed = _compute_mi(
+                x, y, x_discrete=True, y_discrete=False, n_neighbors=n_neighbors
+            )
+            assert_allclose(I_computed, I_theory, rtol=1e-1)
 
 
-def test_compute_mi_cd_unique_label():
+def test_compute_mi_cd_unique_label(global_dtype):
     # Test that adding unique label doesn't change MI.
     n_samples = 100
     x = np.random.uniform(size=n_samples) > 0.5
 
-    y = np.empty(n_samples)
+    y = np.empty(n_samples, global_dtype)
     mask = x == 0
     y[mask] = np.random.uniform(-1, 1, size=np.sum(mask))
     y[~mask] = np.random.uniform(0, 2, size=np.sum(~mask))
 
-    mi_1 = _compute_mi(x, y, True, False)
+    mi_1 = _compute_mi(x, y, x_discrete=True, y_discrete=False)
 
     x = np.hstack((x, 2))
     y = np.hstack((y, 10))
-    mi_2 = _compute_mi(x, y, True, False)
+    mi_2 = _compute_mi(x, y, x_discrete=True, y_discrete=False)
 
-    assert mi_1 == mi_2
+    assert_allclose(mi_1, mi_2)
 
 
 # We are going test that feature ordering by MI matches our expectations.
-def test_mutual_info_classif_discrete():
-    X = np.array([[0, 0, 0], [1, 1, 0], [2, 0, 1], [2, 0, 1], [2, 0, 1]])
+def test_mutual_info_classif_discrete(global_dtype):
+    X = np.array(
+        [[0, 0, 0], [1, 1, 0], [2, 0, 1], [2, 0, 1], [2, 0, 1]], dtype=global_dtype
+    )
     y = np.array([0, 1, 2, 2, 1])
 
     # Here X[:, 0] is the most informative feature, and X[:, 1] is weakly
@@ -125,7 +130,7 @@
     assert_array_equal(np.argsort(-mi), np.array([0, 2, 1]))
 
 
-def test_mutual_info_regression():
+def test_mutual_info_regression(global_dtype):
     # We generate sample from multivariate normal distribution, using
     # transformation from initially uncorrelated variables. The zero
     # variables after transformation is selected as the target vector,
@@ -136,19 +141,22 @@ def test_mutual_info_regression():
     mean = np.zeros(4)
 
     rng = check_random_state(0)
-    Z = rng.multivariate_normal(mean, cov, size=1000)
+    Z = rng.multivariate_normal(mean, cov, size=1000).astype(global_dtype, copy=False)
     X = Z[:, 1:]
     y = Z[:, 0]
 
     mi = mutual_info_regression(X, y, random_state=0)
     assert_array_equal(np.argsort(-mi), np.array([1, 2, 0]))
+    # XXX: should mutual_info_regression be fixed to avoid
+    # up-casting float32 inputs to float64?
+    assert mi.dtype == np.float64
 
 
-def test_mutual_info_classif_mixed():
+def test_mutual_info_classif_mixed(global_dtype):
     # Here the target is discrete and there are two continuous and one
     # discrete feature. The idea of this test is clear from the code.
     rng = check_random_state(0)
-    X = rng.rand(1000, 3)
+    X = rng.rand(1000, 3).astype(global_dtype, copy=False)
     X[:, 1] += X[:, 0]
     y = ((0.5 * X[:, 0] + X[:, 2]) > 0.5).astype(int)
     X[:, 2] = X[:, 2] > 0.5
@@ -168,9 +176,11 @@
     assert mi_nn[2] == mi[2]
 
 
-def test_mutual_info_options():
-    X = np.array([[0, 0, 0], [1, 1, 0], [2, 0, 1], [2, 0, 1], [2, 0, 1]], dtype=float)
-    y = np.array([0, 1, 2, 2, 1], dtype=float)
+def test_mutual_info_options(global_dtype):
+    X = np.array(
+        [[0, 0, 0], [1, 1, 0], [2, 0, 1], [2, 0, 1], [2, 0, 1]], dtype=global_dtype
+    )
+    y = np.array([0, 1, 2, 2, 1], dtype=global_dtype)
     X_csr = csr_matrix(X)
 
     for mutual_info in (mutual_info_regression, mutual_info_classif):
@@ -192,8 +202,8 @@
         mi_5 = mutual_info(X, y, discrete_features=[True, False, True], random_state=0)
         mi_6 = mutual_info(X, y, discrete_features=[0, 2], random_state=0)
 
-        assert_array_equal(mi_1, mi_2)
-        assert_array_equal(mi_3, mi_4)
-        assert_array_equal(mi_5, mi_6)
+        assert_allclose(mi_1, mi_2)
+        assert_allclose(mi_3, mi_4)
+        assert_allclose(mi_5, mi_6)
 
     assert not np.allclose(mi_1, mi_3)
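Note on the fixture the converted tests now request: each of them takes a `global_dtype` argument, which lets pytest run the same test body against more than one floating-point dtype; that is also why exact comparisons (`assert_almost_equal`, `assert_array_equal`) are replaced by `assert_allclose` with tolerances. As a rough, hypothetical sketch only (not scikit-learn's actual conftest, which may restrict the float32 run to dedicated CI jobs), such a fixture could look like:

```python
# conftest.py -- hypothetical sketch of a dtype fixture; the real
# `global_dtype` fixture ships with scikit-learn's test suite and
# may differ in how the float32 case is enabled.
import numpy as np
import pytest


@pytest.fixture(params=[np.float64, np.float32])
def global_dtype(request):
    # Any test requesting `global_dtype` is collected once per dtype,
    # so the same assertions run on 64-bit and 32-bit inputs.
    return request.param
```

With a parametrized dtype fixture like this in place, tolerance-based assertions are the natural choice, since float32 inputs cannot be expected to reproduce float64 results bit for bit.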