def _ledoit_wolf(X, *, assume_centered, block_size):
    """Estimate the shrunk Ledoit-Wolf covariance matrix.

    The estimate is ``(1 - shrinkage) * emp_cov + shrinkage * mu * I`` with
    ``mu = trace(emp_cov) / n_features``.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        Data to estimate the covariance from. ``X.shape[1]`` is read without
        any check, so a 2D array is required.
        NOTE(review): presumably validated by the caller -- confirm.

    assume_centered : bool
        Forwarded to ``ledoit_wolf_shrinkage`` and ``empirical_covariance``.
        In the single-feature shortcut, the mean of ``X`` is subtracted
        first when this is False.

    block_size : int
        Forwarded to ``ledoit_wolf_shrinkage``. Unused in the single-feature
        shortcut.

    Returns
    -------
    shrunk_cov : ndarray
        Shrunk covariance; a ``(1, 1)`` array in the single-feature shortcut.

    shrinkage : float
        Value returned by ``ledoit_wolf_shrinkage``; ``0.0`` in the
        single-feature shortcut.
    """
    n_features = X.shape[1]

    # for only one feature, the result is the same whatever the shrinkage:
    # (1 - s) * var + s * (var / 1) == var, so skip the shrinkage estimation.
    if X.ndim == 2 and n_features == 1:
        if not assume_centered:
            # Rebinds a new array; the caller's data is left untouched.
            X = X - X.mean()
        return np.atleast_2d((X**2).mean()), 0.0

    # get Ledoit-Wolf shrinkage
    shrinkage = ledoit_wolf_shrinkage(
        X, assume_centered=assume_centered, block_size=block_size
    )
    emp_cov = empirical_covariance(X, assume_centered=assume_centered)

    # np.trace already reduces the diagonal to a scalar.
    mu = np.trace(emp_cov) / n_features
    shrunk_cov = (1.0 - shrinkage) * emp_cov
    # A stride of n_features + 1 over the flattened matrix walks its diagonal.
    shrunk_cov.flat[:: n_features + 1] += shrinkage * mu

    return shrunk_cov, shrinkage
@@ -325,31 +348,13 @@ def ledoit_wolf(X, *, assume_centered=False, block_size=1000): where mu = trace(cov) / n_features """ - X = check_array(X) - # for only one feature, the result is the same whatever the shrinkage - if len(X.shape) == 2 and X.shape[1] == 1: - if not assume_centered: - X = X - X.mean() - return np.atleast_2d((X**2).mean()), 0.0 - if X.ndim == 1: - X = np.reshape(X, (1, -1)) - warnings.warn( - "Only one sample available. You may want to reshape your data array" - ) - n_features = X.size - else: - _, n_features = X.shape + estimator = LedoitWolf( + assume_centered=assume_centered, + block_size=block_size, + store_precision=False, + ).fit(X) - # get Ledoit-Wolf shrinkage - shrinkage = ledoit_wolf_shrinkage( - X, assume_centered=assume_centered, block_size=block_size - ) - emp_cov = empirical_covariance(X, assume_centered=assume_centered) - mu = np.sum(np.trace(emp_cov)) / n_features - shrunk_cov = (1.0 - shrinkage) * emp_cov - shrunk_cov.flat[:: n_features + 1] += shrinkage * mu - - return shrunk_cov, shrinkage + return estimator.covariance_, estimator.shrinkage_ class LedoitWolf(EmpiricalCovariance): @@ -488,10 +493,9 @@ def fit(self, X, y=None): self.location_ = np.zeros(X.shape[1]) else: self.location_ = X.mean(0) - with config_context(assume_finite=True): - covariance, shrinkage = ledoit_wolf( - X - self.location_, assume_centered=True, block_size=self.block_size - ) + covariance, shrinkage = _ledoit_wolf( + X - self.location_, assume_centered=True, block_size=self.block_size + ) self.shrinkage_ = shrinkage self._set_covariance(covariance) diff --git a/sklearn/covariance/tests/test_covariance.py b/sklearn/covariance/tests/test_covariance.py index 6a9031d0fcb36..fb4eeb26138df 100644 --- a/sklearn/covariance/tests/test_covariance.py +++ b/sklearn/covariance/tests/test_covariance.py @@ -7,6 +7,7 @@ import numpy as np import pytest +from sklearn.utils._testing import assert_allclose from sklearn.utils._testing import assert_almost_equal from 
sklearn.utils._testing import assert_array_almost_equal from sklearn.utils._testing import assert_array_equal @@ -23,6 +24,7 @@ OAS, oas, ) +from sklearn.covariance._shrunk_covariance import _ledoit_wolf X, _ = datasets.load_diabetes(return_X_y=True) X_1d = X[:, 0] @@ -158,6 +160,9 @@ def test_ledoit_wolf(): assert_almost_equal(lw.shrinkage_, shrinkage_, 4) assert_almost_equal(lw.shrinkage_, ledoit_wolf_shrinkage(X)) assert_almost_equal(lw.shrinkage_, ledoit_wolf(X)[1]) + assert_almost_equal( + lw.shrinkage_, _ledoit_wolf(X=X, assume_centered=False, block_size=10000)[1] + ) assert_almost_equal(lw.score(X), score_, 4) # compare shrunk covariance obtained from data and from MLE estimate lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X) @@ -172,6 +177,10 @@ def test_ledoit_wolf(): X_1d = X[:, 0].reshape((-1, 1)) lw = LedoitWolf() lw.fit(X_1d) + assert_allclose( + X_1d.var(ddof=0), + _ledoit_wolf(X=X_1d, assume_centered=False, block_size=10000)[0], + ) lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d) assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4) assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_) diff --git a/sklearn/tests/test_public_functions.py b/sklearn/tests/test_public_functions.py index b42780077f1ce..3314cb372627d 100644 --- a/sklearn/tests/test_public_functions.py +++ b/sklearn/tests/test_public_functions.py @@ -130,6 +130,7 @@ def test_function_param_validation(func_module): PARAM_VALIDATION_CLASS_WRAPPER_LIST = [ ("sklearn.decomposition.non_negative_factorization", "sklearn.decomposition.NMF"), + ("sklearn.covariance.ledoit_wolf", "sklearn.covariance.LedoitWolf"), ]