Skip to content

MAINT Parameters validation for covariance.oas #24904

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits on Dec 28, 2022
61 changes: 31 additions & 30 deletions sklearn/covariance/_shrunk_covariance.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,31 @@ def _ledoit_wolf(X, *, assume_centered, block_size):
return shrunk_cov, shrinkage


def _oas(X, *, assume_centered=False):
    """Estimate covariance with the Oracle Approximating Shrinkage algorithm.

    Returns the shrunk covariance matrix and the shrinkage coefficient
    that was applied to it.
    """
    # Single-feature data: the result is the same whatever the shrinkage,
    # so return the (possibly centered) maximum-likelihood variance directly.
    if X.ndim == 2 and X.shape[1] == 1:
        centered = X if assume_centered else X - X.mean()
        return np.atleast_2d((centered**2).mean()), 0.0

    n_samples, n_features = X.shape

    emp_cov = empirical_covariance(X, assume_centered=assume_centered)
    mu = np.trace(emp_cov) / n_features

    # Shrinkage formula taken from Chen et al.'s **implementation**
    # (slightly different from the published paper).
    alpha = np.mean(emp_cov**2)
    numerator = alpha + mu**2
    denominator = (n_samples + 1.0) * (alpha - (mu**2) / n_features)

    if denominator == 0:
        shrinkage = 1.0
    else:
        shrinkage = min(numerator / denominator, 1.0)

    # Blend the empirical covariance with a scaled identity target; the
    # strided `.flat` assignment adds `shrinkage * mu` to the diagonal.
    shrunk_cov = (1.0 - shrinkage) * emp_cov
    shrunk_cov.flat[:: n_features + 1] += shrinkage * mu

    return shrunk_cov, shrinkage


###############################################################################
# Public API
# ShrunkCovariance estimator
Expand Down Expand Up @@ -503,6 +528,7 @@ def fit(self, X, y=None):


# OAS estimator
@validate_params({"X": ["array-like"]})
def oas(X, *, assume_centered=False):
"""Estimate covariance with the Oracle Approximating Shrinkage algorithm.

Expand Down Expand Up @@ -537,35 +563,10 @@ def oas(X, *, assume_centered=False):
The formula we used to implement the OAS is slightly modified compared
to the one given in the article. See :class:`OAS` for more details.
"""
X = np.asarray(X)
# for only one feature, the result is the same whatever the shrinkage
if len(X.shape) == 2 and X.shape[1] == 1:
if not assume_centered:
X = X - X.mean()
return np.atleast_2d((X**2).mean()), 0.0
if X.ndim == 1:
X = np.reshape(X, (1, -1))
warnings.warn(
"Only one sample available. You may want to reshape your data array"
)
n_samples = 1
n_features = X.size
else:
n_samples, n_features = X.shape

emp_cov = empirical_covariance(X, assume_centered=assume_centered)
mu = np.trace(emp_cov) / n_features

# formula from Chen et al.'s **implementation**
alpha = np.mean(emp_cov**2)
num = alpha + mu**2
den = (n_samples + 1.0) * (alpha - (mu**2) / n_features)

shrinkage = 1.0 if den == 0 else min(num / den, 1.0)
shrunk_cov = (1.0 - shrinkage) * emp_cov
shrunk_cov.flat[:: n_features + 1] += shrinkage * mu

return shrunk_cov, shrinkage
estimator = OAS(
assume_centered=assume_centered,
).fit(X)
return estimator.covariance_, estimator.shrinkage_


class OAS(EmpiricalCovariance):
Expand Down Expand Up @@ -697,7 +698,7 @@ def fit(self, X, y=None):
else:
self.location_ = X.mean(0)

covariance, shrinkage = oas(X - self.location_, assume_centered=True)
covariance, shrinkage = _oas(X - self.location_, assume_centered=True)
self.shrinkage_ = shrinkage
self._set_covariance(covariance)

Expand Down
12 changes: 12 additions & 0 deletions sklearn/covariance/tests/test_covariance.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
)
from sklearn.covariance._shrunk_covariance import _ledoit_wolf

from .._shrunk_covariance import _oas

X, _ = datasets.load_diabetes(return_X_y=True)
X_1d = X[:, 0]
n_samples, n_features = X.shape
Expand Down Expand Up @@ -336,6 +338,16 @@ def test_oas():
assert_almost_equal(oa.score(X), score_, 4)
assert oa.precision_ is None

# test function _oas without assuming centered data
X_1f = X[:, 0:1]
oa = OAS()
oa.fit(X_1f)
# compare shrunk covariance obtained from data and from MLE estimate
_oa_cov_from_mle, _oa_shrinkage_from_mle = _oas(X_1f)
assert_array_almost_equal(_oa_cov_from_mle, oa.covariance_, 4)
assert_almost_equal(_oa_shrinkage_from_mle, oa.shrinkage_)
assert_array_almost_equal((X_1f**2).sum() / n_samples, oa.covariance_, 4)


def test_EmpiricalCovariance_validates_mahalanobis():
"""Checks that EmpiricalCovariance validates data with mahalanobis."""
Expand Down
1 change: 1 addition & 0 deletions sklearn/tests/test_public_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ def test_function_param_validation(func_module):
# Pairs of (public function import path, estimator class import path) where
# the function delegates its work — and hence its parameter validation —
# to the listed estimator class.
PARAM_VALIDATION_CLASS_WRAPPER_LIST = [
("sklearn.decomposition.non_negative_factorization", "sklearn.decomposition.NMF"),
("sklearn.covariance.ledoit_wolf", "sklearn.covariance.LedoitWolf"),
("sklearn.covariance.oas", "sklearn.covariance.OAS"),
]


Expand Down