-
-
Notifications
You must be signed in to change notification settings - Fork 26.2k
FIX Run common tests on SparseCoder #32077
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f5380f5
f346d11
d8aeb0a
4ff1eb6
70e29b4
419fd84
efe5b9b
7204c5e
88beb6a
42f8be4
c9b1689
fbb9dbf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
- :class:`decomposition.SparseCoder` now follows the transformer API of scikit-learn. | ||
In addition, the :meth:`fit` method now validates the input and parameters. | ||
By :user:`François Paugam <FrancoisPgm>`. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -356,14 +356,11 @@ def sparse_encode( | |
[ 0., 1., 1., 0., 0.]]) | ||
""" | ||
if check_input: | ||
if algorithm == "lasso_cd": | ||
dictionary = check_array( | ||
dictionary, order="C", dtype=[np.float64, np.float32] | ||
) | ||
X = check_array(X, order="C", dtype=[np.float64, np.float32]) | ||
else: | ||
dictionary = check_array(dictionary) | ||
X = check_array(X) | ||
order = "C" if algorithm == "lasso_cd" else None | ||
dictionary = check_array( | ||
dictionary, order=order, dtype=[np.float64, np.float32] | ||
) | ||
X = check_array(X, order=order, dtype=[np.float64, np.float32]) | ||
|
||
if dictionary.shape[1] != X.shape[1]: | ||
raise ValueError( | ||
|
@@ -421,7 +418,7 @@ def _sparse_encode( | |
regularization = 1.0 | ||
|
||
if gram is None and algorithm != "threshold": | ||
gram = np.dot(dictionary, dictionary.T) | ||
gram = np.dot(dictionary, dictionary.T).astype(X.dtype, copy=False) | ||
|
||
if cov is None and algorithm != "lasso_cd": | ||
copy_cov = False | ||
|
@@ -1301,6 +1298,19 @@ class SparseCoder(_BaseSparseCoding, BaseEstimator): | |
[ 0., 1., 1., 0., 0.]]) | ||
""" | ||
|
||
_parameter_constraints: dict = { | ||
"dictionary": ["array-like"], | ||
"transform_algorithm": [ | ||
StrOptions({"lasso_lars", "lasso_cd", "lars", "omp", "threshold"}) | ||
], | ||
"transform_n_nonzero_coefs": [Interval(Integral, 1, None, closed="left"), None], | ||
"transform_alpha": [Interval(Real, 0, None, closed="left"), None], | ||
jeremiedbb marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"split_sign": ["boolean"], | ||
"n_jobs": [Integral, None], | ||
"positive_code": ["boolean"], | ||
"transform_max_iter": [Interval(Integral, 0, None, closed="left")], | ||
jeremiedbb marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
def __init__( | ||
self, | ||
dictionary, | ||
|
@@ -1324,16 +1334,17 @@ def __init__( | |
) | ||
self.dictionary = dictionary | ||
|
||
@_fit_context(prefer_skip_nested_validation=True) | ||
def fit(self, X, y=None): | ||
"""Do nothing and return the estimator unchanged. | ||
"""Only validate the parameters of the estimator. | ||
|
||
This method is just there to implement the usual API and hence | ||
work in pipelines. | ||
This method allows to: (i) validate the parameters of the estimator and | ||
(ii) be consistent with the scikit-learn transformer API. | ||
|
||
Parameters | ||
---------- | ||
X : Ignored | ||
Not used, present for API consistency by convention. | ||
X : array-like of shape (n_samples, n_features) | ||
Training data. Only used for input validation. | ||
|
||
y : Ignored | ||
Not used, present for API consistency by convention. | ||
|
@@ -1343,6 +1354,13 @@ def fit(self, X, y=None): | |
self : object | ||
Returns the instance itself. | ||
""" | ||
X = validate_data(self, X) | ||
self.n_components_ = self.dictionary.shape[0] | ||
if X.shape[1] != self.dictionary.shape[1]: | ||
raise ValueError( | ||
"Dictionary and X have different numbers of features:" | ||
f"dictionary.shape: {self.dictionary.shape} X.shape{X.shape}" | ||
) | ||
return self | ||
|
||
def transform(self, X, y=None): | ||
|
@@ -1353,7 +1371,7 @@ def transform(self, X, y=None): | |
|
||
Parameters | ||
---------- | ||
X : ndarray of shape (n_samples, n_features) | ||
X : array-like of shape (n_samples, n_features) | ||
Training vector, where `n_samples` is the number of samples | ||
and `n_features` is the number of features. | ||
|
||
|
@@ -1389,16 +1407,6 @@ def __sklearn_tags__(self): | |
tags.transformer_tags.preserves_dtype = ["float64", "float32"] | ||
return tags | ||
|
||
@property | ||
def n_components_(self): | ||
"""Number of atoms.""" | ||
return self.dictionary.shape[0] | ||
|
||
@property | ||
def n_features_in_(self): | ||
"""Number of features seen during `fit`.""" | ||
return self.dictionary.shape[1] | ||
|
||
Comment on lines
-1397
to
-1401
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note for reviewers: With this modification, |
||
@property | ||
def _n_features_out(self): | ||
"""Number of transformed output features.""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,8 @@ | |
from functools import partial | ||
from inspect import isfunction | ||
|
||
import numpy as np | ||
|
||
from sklearn import clone, config_context | ||
from sklearn.calibration import CalibratedClassifierCV | ||
from sklearn.cluster import ( | ||
|
@@ -177,6 +179,8 @@ | |
|
||
CROSS_DECOMPOSITION = ["PLSCanonical", "PLSRegression", "CCA", "PLSSVD"] | ||
|
||
rng = np.random.RandomState(0) | ||
|
||
# The following dictionary is to indicate constructor arguments suitable for the test | ||
# suite, which uses very small datasets, and is intended to run rather quickly. | ||
INIT_PARAMS = { | ||
|
@@ -441,6 +445,7 @@ | |
SGDClassifier: dict(max_iter=5), | ||
SGDOneClassSVM: dict(max_iter=5), | ||
SGDRegressor: dict(max_iter=5), | ||
SparseCoder: dict(dictionary=rng.normal(size=(5, 3))), | ||
SparsePCA: dict(max_iter=5), | ||
# Due to the jl lemma and often very few samples, the number | ||
# of components of the random matrix projection will be probably | ||
|
@@ -711,6 +716,38 @@ | |
], | ||
}, | ||
SkewedChi2Sampler: {"check_dict_unchanged": dict(n_components=1)}, | ||
SparseCoder: { | ||
"check_estimators_dtypes": dict(dictionary=rng.normal(size=(5, 5))), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there a single dictionary configuration we can set so that it passes all the tests? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
"check_dtype_object": dict(dictionary=rng.normal(size=(5, 10))), | ||
"check_transformers_unfitted_stateless": dict( | ||
dictionary=rng.normal(size=(5, 5)) | ||
), | ||
"check_fit_idempotent": dict(dictionary=rng.normal(size=(5, 2))), | ||
"check_transformer_preserve_dtypes": dict( | ||
dictionary=rng.normal(size=(5, 3)).astype(np.float32) | ||
), | ||
"check_set_output_transform": dict(dictionary=rng.normal(size=(5, 5))), | ||
"check_global_output_transform_pandas": dict( | ||
dictionary=rng.normal(size=(5, 5)) | ||
), | ||
"check_set_output_transform_pandas": dict(dictionary=rng.normal(size=(5, 5))), | ||
"check_set_output_transform_polars": dict(dictionary=rng.normal(size=(5, 5))), | ||
"check_global_set_output_transform_polars": dict( | ||
dictionary=rng.normal(size=(5, 5)) | ||
), | ||
"check_dataframe_column_names_consistency": dict( | ||
dictionary=rng.normal(size=(5, 8)) | ||
), | ||
"check_estimators_overwrite_params": dict(dictionary=rng.normal(size=(5, 2))), | ||
"check_estimators_fit_returns_self": dict(dictionary=rng.normal(size=(5, 2))), | ||
"check_readonly_memmap_input": dict(dictionary=rng.normal(size=(5, 2))), | ||
"check_n_features_in_after_fitting": dict(dictionary=rng.normal(size=(5, 4))), | ||
"check_fit_check_is_fitted": dict(dictionary=rng.normal(size=(5, 2))), | ||
"check_n_features_in": dict(dictionary=rng.normal(size=(5, 2))), | ||
"check_positive_only_tag_during_fit": dict(dictionary=rng.normal(size=(5, 4))), | ||
"check_fit2d_1sample": dict(dictionary=rng.normal(size=(5, 10))), | ||
"check_fit2d_1feature": dict(dictionary=rng.normal(size=(5, 1))), | ||
}, | ||
SparsePCA: {"check_dict_unchanged": dict(max_iter=5, n_components=1)}, | ||
SparseRandomProjection: {"check_dict_unchanged": dict(n_components=1)}, | ||
SpectralBiclustering: { | ||
|
@@ -748,7 +785,7 @@ def _tested_estimators(type_filter=None): | |
yield estimator | ||
|
||
|
||
SKIPPED_ESTIMATORS = [SparseCoder, FrozenEstimator] | ||
SKIPPED_ESTIMATORS = [FrozenEstimator] | ||
|
||
|
||
def _construct_instances(Estimator): | ||
|
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please reference yourself as author of the PR: "By :user:`your name <your handle>`".