[WIP] Common test for equivalence between sparse and dense matrices. #13246

Closed · wants to merge 127 commits
127 commits
a22ee34
Add test for sparse and dense equivalence
maniteja123 Oct 4, 2016
17b4da6
Set random state
maniteja123 Oct 6, 2016
c4241f0
Merge branch 'master' into tests_sparse
Feb 25, 2019
3c4cd3e
wip tests for sparse estimators
Feb 25, 2019
9f70939
remove wrongly merged code
Feb 25, 2019
80cc509
Fix: make it work
Feb 25, 2019
6acd14d
Add fit_intercept=False for linear models
Feb 25, 2019
d7b56a1
add brute exception for kneighbors classifier
Feb 25, 2019
e5cc48c
more workaround
agramfort Feb 25, 2019
fd45ad2
fix test by fixing the seed :)
agramfort Feb 25, 2019
a8e6606
STY: fix pep8 errors
Feb 26, 2019
f4e885b
Merge branch 'tests_sparse' of https://github.com/wdevazelhes/scikit-…
Feb 26, 2019
66000f5
FIX deal with the case that was failing (test_check_estimator_pairwise)
Feb 26, 2019
f2d8b16
FIX fix random seed to make test_check_estimator pass (lines at the e…
Feb 26, 2019
22af420
Fix the test that was checking a ValueError
Feb 26, 2019
1e5d8fc
FIX: do another way around to pass tests for kernel algorithms (becau…
Feb 26, 2019
0f47078
FIX: fix mistake on n_samples, n_features
Feb 26, 2019
a1fc52a
remove spurious comment
Feb 26, 2019
f849d24
Also convert X_csr
Feb 26, 2019
27f2177
Convert X_sp instead
Feb 26, 2019
926d27f
Fix typo
Feb 26, 2019
7eb9710
Merge branch 'master' into tests_sparse
Feb 27, 2019
4163387
FIX: fix the check that was making AffinityPropagation fail and remov…
Feb 27, 2019
b01a1ab
FIX: remove the change that belongs to #13334
Mar 1, 2019
f626717
Merge branch 'master' into tests_sparse
Mar 1, 2019
49e7bbe
Add more friendly dataset to avoid problems of random seeds with rand…
Mar 1, 2019
1db1408
Address jeremie's review
Mar 1, 2019
df985a6
FIX: put friendly dataset that allows to clean up nearest neighbors
Mar 1, 2019
ca3c046
simplify
agramfort Mar 6, 2019
adfe965
Merge branch 'master' into tests_sparse
agramfort Mar 6, 2019
a09fd9a
Merge branch 'master' into tests_sparse
Apr 4, 2019
4c80312
Merge branch 'tests_sparse' of https://github.com/wdevazelhes/scikit-…
Apr 4, 2019
78ba594
Merge branch 'master' into tests_sparse
Apr 4, 2019
f561a25
fix precomputed case + SGD
agramfort Apr 27, 2019
c07c412
Merge branch 'master' into tests_sparse
agramfort Apr 27, 2019
1e906a1
more try
agramfort Apr 27, 2019
e9ba178
Merge remote-tracking branch 'upstream/master' into common_check_spar…
jeromedockes Jul 30, 2019
49e3981
only 2 classes in check_estimator_sparse_dense if tag "binary_only"
jeromedockes Jul 30, 2019
a149f8f
check value of multioutput or multioutput_only in tags
jeromedockes Jul 30, 2019
247a2ce
Merge remote-tracking branch 'upstream/master' into common_check_spar…
jeromedockes Jul 30, 2019
2842f1e
Address https://github.com/scikit-learn/scikit-learn/pull/13246/files…
Aug 19, 2019
f3d027c
Remove file that should not be here
Aug 19, 2019
6705cee
Merge branch 'master' into tests_sparse
Aug 20, 2019
f11264c
Change ValueError into TypeError for QuantileTransformer
Aug 20, 2019
10bccbe
Modify also the test
Aug 20, 2019
a4dd99b
Avoid reiterating the distance matrix of the distance matrix, etc.
Aug 21, 2019
ef0657c
Put the right error for error message in approximate nearest neighbors
Aug 21, 2019
6651743
Also update the test with TypeError
Aug 21, 2019
feb2932
Remove comment since the issue is opened
Aug 26, 2019
49b31ea
merge with master
wdevazelhes Sep 26, 2019
5a65aa1
fix pep8 errors
wdevazelhes Sep 26, 2019
a011adb
Put TypeError instead of ValueError in neighbors tree
wdevazelhes Sep 26, 2019
8ad06b7
update QuantileTransformer with TypeError when sparse not accepted
wdevazelhes Sep 26, 2019
1c28543
Remove the test of exceptions in check_estimator_sparse_dense
wdevazelhes Sep 30, 2019
a89a94c
Merge branch 'master' into tests_sparse
wdevazelhes Sep 30, 2019
e7975ad
- Test that check_estimator_dense_sparse indeed checks equality
wdevazelhes Oct 1, 2019
1bf69d5
STY: fix pep8 errors
wdevazelhes Oct 1, 2019
6fe2184
Use a BaseBadClassifier to not raise the NotFittedError since NoSpars…
wdevazelhes Oct 1, 2019
c57401d
no reason to remove this decorator, must be a mistake in merge or sth
wdevazelhes Oct 1, 2019
eb854c0
be clearer about NoSparseClassifier
wdevazelhes Oct 1, 2019
7fa703e
Use the undeprecated version of _pairwise_estimator_convert
wdevazelhes Oct 7, 2019
00360ca
merge tentative
wdevazelhes Oct 25, 2020
b068c3f
remove remaining from merge
wdevazelhes Oct 25, 2020
d7e0c70
fix merge
wdevazelhes Oct 25, 2020
baff986
fixes
wdevazelhes Oct 25, 2020
da0a3ad
pep8 fix
wdevazelhes Oct 25, 2020
2063343
replace _safe_tags by _get_tags
wdevazelhes Oct 25, 2020
85862ce
put back the original code that I accidentally removed in the merge
wdevazelhes Oct 25, 2020
6088c7e
fix indentation
wdevazelhes Oct 25, 2020
6cdeaed
the block should be in fact removed
wdevazelhes Oct 25, 2020
2f14fb9
stochastic gradient not found, replacing by private module
wdevazelhes Oct 25, 2020
b2f22fb
add strict mode
wdevazelhes Oct 25, 2020
51c1db2
use instance not class
wdevazelhes Oct 25, 2020
12047be
fix tests
wdevazelhes Oct 25, 2020
792b1ef
remove message since referenced pr is closed
wdevazelhes Oct 26, 2020
2067f4e
remove pyorig file
wdevazelhes Oct 26, 2020
f92a953
use enforce_estimator_tags
wdevazelhes Oct 26, 2020
f952402
use fstrings
wdevazelhes Nov 2, 2020
dde4da5
put a standard random seed
wdevazelhes Nov 2, 2020
5a155fe
rename mock classes with better names
wdevazelhes Nov 2, 2020
0f88016
address https://github.com/scikit-learn/scikit-learn/pull/13246/files…
wdevazelhes Nov 4, 2020
3c152ed
Allow sparsity in AffinityPropagation and fix error in check that was…
wdevazelhes Nov 4, 2020
0f1e39b
Fix problem that appeared due to the previous fix:
wdevazelhes Nov 6, 2020
e32e6aa
Remove specific centering for StandardScaler and RobustScaler in esti…
wdevazelhes Nov 8, 2020
40dd3e9
fix indent
wdevazelhes Nov 11, 2020
0c83018
Remove if precomputed metric or kernel, see comment https://github.co…
wdevazelhes Nov 11, 2020
ed6554a
try to not exclude SGD to see if it still works
wdevazelhes Nov 20, 2020
d7587e1
Merge branch 'master' into tests_sparse
wdevazelhes Nov 22, 2020
70e9d2c
Use BaseBadClassifier instead of LogisticRegression
Nov 23, 2020
d9deadc
fix typo
wdevazelhes Nov 23, 2020
62a893e
Add checks for BaseSGD that sets the intercept decay to the same valu…
wdevazelhes Nov 23, 2020
b955d0c
remove unused import
wdevazelhes Nov 23, 2020
d504d30
try with a smaller intercept decay to be more stable ?
wdevazelhes Nov 24, 2020
0ec7a96
fix code to set the intercept decay to a smaller value for both
wdevazelhes Nov 24, 2020
0ac94c0
fix code to set the intercept decay to a smaller value for both
wdevazelhes Nov 24, 2020
806aa47
ensure that the test doesn't change the intercept decay
wdevazelhes Nov 24, 2020
665053f
remove blank line
wdevazelhes Nov 24, 2020
138b2b9
Be more specific by excluding only estimators that fail (with the jus…
wdevazelhes Nov 24, 2020
b3f99f8
remove unused imports
wdevazelhes Nov 24, 2020
017c318
Create simple regression problem for regressors
wdevazelhes Nov 26, 2020
04fce9d
round the dataset to limit numerical problems
wdevazelhes Nov 26, 2020
e6112e2
use fewer significant digits
wdevazelhes Nov 26, 2020
7c3fd40
try with float16 precision
wdevazelhes Nov 26, 2020
ed37152
Update sklearn/utils/estimator_checks.py
wdevazelhes Dec 15, 2020
152bc66
refactor X_size into n_samples
wdevazelhes Dec 15, 2020
762112f
Update sklearn/utils/estimator_checks.py
wdevazelhes Dec 15, 2020
c615d5c
remove comment
wdevazelhes Dec 15, 2020
70d6e88
add comment about the precision of numbers
wdevazelhes Dec 15, 2020
c74d998
add other prediction functions
wdevazelhes Dec 15, 2020
1b292dc
Remove Scalers tests
wdevazelhes Dec 15, 2020
8a5778a
separate scoring function and prediction/transform functions
wdevazelhes Dec 15, 2020
691a845
score_samples doesn't take y
wdevazelhes Dec 15, 2020
5da39b3
test more methods in check_estimator_sparse_data
wdevazelhes Dec 15, 2020
d9c9645
the try except should only be on the fit, we don't want assertion err…
wdevazelhes Dec 20, 2020
4b8f2a4
subsequent tests should be done only on estimators that pass the 'fit…
wdevazelhes Dec 20, 2020
d066b32
make shape testing work with outlier detector
wdevazelhes Dec 20, 2020
6bb5e36
Fix AdditiveChi2Sampler
wdevazelhes Feb 26, 2021
1e67503
fix IncrementalPCA: accept to transform some sparse inputs with index…
wdevazelhes Feb 26, 2021
9643a38
Fix problems with linear_models
wdevazelhes Mar 2, 2021
71ad125
Simplify data to remove numerical uncertainties
wdevazelhes Mar 3, 2021
ac27d73
Merge branch 'master' into tests_sparse
wdevazelhes Mar 3, 2021
084b604
Refactor dataset use
wdevazelhes Mar 5, 2021
25316ad
use the more extensive _generate_sparse_matrix
wdevazelhes Mar 5, 2021
891f5a9
fix ortho
wdevazelhes Mar 5, 2021
31ecac2
add changelog
wdevazelhes Mar 5, 2021
a5bfc98
use float because np.float is deprecated
wdevazelhes Mar 5, 2021
6a67de0
merge with master
wdevazelhes Mar 14, 2021
10 changes: 10 additions & 0 deletions doc/whats_new/v1.0.rst
Expand Up @@ -223,6 +223,16 @@ Changelog
:class:`calibration.CalibratedClassifierCV` can now properly be used on
prefitted pipelines. :pr:`19641` by :user:`Alek Lefebvre <AlekLefebvre>`

:mod:`sklearn.utils`
....................

- |Enhancement| Improve common tests in `estimator_checks` to ensure that
estimators give the same result when given sparse and dense inputs.
:pr:`13246` by :user:`Maniteja Nandana <maniteja123>`,
:user:`William de Vazelhes <wdevazelhes>`,
:user:`Alexandre Gramfort <agramfort>`, and
:user:`Jérôme Dockès <jeromedockes>`.

Code and Documentation Contributors
-----------------------------------

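The idea behind the new common test can be sketched as follows — a simplified, hypothetical stand-in for `check_estimator_sparse_dense` (the names `check_sparse_dense_equivalence` and `MeanRegressor` are illustrative, not the PR's actual implementation): fit the same estimator on dense and CSR inputs and require identical predictions.

```python
import numpy as np
from scipy import sparse


class MeanRegressor:
    """Tiny stand-in estimator that accepts both dense and sparse X."""

    def fit(self, X, y):
        self.mean_ = float(np.mean(y))
        return self

    def predict(self, X):
        # X.shape[0] works for both ndarrays and scipy sparse matrices
        return np.full(X.shape[0], self.mean_)


def check_sparse_dense_equivalence(make_estimator, X, y):
    # Fit twice -- once on dense, once on CSR -- and compare predictions.
    est_dense = make_estimator().fit(X, y)
    est_sparse = make_estimator().fit(sparse.csr_matrix(X), y)
    np.testing.assert_allclose(est_dense.predict(X),
                               est_sparse.predict(sparse.csr_matrix(X)))


X = np.eye(4)
y = np.array([1.0, 2.0, 3.0, 4.0])
check_sparse_dense_equivalence(MeanRegressor, X, y)  # passes silently
```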
2 changes: 1 addition & 1 deletion sklearn/cluster/_affinity_propagation.py
Expand Up @@ -448,7 +448,7 @@ def predict(self, X):
Cluster labels.
"""
check_is_fitted(self)
X = self._validate_data(X, reset=False)
X = self._validate_data(X, reset=False, accept_sparse=True)
if not hasattr(self, "cluster_centers_"):
raise ValueError("Predict method is not supported when "
"affinity='precomputed'.")
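The one-line `accept_sparse=True` change matters because sklearn-style validation rejects sparse input unless the caller opts in. A minimal sketch of that behavior (the `validate` helper here is hypothetical, not the real `_validate_data`):

```python
import numpy as np
from scipy import sparse


def validate(X, accept_sparse=False):
    """Simplified sketch of sklearn-style input validation: sparse input
    is rejected unless the caller opts in with accept_sparse=True."""
    if sparse.issparse(X) and not accept_sparse:
        raise TypeError("Sparse data was passed, but dense data is required.")
    return X


X_sp = sparse.csr_matrix(np.eye(2))
try:
    validate(X_sp)          # default: sparse is refused
    raised = False
except TypeError:
    raised = True
assert raised
assert validate(X_sp, accept_sparse=True) is X_sp  # opt-in: passes through
```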
3 changes: 3 additions & 0 deletions sklearn/decomposition/_incremental_pca.py
Expand Up @@ -350,6 +350,9 @@ def transform(self, X):
if sparse.issparse(X):
n_samples = X.shape[0]
output = []
if type(X) in [sparse.bsr_matrix, sparse.coo_matrix,
sparse.dia_matrix]:
X = X.tocsr(copy=True)
for batch in gen_batches(n_samples, self.batch_size_,
min_batch_size=self.n_components or 0):
output.append(super().transform(X[batch].toarray()))
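The conversion to CSR is needed because BSR, COO, and DIA matrices do not support the row slicing that the batch loop relies on. A quick illustration with SciPy:

```python
import numpy as np
from scipy import sparse

X = sparse.coo_matrix(np.eye(4))
# COO (like BSR and DIA) does not support slicing at all:
try:
    X[0:2]
    sliceable = True
except TypeError:
    sliceable = False
assert not sliceable

# After conversion to CSR, batch-wise row slicing works:
X_csr = X.tocsr(copy=True)
assert X_csr[0:2].shape == (2, 4)
```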
16 changes: 10 additions & 6 deletions sklearn/kernel_approximation.py
Expand Up @@ -593,16 +593,20 @@ def _transform_dense(self, X):
return np.hstack(X_new)

def _transform_sparse(self, X):
indices = X.indices.copy()
indptr = X.indptr.copy()

data_step = np.sqrt(X.data * self.sample_interval_)
# We remove possible explicit zeros, which will lead to infinity
# in the log:
X_pruned = X.copy()
X_pruned.eliminate_zeros()

indices = X_pruned.indices.copy()
indptr = X_pruned.indptr.copy()
data_step = np.sqrt(X_pruned.data * self.sample_interval_)
X_step = sp.csr_matrix((data_step, indices, indptr),
shape=X.shape, dtype=X.dtype, copy=False)
X_new = [X_step]

log_step_nz = self.sample_interval_ * np.log(X.data)
step_nz = 2 * X.data * self.sample_interval_
log_step_nz = self.sample_interval_ * np.log(X_pruned.data)
step_nz = 2 * X_pruned.data * self.sample_interval_

for j in range(1, self.sample_steps):
factor_nz = np.sqrt(step_nz /
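The reason for the pruning step: CSR matrices may carry *explicit* zeros in `.data`, and `np.log(0)` yields `-inf`. A small demonstration of the failure mode and of the `eliminate_zeros` fix:

```python
import numpy as np
from scipy import sparse

# Build a CSR matrix that stores an explicit zero in .data
data = np.array([0.0, 2.0])
indices = np.array([0, 1])
indptr = np.array([0, 2])
X = sparse.csr_matrix((data, indices, indptr), shape=(1, 3))
assert X.nnz == 2  # the explicit zero counts as a stored entry

with np.errstate(divide="ignore"):
    assert np.isneginf(np.log(X.data)).any()  # log(0) -> -inf

X.eliminate_zeros()  # drop explicit zeros, as X_pruned does in the diff
assert X.nnz == 1
assert np.isfinite(np.log(X.data)).all()
```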
3 changes: 2 additions & 1 deletion sklearn/linear_model/_base.py
Expand Up @@ -45,6 +45,7 @@
# TODO: bayesian_ridge_regression and bayesian_regression_ard
# should be squashed into its respective objects.

DENSE_INTERCEPT_DECAY = 1.0
SPARSE_INTERCEPT_DECAY = 0.01
# For sparse data intercept updates are scaled by this decay factor to avoid
# intercept oscillation.
Expand Down Expand Up @@ -186,7 +187,7 @@ def make_dataset(X, y, sample_weight, random_state=None):
else:
X = np.ascontiguousarray(X)
dataset = ArrayData(X, y, sample_weight, seed=seed)
intercept_decay = 1.0
intercept_decay = DENSE_INTERCEPT_DECAY

return dataset, intercept_decay

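For context, the two constants exist because on sparse data the intercept update is scaled down to avoid oscillation. A toy sketch (hypothetical, not the actual SGD implementation) showing the damping effect:

```python
def sgd_intercept_steps(grads, eta, intercept_decay):
    """Toy intercept-only SGD: each update is scaled by intercept_decay."""
    intercept = 0.0
    path = []
    for g in grads:
        intercept -= eta * g * intercept_decay
        path.append(intercept)
    return path


grads = [1.0, -1.0] * 5  # oscillating gradient
dense = sgd_intercept_steps(grads, eta=0.5, intercept_decay=1.0)    # decay 1.0
damped = sgd_intercept_steps(grads, eta=0.5, intercept_decay=0.01)  # decay 0.01
# The damped path stays much closer to zero than the undamped one
assert max(map(abs, damped)) < max(map(abs, dense))
```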
34 changes: 34 additions & 0 deletions sklearn/linear_model/_passive_aggressive.py
Expand Up @@ -257,6 +257,23 @@ def fit(self, X, y, coef_init=None, intercept_init=None):
loss="hinge", learning_rate=lr,
coef_init=coef_init, intercept_init=intercept_init)

def _more_tags(self):
return {
'_xfail_checks': {
'check_estimator_sparse_dense':
"PassiveAggressiveClassifier has a "
"special intercept_decay for sparse inputs (see "
"the constant "
"`linear_model._base.SPARSE_INTERCEPT_DECAY`), "
"which gives different results than the one for "
"dense data. Therefore it is not tested in common "
"tests but rather in `linear_model.test_sgd` (namely "
"`test_sgd_sparse_dense_same_decay`), with the sparse "
"intercept set to the same value between sparse and "
"dense, on a toy example."
}
}


class PassiveAggressiveRegressor(BaseSGDRegressor):
"""Passive Aggressive Regressor
Expand Down Expand Up @@ -468,3 +485,20 @@ def fit(self, X, y, coef_init=None, intercept_init=None):
learning_rate=lr,
coef_init=coef_init,
intercept_init=intercept_init)

def _more_tags(self):
return {
'_xfail_checks': {
'check_estimator_sparse_dense':
"PassiveAggressiveRegressor has a "
"special intercept_decay for sparse inputs (see "
"the constant "
"`linear_model._base.SPARSE_INTERCEPT_DECAY`), "
"which gives different results than the one for "
"dense data. Therefore it is not tested in common "
"tests but rather in `linear_model.test_sgd` (namely "
"`test_sgd_sparse_dense_same_decay`), with the sparse "
"intercept set to the same value between sparse and "
"dense, on a toy example."
}
}
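The `_xfail_checks` entries above rely on sklearn's tag mechanism, where each class in the MRO can contribute tags via `_more_tags`, later classes overriding earlier ones. A simplified sketch of that collection pattern (not sklearn's exact code):

```python
class BaseEstimatorSketch:
    """Simplified sketch of sklearn-style tag collection."""

    def _more_tags(self):
        return {"_xfail_checks": {}}

    def _get_tags(self):
        tags = {}
        # Walk the MRO from base to subclass so subclasses override bases
        for klass in reversed(type(self).__mro__):
            if hasattr(klass, "_more_tags"):
                tags.update(klass._more_tags(self))
        return tags


class MyEstimator(BaseEstimatorSketch):
    def _more_tags(self):
        return {"_xfail_checks":
                {"check_estimator_sparse_dense": "reason to skip"}}


tags = MyEstimator()._get_tags()
assert "check_estimator_sparse_dense" in tags["_xfail_checks"]
```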
17 changes: 17 additions & 0 deletions sklearn/linear_model/_perceptron.py
Expand Up @@ -169,3 +169,20 @@ def __init__(self, *, penalty=None, alpha=0.0001, l1_ratio=0.15,
validation_fraction=validation_fraction,
n_iter_no_change=n_iter_no_change, power_t=0.5,
warm_start=warm_start, class_weight=class_weight, n_jobs=n_jobs)

def _more_tags(self):
return {
'_xfail_checks': {
'check_estimator_sparse_dense':
"Perceptron has a "
"special intercept_decay for sparse inputs (see "
"the constant "
"`linear_model._base.SPARSE_INTERCEPT_DECAY`), "
"which gives different results than the one for "
"dense data. Therefore it is not tested in common "
"tests but rather in `linear_model.test_sgd` (namely "
"`test_sgd_sparse_dense_same_decay`), with the sparse "
"intercept set to the same value between sparse and "
"dense, on a toy example."
}
}
22 changes: 22 additions & 0 deletions sklearn/linear_model/_stochastic_gradient.py
Expand Up @@ -1111,6 +1111,17 @@ def _more_tags(self):
'_xfail_checks': {
'check_sample_weights_invariance':
'zero sample_weight is not equivalent to removing samples',
'check_estimator_sparse_dense':
"SGDClassifier has a "
"special intercept_decay for sparse inputs (see "
"the constant "
"`linear_model._base.SPARSE_INTERCEPT_DECAY`), "
"which gives different results than the one for "
"dense data. Therefore it is not tested in common "
"tests but rather in `linear_model.test_sgd` (namely "
"`test_sgd_sparse_dense_same_decay`), with the sparse "
"intercept set to the same value between sparse and "
"dense, on a toy example."
}
}

Expand Down Expand Up @@ -1604,5 +1615,16 @@ def _more_tags(self):
'_xfail_checks': {
'check_sample_weights_invariance':
'zero sample_weight is not equivalent to removing samples',
'check_estimator_sparse_dense':
"SGDRegressor has a "
"special intercept_decay for sparse inputs (see "
"the constant "
"`linear_model._base.SPARSE_INTERCEPT_DECAY`), "
"which gives different results than the one for "
"dense data. Therefore it is not tested in common "
"tests but rather in `linear_model.test_sgd` (namely "
"`test_sgd_sparse_dense_same_decay`), with the sparse "
"intercept set to the same value between sparse and "
"dense, on a toy example."
}
}
26 changes: 26 additions & 0 deletions sklearn/linear_model/tests/test_sgd.py
Expand Up @@ -10,6 +10,7 @@
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_raises_regexp
from sklearn.utils._testing import ignore_warnings
from sklearn.utils.estimator_checks import check_estimator_sparse_dense
from sklearn.utils.fixes import parse_version

from sklearn import linear_model, datasets, metrics
Expand Down Expand Up @@ -1646,3 +1647,28 @@ def test_SGDClassifier_fit_for_all_backends(backend):
with joblib.parallel_backend(backend=backend):
clf_parallel.fit(X, y)
assert_array_almost_equal(clf_sequential.coef_, clf_parallel.coef_)


@pytest.mark.parametrize('estimator_orig',
[linear_model.SGDRegressor(),
linear_model.PassiveAggressiveRegressor(),
linear_model.SGDClassifier(),
linear_model.Perceptron(),
linear_model.PassiveAggressiveClassifier()])
def test_sgd_sparse_dense_same_decay(estimator_orig):
"""Tests that with default parameters, estimators that inherit from
`sklearn.linear_model._stochastic_gradient.BaseSGD`
return the same results on dense and sparse data. It's
tested here and not in common tests because for sparse data,
the "intercept decay" variable is set to a different value than for
dense data, which would give different results between sparse and
dense. Here we test that for toy examples, if this intercept
decay is set to the same value, the result is the same between
sparse and dense."""
old_dense_intercept_decay = linear_model._base.DENSE_INTERCEPT_DECAY
old_sparse_intercept_decay = linear_model._base.SPARSE_INTERCEPT_DECAY
linear_model._base.DENSE_INTERCEPT_DECAY = 0.01
linear_model._base.SPARSE_INTERCEPT_DECAY = 0.01
check_estimator_sparse_dense(None, estimator_orig)
linear_model._base.DENSE_INTERCEPT_DECAY = old_dense_intercept_decay
linear_model._base.SPARSE_INTERCEPT_DECAY = old_sparse_intercept_decay
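One fragility worth noting: if `check_estimator_sparse_dense` raises, the test above never restores the patched constants. A context manager (a sketch, not part of this PR) would make the patching exception-safe:

```python
import contextlib
import types


@contextlib.contextmanager
def same_intercept_decay(module, value):
    """Temporarily set both decay constants, restoring them even on error."""
    old_dense = module.DENSE_INTERCEPT_DECAY
    old_sparse = module.SPARSE_INTERCEPT_DECAY
    module.DENSE_INTERCEPT_DECAY = value
    module.SPARSE_INTERCEPT_DECAY = value
    try:
        yield
    finally:
        module.DENSE_INTERCEPT_DECAY = old_dense
        module.SPARSE_INTERCEPT_DECAY = old_sparse


# Demonstrate on a stand-in "module":
mod = types.SimpleNamespace(DENSE_INTERCEPT_DECAY=1.0,
                            SPARSE_INTERCEPT_DECAY=0.01)
try:
    with same_intercept_decay(mod, 0.01):
        assert mod.DENSE_INTERCEPT_DECAY == 0.01
        raise RuntimeError("failing check")
except RuntimeError:
    pass
assert mod.DENSE_INTERCEPT_DECAY == 1.0  # restored despite the failure
```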
19 changes: 17 additions & 2 deletions sklearn/preprocessing/_data.py
Expand Up @@ -953,7 +953,15 @@ def inverse_transform(self, X, copy=None):

def _more_tags(self):
return {'allow_nan': True,
'preserves_dtype': [np.float64, np.float32]}
'preserves_dtype': [np.float64, np.float32],
'_xfail_checks':
{'check_estimator_sparse_dense':
"Default StandardScaler doesn't support sparse "
"inputs. But StandardScaler is tested on sparse "
"data in `preprocessing.tests.test_data."
"test_scaler_without_centering`."
}
}


class MaxAbsScaler(TransformerMixin, BaseEstimator):
Expand Down Expand Up @@ -1459,7 +1467,14 @@ def inverse_transform(self, X):
return X

def _more_tags(self):
return {'allow_nan': True}
return {'allow_nan': True,
'_xfail_checks':
{'check_estimator_sparse_dense':
"Default RobustScaler doesn't support sparse inputs. "
"But RobustScaler is tested on sparse data in "
"`preprocessing.tests.test_data."
"test_robust_scaler_equivalence_dense_sparse`."
}}


@_deprecate_positional_args
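For background on why the default scalers are excluded: centering subtracts the column means, which turns stored zeros into nonzeros and densifies a sparse matrix. A quick demonstration:

```python
import numpy as np
from scipy import sparse

# A very sparse random matrix: ~1% of entries are stored
X = sparse.random(100, 50, density=0.01, format="csr", random_state=0)
means = np.asarray(X.mean(axis=0)).ravel()

# Dense fallback just to illustrate what centering would do
centered = X.toarray() - means

assert X.nnz < 100                          # sparse before centering
assert np.count_nonzero(centered) > X.nnz   # far denser after centering
```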