From 06de5376a15aacf508286ae08ec36d0faf4e0d6b Mon Sep 17 00:00:00 2001 From: maikia Date: Fri, 26 Jun 2020 13:52:33 +0200 Subject: [PATCH 001/116] first normalize changes --- sklearn/linear_model/_base.py | 17 +++++++++++++++-- sklearn/linear_model/tests/test_base.py | 10 +++++++++- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 4ab797578dbde..1233d79c484d4 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -119,6 +119,7 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True, This is here because nearly all linear models will want their data to be centered. This function also systematically makes y consistent with X.dtype """ + if isinstance(sample_weight, numbers.Number): sample_weight = None if sample_weight is not None: @@ -409,6 +410,10 @@ class LinearRegression(MultiOutputMixin, RegressorMixin, LinearModel): :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``. + .. deprecated:: 0.24 + ``normalize`` was deprecated in version 0.24 and will be removed in + 0.26. + copy_X : bool, default=True If True, X will be copied; else, it may be overwritten. @@ -471,8 +476,8 @@ class LinearRegression(MultiOutputMixin, RegressorMixin, LinearModel): array([16.]) """ @_deprecate_positional_args - def __init__(self, *, fit_intercept=True, normalize=False, copy_X=True, - n_jobs=None): + def __init__(self, *, fit_intercept=True, normalize='deprecate', + copy_X=True, n_jobs=None): self.fit_intercept = fit_intercept self.normalize = normalize self.copy_X = copy_X @@ -501,6 +506,12 @@ def fit(self, X, y, sample_weight=None): self : returns an instance of self. 
""" + if self.normalize != "deprecate": + warnings.warn("'normalize' was deprecated in version 0.24 and will" + " be removed in 0.26.", FutureWarning) + else: + self.normalize = False + n_jobs_ = self.n_jobs X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc', 'coo'], y_numeric=True, multi_output=True) @@ -578,6 +589,8 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy, check_input=check_input, sample_weight=sample_weight) if sample_weight is not None: X, y = _rescale_data(X, y, sample_weight=sample_weight) + + # FIXME: 'normalize' to be removed in 0.26 if hasattr(precompute, '__array__') and ( fit_intercept and not np.allclose(X_offset, np.zeros(n_features)) or normalize and not np.allclose(X_scale, np.ones(n_features))): diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index af76826715241..b0c7167e48c13 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -153,6 +153,7 @@ def test_linear_regression_sparse(random_state=0): assert_array_almost_equal(ols.predict(X) - y.ravel(), 0) +# FIXME: 'normalize' to be removed in 0.26 @pytest.mark.parametrize('normalize', [True, False]) @pytest.mark.parametrize('fit_intercept', [True, False]) def test_linear_regression_sparse_equal_dense(normalize, fit_intercept): @@ -232,10 +233,13 @@ def test_linear_regression_pd_sparse_dataframe_warning(): assert hasattr(df, "sparse") with pytest.warns(None) as record: + reg = LinearRegression() reg.fit(df.iloc[:, 0:2], df.iloc[:, 3]) + assert not record +# FIXME: 'normalize' to be removed in 0.26 def test_preprocess_data(): n_samples = 200 n_features = 2 @@ -270,6 +274,7 @@ def test_preprocess_data(): assert_array_almost_equal(yt, y - expected_y_mean) +# FIXME: 'normalize' to be removed in 0.26 def test_preprocess_data_multioutput(): n_samples = 200 n_features = 3 @@ -296,6 +301,7 @@ def test_preprocess_data_multioutput(): assert_array_almost_equal(yt, y - y_mean) +# 
FIXME: 'normalize' to be removed in 0.26 def test_preprocess_data_weighted(): n_samples = 200 n_features = 2 @@ -329,6 +335,7 @@ def test_preprocess_data_weighted(): assert_array_almost_equal(yt, y - expected_y_mean) +# FIXME: 'normalize' to be removed in 0.26 def test_sparse_preprocess_data_with_return_mean(): n_samples = 200 n_features = 2 @@ -398,6 +405,7 @@ def test_preprocess_copy_data_no_checks(is_sparse, to_copy): assert np.may_share_memory(X_, X) +# FIXME: 'normalize' to be removed in 0.26 def test_dtype_preprocess_data(): n_samples = 200 n_features = 2 @@ -527,4 +535,4 @@ def test_fused_types_make_dataset(): assert_array_equal(xi_data_32, xicsr_data_32) assert_array_equal(xi_data_64, xicsr_data_64) assert_array_equal(yi_32, yicsr_32) - assert_array_equal(yi_64, yicsr_64) + assert_array_equal(yi_64, yicsr_64) \ No newline at end of file From d1c981677a05bef04071cb525db01c548eb118a8 Mon Sep 17 00:00:00 2001 From: maikia Date: Fri, 26 Jun 2020 13:55:12 +0200 Subject: [PATCH 002/116] exchanged setting self.normalize by _normalize --- sklearn/linear_model/_base.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 1233d79c484d4..ae6528a512ab9 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -509,8 +509,9 @@ def fit(self, X, y, sample_weight=None): if self.normalize != "deprecate": warnings.warn("'normalize' was deprecated in version 0.24 and will" " be removed in 0.26.", FutureWarning) + _normalize = self.normalize else: - self.normalize = False + _normalize = False n_jobs_ = self.n_jobs X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc', 'coo'], @@ -521,7 +522,7 @@ def fit(self, X, y, sample_weight=None): dtype=X.dtype) X, y, X_offset, y_offset, X_scale = self._preprocess_data( - X, y, fit_intercept=self.fit_intercept, normalize=self.normalize, + X, y, fit_intercept=self.fit_intercept, normalize=_normalize, copy=self.copy_X, 
sample_weight=sample_weight, return_mean=True) From 233a82e7a4f6f387c0cf2163413ef16c28938af9 Mon Sep 17 00:00:00 2001 From: maikia Date: Fri, 26 Jun 2020 14:16:58 +0200 Subject: [PATCH 003/116] updated the warning --- sklearn/linear_model/_base.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index ae6528a512ab9..265e362cc6024 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -507,8 +507,21 @@ def fit(self, X, y, sample_weight=None): """ if self.normalize != "deprecate": - warnings.warn("'normalize' was deprecated in version 0.24 and will" - " be removed in 0.26.", FutureWarning) + if not self.normalize: + warnings.warn("'normalize' was deprecated in version 0.24" + " and will be removed in 0.26.", FutureWarning) + else: + warnings.warn("'normalize' was deprecated in version 0.24" + " and will be removed in 0.26." + " If you wish to keep normalizing your data" + " consider using a pipeline: \n" + " from sklearn.preprocessing import" + " StandardScaler \n" + " from sklearn.pipeline import make_pipeline" + " model = make_pipeline( \n" + " StandardScaler(with_mean=False), \n" + " {}())".format(type(self).__name__), + FutureWarning) _normalize = self.normalize else: _normalize = False From 9369ed322b495e621c524ca08dee9912c5c31c70 Mon Sep 17 00:00:00 2001 From: maikia Date: Fri, 26 Jun 2020 14:24:11 +0200 Subject: [PATCH 004/116] clean up --- sklearn/linear_model/tests/test_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index b0c7167e48c13..ee6efbaf87722 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -535,4 +535,4 @@ def test_fused_types_make_dataset(): assert_array_equal(xi_data_32, xicsr_data_32) assert_array_equal(xi_data_64, xicsr_data_64) assert_array_equal(yi_32, yicsr_32) 
- assert_array_equal(yi_64, yicsr_64) \ No newline at end of file + assert_array_equal(yi_64, yicsr_64) From 523d58895c928d6283c129883f3713427708d5a7 Mon Sep 17 00:00:00 2001 From: maikia Date: Fri, 26 Jun 2020 15:23:08 +0200 Subject: [PATCH 005/116] added test if warnings do show up --- sklearn/linear_model/tests/test_base.py | 26 ++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index ee6efbaf87722..a937e0720da81 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -24,6 +24,7 @@ from sklearn.datasets import make_regression from sklearn.datasets import load_iris + rng = np.random.RandomState(0) rtol = 1e-6 @@ -153,7 +154,31 @@ def test_linear_regression_sparse(random_state=0): assert_array_almost_equal(ols.predict(X) - y.ravel(), 0) +@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") +@pytest.mark.parametrize('normalize, n_warnings, warning', + [(True, 1, FutureWarning), + (False, 1, FutureWarning), + (None, 0, None)]) +def test_assure_warning_when_normalize(normalize, n_warnings, warning): + rng = check_random_state(0) + n_samples = 200 + n_features = 2 + X = rng.randn(n_samples, n_features) + X[X < 0.1] = 0. 
+ y = rng.rand(n_samples) + params = dict() + if normalize is not None: + params['normalize'] = normalize + + clf = LinearRegression(**params) + + with pytest.warns(warning) as record: + clf.fit(X, y) + assert len(record) == n_warnings + + # FIXME: 'normalize' to be removed in 0.26 +@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") @pytest.mark.parametrize('normalize', [True, False]) @pytest.mark.parametrize('fit_intercept', [True, False]) def test_linear_regression_sparse_equal_dense(normalize, fit_intercept): @@ -239,7 +264,6 @@ def test_linear_regression_pd_sparse_dataframe_warning(): assert not record -# FIXME: 'normalize' to be removed in 0.26 def test_preprocess_data(): n_samples = 200 n_features = 2 From a7b742282937f94ca1aebbe97976a0cfe1775694 Mon Sep 17 00:00:00 2001 From: maikia Date: Fri, 26 Jun 2020 15:27:18 +0200 Subject: [PATCH 006/116] clean up --- sklearn/linear_model/tests/test_base.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index a937e0720da81..8bc2ff87fb050 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -298,7 +298,6 @@ def test_preprocess_data(): assert_array_almost_equal(yt, y - expected_y_mean) -# FIXME: 'normalize' to be removed in 0.26 def test_preprocess_data_multioutput(): n_samples = 200 n_features = 3 @@ -325,7 +324,6 @@ def test_preprocess_data_multioutput(): assert_array_almost_equal(yt, y - y_mean) -# FIXME: 'normalize' to be removed in 0.26 def test_preprocess_data_weighted(): n_samples = 200 n_features = 2 @@ -359,7 +357,6 @@ def test_preprocess_data_weighted(): assert_array_almost_equal(yt, y - expected_y_mean) -# FIXME: 'normalize' to be removed in 0.26 def test_sparse_preprocess_data_with_return_mean(): n_samples = 200 n_features = 2 @@ -429,7 +426,6 @@ def test_preprocess_copy_data_no_checks(is_sparse, to_copy): assert np.may_share_memory(X_, X) -# FIXME: 'normalize' 
to be removed in 0.26 def test_dtype_preprocess_data(): n_samples = 200 n_features = 2 From 15368a72094491bd82fcdbc5a0e033403c7437a2 Mon Sep 17 00:00:00 2001 From: maikia Date: Fri, 26 Jun 2020 16:11:05 +0200 Subject: [PATCH 007/116] change of the warning msg --- sklearn/linear_model/_base.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 265e362cc6024..b53cea3cca916 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -119,7 +119,6 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True, This is here because nearly all linear models will want their data to be centered. This function also systematically makes y consistent with X.dtype """ - if isinstance(sample_weight, numbers.Number): sample_weight = None if sample_weight is not None: @@ -513,11 +512,9 @@ def fit(self, X, y, sample_weight=None): else: warnings.warn("'normalize' was deprecated in version 0.24" " and will be removed in 0.26." 
- " If you wish to keep normalizing your data" - " consider using a pipeline: \n" - " from sklearn.preprocessing import" - " StandardScaler \n" - " from sklearn.pipeline import make_pipeline" + " If you wish to keep equivalent behaviour, use" + " Pipeline with a StandardScaler in a" + " preprocessing stage:" " model = make_pipeline( \n" " StandardScaler(with_mean=False), \n" " {}())".format(type(self).__name__), From 293682fbaeceb3ab71180d008ef9e74fe980a040 Mon Sep 17 00:00:00 2001 From: maikia Date: Fri, 26 Jun 2020 16:11:54 +0200 Subject: [PATCH 008/116] clean up --- sklearn/linear_model/_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index b53cea3cca916..2a49fc0c6d3df 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -512,8 +512,8 @@ def fit(self, X, y, sample_weight=None): else: warnings.warn("'normalize' was deprecated in version 0.24" " and will be removed in 0.26." 
- " If you wish to keep equivalent behaviour, use" - " Pipeline with a StandardScaler in a" + " If you wish to keep an equivalent behaviour," + " use Pipeline with a StandardScaler in a" " preprocessing stage:" " model = make_pipeline( \n" " StandardScaler(with_mean=False), \n" From 6258d1c8a5f356dbe944fae453b196601a3fcad2 Mon Sep 17 00:00:00 2001 From: maikia Date: Fri, 26 Jun 2020 16:14:12 +0200 Subject: [PATCH 009/116] updated warning msg --- sklearn/linear_model/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 2a49fc0c6d3df..880867378cc7b 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -517,7 +517,7 @@ def fit(self, X, y, sample_weight=None): " preprocessing stage:" " model = make_pipeline( \n" " StandardScaler(with_mean=False), \n" - " {}())".format(type(self).__name__), + " {type(self).__name__}())", FutureWarning) _normalize = self.normalize else: From 2f4d60a0ab136b247f8922de8083e93191bc556f Mon Sep 17 00:00:00 2001 From: maikia Date: Fri, 26 Jun 2020 16:17:24 +0200 Subject: [PATCH 010/116] updated warning msg --- sklearn/linear_model/_base.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 880867378cc7b..547c9f797353c 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -507,18 +507,20 @@ def fit(self, X, y, sample_weight=None): if self.normalize != "deprecate": if not self.normalize: - warnings.warn("'normalize' was deprecated in version 0.24" - " and will be removed in 0.26.", FutureWarning) + warnings.warn( + "'normalize' was deprecated in version 0.24 and will be" + " removed in 0.26.", FutureWarning + ) else: - warnings.warn("'normalize' was deprecated in version 0.24" - " and will be removed in 0.26." 
- " If you wish to keep an equivalent behaviour," - " use Pipeline with a StandardScaler in a" - " preprocessing stage:" - " model = make_pipeline( \n" - " StandardScaler(with_mean=False), \n" - " {type(self).__name__}())", - FutureWarning) + warnings.warn( + "'normalize' was deprecated in version 0.24 and will be" + " removed in 0.26. If you wish to keep an equivalent" + " behaviour, use Pipeline with a StandardScaler in a" + " preprocessing stage:" + " model = make_pipeline( \n" + " StandardScaler(with_mean=False), \n" + " {type(self).__name__}())", FutureWarning + ) _normalize = self.normalize else: _normalize = False From 582532a0164b722c0c90958c5a419e60e1762239 Mon Sep 17 00:00:00 2001 From: maikia Date: Fri, 26 Jun 2020 16:18:59 +0200 Subject: [PATCH 011/116] removed ignore warning from the test --- sklearn/linear_model/tests/test_base.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index 8bc2ff87fb050..6378e9eae838b 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -24,7 +24,6 @@ from sklearn.datasets import make_regression from sklearn.datasets import load_iris - rng = np.random.RandomState(0) rtol = 1e-6 @@ -154,7 +153,6 @@ def test_linear_regression_sparse(random_state=0): assert_array_almost_equal(ols.predict(X) - y.ravel(), 0) -@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") @pytest.mark.parametrize('normalize, n_warnings, warning', [(True, 1, FutureWarning), (False, 1, FutureWarning), From 428f1fabbed2392ddf674ea23e2d09c04f539c8b Mon Sep 17 00:00:00 2001 From: maikia Date: Fri, 26 Jun 2020 16:20:08 +0200 Subject: [PATCH 012/116] Update sklearn/linear_model/tests/test_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/tests/test_base.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sklearn/linear_model/tests/test_base.py 
b/sklearn/linear_model/tests/test_base.py index 6378e9eae838b..42aa6074c4f43 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -153,10 +153,12 @@ def test_linear_regression_sparse(random_state=0): assert_array_almost_equal(ols.predict(X) - y.ravel(), 0) -@pytest.mark.parametrize('normalize, n_warnings, warning', - [(True, 1, FutureWarning), - (False, 1, FutureWarning), - (None, 0, None)]) +@pytest.mark.parametrize( + 'normalize, n_warnings, warning', + [(True, 1, FutureWarning), + (False, 1, FutureWarning), + ("deprecate", 0, None)] +) def test_assure_warning_when_normalize(normalize, n_warnings, warning): rng = check_random_state(0) n_samples = 200 From a93d367d08953d1572635348f8fd87bb1621af4f Mon Sep 17 00:00:00 2001 From: maikia Date: Fri, 26 Jun 2020 16:23:07 +0200 Subject: [PATCH 013/116] cleaning up the test --- sklearn/linear_model/tests/test_base.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index 42aa6074c4f43..432c35db682bf 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -167,13 +167,10 @@ def test_assure_warning_when_normalize(normalize, n_warnings, warning): X[X < 0.1] = 0. 
y = rng.rand(n_samples) params = dict() - if normalize is not None: - params['normalize'] = normalize - - clf = LinearRegression(**params) + model = LinearRegression(normalize=normalize) with pytest.warns(warning) as record: - clf.fit(X, y) + model.fit(X, y) assert len(record) == n_warnings From 0e89ab9ae50856300e0826c8a409fd1d7659c2d0 Mon Sep 17 00:00:00 2001 From: maikia Date: Fri, 26 Jun 2020 16:23:53 +0200 Subject: [PATCH 014/116] Update sklearn/linear_model/tests/test_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/tests/test_base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index 432c35db682bf..7ba8cf1500152 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -160,6 +160,7 @@ def test_linear_regression_sparse(random_state=0): ("deprecate", 0, None)] ) def test_assure_warning_when_normalize(normalize, n_warnings, warning): + # check that we issue a FutureWarning when normalize was set rng = check_random_state(0) n_samples = 200 n_features = 2 From 97c6221a0bcfe93ec63b634901bcc269c83ca1ad Mon Sep 17 00:00:00 2001 From: maikia Date: Fri, 26 Jun 2020 16:27:45 +0200 Subject: [PATCH 015/116] cleaning up the test --- sklearn/linear_model/tests/test_base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index 432c35db682bf..6b45cac5426dd 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -166,7 +166,6 @@ def test_assure_warning_when_normalize(normalize, n_warnings, warning): X = rng.randn(n_samples, n_features) X[X < 0.1] = 0. 
y = rng.rand(n_samples) - params = dict() model = LinearRegression(normalize=normalize) with pytest.warns(warning) as record: From 0b3e5b5c41041257df41a50d6c9fc78a8fa598de Mon Sep 17 00:00:00 2001 From: maikia Date: Fri, 26 Jun 2020 17:56:02 +0200 Subject: [PATCH 016/116] updated tests in test_coordinate_descent --- sklearn/linear_model/tests/test_coordinate_descent.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index 4963444300552..c63a512b86849 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -270,6 +270,8 @@ def test_lasso_cv_positive_constraint(): assert min(clf_constrained.coef_) >= 0 +# FIXME: 'normalize' to be removed in 0.26 +@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") @pytest.mark.parametrize( "LinearModel, params", [(Lasso, {"tol": 1e-16, "alpha": 0.1}), @@ -353,6 +355,8 @@ def test_model_pipeline_same_as_normalize_true(LinearModel, params): assert_allclose(y_pred_normalize, y_pred_standardize) +# FIXME: 'normalize' to be removed in 0.26 +@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") @pytest.mark.parametrize( "LinearModel, params", [(Lasso, {"tol": 1e-16, "alpha": 0.1}), From 7fc22eec942bead9a76a70fc0ece96dfe126f64f Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 29 Jun 2020 10:01:26 +0200 Subject: [PATCH 017/116] removed with_mean=False from standardScaler --- sklearn/linear_model/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 547c9f797353c..a3a0b5e1398dc 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -518,7 +518,7 @@ def fit(self, X, y, sample_weight=None): " behaviour, use Pipeline with a StandardScaler in a" " preprocessing stage:" " model = make_pipeline( \n" - " 
StandardScaler(with_mean=False), \n" + " StandardScaler(), \n" " {type(self).__name__}())", FutureWarning ) _normalize = self.normalize From 86bb2efbccc8fde04918f90bb4aff61f3a00e343 Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 29 Jun 2020 10:54:54 +0200 Subject: [PATCH 018/116] added private function _deprecate_normalize(normalize, default) to call warnings --- sklearn/linear_model/_base.py | 45 ++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index a3a0b5e1398dc..e58348fdc4d4a 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -46,6 +46,31 @@ # intercept oscillation. +def _deprecate_normalize(normalize, default): + if normalize == 'deprecate': + _normalize = default + else: + _normalize = normalize + + if default or (normalize != 'deprecate' and normalize): + warnings.warn( + "'normalize' was deprecated in version 0.24 and will be" + " removed in 0.26. If you wish to keep an equivalent" + " behaviour, use Pipeline with a StandardScaler in a" + " preprocessing stage:" + " model = make_pipeline( \n" + " StandardScaler(), \n" + " {type(self).__name__}())", FutureWarning + ) + elif (normalize != 'deprecate' and not normalize): + warnings.warn( + "'normalize' was deprecated in version 0.24 and will be" + " removed in 0.26.", FutureWarning + ) + + return _normalize + + def make_dataset(X, y, sample_weight, random_state=None): """Create ``Dataset`` abstraction for sparse and dense inputs. @@ -505,25 +530,7 @@ def fit(self, X, y, sample_weight=None): self : returns an instance of self. """ - if self.normalize != "deprecate": - if not self.normalize: - warnings.warn( - "'normalize' was deprecated in version 0.24 and will be" - " removed in 0.26.", FutureWarning - ) - else: - warnings.warn( - "'normalize' was deprecated in version 0.24 and will be" - " removed in 0.26. 
If you wish to keep an equivalent" - " behaviour, use Pipeline with a StandardScaler in a" - " preprocessing stage:" - " model = make_pipeline( \n" - " StandardScaler(), \n" - " {type(self).__name__}())", FutureWarning - ) - _normalize = self.normalize - else: - _normalize = False + _normalize = _deprecate_normalize(self.normalize, default=False) n_jobs_ = self.n_jobs X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc', 'coo'], From 2e5237782363f56bddab91997f97acbdf3f47080 Mon Sep 17 00:00:00 2001 From: maikia Date: Tue, 30 Jun 2020 10:53:09 +0200 Subject: [PATCH 019/116] added whats new --- doc/whats_new/v0.24.rst | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst index 60bf06cef8d02..fa50a53c8dd07 100644 --- a/doc/whats_new/v0.24.rst +++ b/doc/whats_new/v0.24.rst @@ -139,6 +139,17 @@ Changelog :pr:`16289` by :user:`Masashi Kishimoto ` and :user:`Olivier Grisel `. +:mod:`sklearn.linear_model` +....................... + +- |API|: The parameter ``normalize`` is deprecated in 0.24 and will be removed + in 0.26. The use of pipeline with StandardScaler is recommended instead. + ``normalize`` parameter did not take any effect if ``fit_intercept`` was set + to False and therefore was deemed confusing. + :class:`linear_model.LinearRegression` + :pr:`17743` by :user:`Maria Telenczuk ` and "user"`Guillaume Lemaitre + ` and :user:`Alexandre Gramfort `. + :mod:`sklearn.metrics` ...................... @@ -211,7 +222,7 @@ Changelog - |Enhancement| Avoid converting float32 input to float64 in :class:`neural_network.BernoulliRBM`. - :pr:`16352` by :user:`Arthur Imbert `. + :pr:`16352` by :user:`Arthur Imbert `. :mod:`sklearn.preprocessing` ............................ 
From c8d929506229ec222cb43b530718e5e259c3c94a Mon Sep 17 00:00:00 2001 From: maikia Date: Tue, 30 Jun 2020 11:04:59 +0200 Subject: [PATCH 020/116] corrected the text in whats new --- doc/whats_new/v0.24.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst index fa50a53c8dd07..db856d092b1c3 100644 --- a/doc/whats_new/v0.24.rst +++ b/doc/whats_new/v0.24.rst @@ -147,7 +147,7 @@ Changelog ``normalize`` parameter did not take any effect if ``fit_intercept`` was set to False and therefore was deemed confusing. :class:`linear_model.LinearRegression` - :pr:`17743` by :user:`Maria Telenczuk ` and "user"`Guillaume Lemaitre + :pr:`17743` by :user:`Maria Telenczuk ` and :user:`Guillaume Lemaitre ` and :user:`Alexandre Gramfort `. :mod:`sklearn.metrics` From 5a3008ba852d5b3a8e34eef391a33b7b675c55a3 Mon Sep 17 00:00:00 2001 From: maikia Date: Wed, 1 Jul 2020 15:22:53 +0200 Subject: [PATCH 021/116] extended underline of the title --- doc/whats_new/v0.24.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst index f6b3a8afb6039..c4dfbb1095e4f 100644 --- a/doc/whats_new/v0.24.rst +++ b/doc/whats_new/v0.24.rst @@ -148,7 +148,7 @@ Changelog :user:`Olivier Grisel `. :mod:`sklearn.linear_model` -....................... +........................... - |API|: The parameter ``normalize`` is deprecated in 0.24 and will be removed in 0.26. The use of pipeline with StandardScaler is recommended instead. 
From 45b42e3d565904c6280b5c4afea6908d68f3b247 Mon Sep 17 00:00:00 2001 From: maikia Date: Wed, 1 Jul 2020 16:36:33 +0200 Subject: [PATCH 022/116] Update doc/whats_new/v0.24.rst Co-authored-by: Nicolas Hug --- doc/whats_new/v0.24.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst index c4dfbb1095e4f..11f35c011564a 100644 --- a/doc/whats_new/v0.24.rst +++ b/doc/whats_new/v0.24.rst @@ -150,7 +150,7 @@ Changelog :mod:`sklearn.linear_model` ........................... -- |API|: The parameter ``normalize`` is deprecated in 0.24 and will be removed +- |API|: The parameter ``normalize`` of :class:`linear_model.LinearRegression` is deprecated and will be removed in 0.26. The use of pipeline with StandardScaler is recommended instead. ``normalize`` parameter did not take any effect if ``fit_intercept`` was set to False and therefore was deemed confusing. From afc9c019c04761d33f775af3ae8b1bf6e8830b04 Mon Sep 17 00:00:00 2001 From: maikia Date: Wed, 1 Jul 2020 16:36:52 +0200 Subject: [PATCH 023/116] Update doc/whats_new/v0.24.rst Co-authored-by: Nicolas Hug --- doc/whats_new/v0.24.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst index 11f35c011564a..6eb8cf2d24972 100644 --- a/doc/whats_new/v0.24.rst +++ b/doc/whats_new/v0.24.rst @@ -154,7 +154,6 @@ Changelog in 0.26. The use of pipeline with StandardScaler is recommended instead. ``normalize`` parameter did not take any effect if ``fit_intercept`` was set to False and therefore was deemed confusing. - :class:`linear_model.LinearRegression` :pr:`17743` by :user:`Maria Telenczuk ` and :user:`Guillaume Lemaitre ` and :user:`Alexandre Gramfort `. 
From 63f93ccca2f583121dc26602f580be8ac5a776ce Mon Sep 17 00:00:00 2001 From: maikia Date: Wed, 1 Jul 2020 16:37:14 +0200 Subject: [PATCH 024/116] Update doc/whats_new/v0.24.rst Co-authored-by: Nicolas Hug --- doc/whats_new/v0.24.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst index 6eb8cf2d24972..251d2005749d1 100644 --- a/doc/whats_new/v0.24.rst +++ b/doc/whats_new/v0.24.rst @@ -151,7 +151,7 @@ Changelog ........................... - |API|: The parameter ``normalize`` of :class:`linear_model.LinearRegression` is deprecated and will be removed - in 0.26. The use of pipeline with StandardScaler is recommended instead. + in 0.26. The use of :class:`~sklearn.pipeline.Pipeline` with :class:`~sklearn.preprocessing.StandardScaler` is recommended instead. ``normalize`` parameter did not take any effect if ``fit_intercept`` was set to False and therefore was deemed confusing. :pr:`17743` by :user:`Maria Telenczuk ` and :user:`Guillaume Lemaitre From ca413e18758258bbb016c84b27c026185aba5ec5 Mon Sep 17 00:00:00 2001 From: maikia Date: Wed, 1 Jul 2020 16:42:40 +0200 Subject: [PATCH 025/116] update the test to init LinearRegression outside of the pytest.warns --- sklearn/linear_model/tests/test_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index a10558486c619..d1d124c374d70 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -246,8 +246,9 @@ def test_linear_regression_pd_sparse_dataframe_warning(): df[str(col)] = arr msg = "pandas.DataFrame with sparse columns found." 
+ + reg = LinearRegression() with pytest.warns(UserWarning, match=msg): - reg = LinearRegression() reg.fit(df.iloc[:, 0:2], df.iloc[:, 3]) # does not warn when the whole dataframe is sparse @@ -255,7 +256,6 @@ def test_linear_regression_pd_sparse_dataframe_warning(): assert hasattr(df, "sparse") with pytest.warns(None) as record: - reg = LinearRegression() reg.fit(df.iloc[:, 0:2], df.iloc[:, 3]) assert not record From e546ec10df6c6b38782e3e84b03d8d4581cae2be Mon Sep 17 00:00:00 2001 From: maikia Date: Wed, 1 Jul 2020 16:44:28 +0200 Subject: [PATCH 026/116] Update sklearn/linear_model/_base.py Co-authored-by: Nicolas Hug --- sklearn/linear_model/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index e58348fdc4d4a..82e6e9c89ed2d 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -500,7 +500,7 @@ class LinearRegression(MultiOutputMixin, RegressorMixin, LinearModel): array([16.]) """ @_deprecate_positional_args - def __init__(self, *, fit_intercept=True, normalize='deprecate', + def __init__(self, *, fit_intercept=True, normalize='deprecated', copy_X=True, n_jobs=None): self.fit_intercept = fit_intercept self.normalize = normalize From 3886e0278ab1020e0ba0d51d43f0aed5ecf9c00f Mon Sep 17 00:00:00 2001 From: maikia Date: Wed, 1 Jul 2020 16:47:09 +0200 Subject: [PATCH 027/116] changed the warning message --- sklearn/linear_model/_base.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index e58348fdc4d4a..49b6efbb9ee14 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -55,12 +55,11 @@ def _deprecate_normalize(normalize, default): if default or (normalize != 'deprecate' and normalize): warnings.warn( "'normalize' was deprecated in version 0.24 and will be" - " removed in 0.26. 
If you wish to keep an equivalent" - " behaviour, use Pipeline with a StandardScaler in a" + " removed in 0.26. If you still wish to normalize use Pipeline " + " with a StandardScaler in a" " preprocessing stage:" - " model = make_pipeline( \n" - " StandardScaler(), \n" - " {type(self).__name__}())", FutureWarning + " model = make_pipeline(StandardScaler()," + " {type(self).__name__}())", FutureWarning ) elif (normalize != 'deprecate' and not normalize): warnings.warn( From 4e4afe7b92c9f6b8f1dd5e9f3bf11f6645fbfd34 Mon Sep 17 00:00:00 2001 From: maikia Date: Wed, 1 Jul 2020 16:48:06 +0200 Subject: [PATCH 028/116] changed deprecate for deprecated --- sklearn/linear_model/_base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 2ee4fdb420eab..06c4b0274f492 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -47,12 +47,12 @@ def _deprecate_normalize(normalize, default): - if normalize == 'deprecate': + if normalize == 'deprecated': _normalize = default else: _normalize = normalize - if default or (normalize != 'deprecate' and normalize): + if default or (normalize != 'deprecated' and normalize): warnings.warn( "'normalize' was deprecated in version 0.24 and will be" " removed in 0.26. 
If you still wish to normalize use Pipeline " @@ -61,7 +61,7 @@ def _deprecate_normalize(normalize, default): " model = make_pipeline(StandardScaler()," " {type(self).__name__}())", FutureWarning ) - elif (normalize != 'deprecate' and not normalize): + elif (normalize != 'deprecated' and not normalize): warnings.warn( "'normalize' was deprecated in version 0.24 and will be" " removed in 0.26.", FutureWarning From e29932d081fff792375d05848a11879d4e3fc409 Mon Sep 17 00:00:00 2001 From: maikia Date: Wed, 1 Jul 2020 16:50:03 +0200 Subject: [PATCH 029/116] changed deprecate for deprecated --- sklearn/linear_model/tests/test_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index d1d124c374d70..d2a082b87cbb5 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -157,7 +157,7 @@ def test_linear_regression_sparse(random_state=0): 'normalize, n_warnings, warning', [(True, 1, FutureWarning), (False, 1, FutureWarning), - ("deprecate", 0, None)] + ("deprecated", 0, None)] ) def test_assure_warning_when_normalize(normalize, n_warnings, warning): # check that we issue a FutureWarning when normalize was set From 7f32058b674a2ef283a8032183614bd2f79f70b1 Mon Sep 17 00:00:00 2001 From: maikia Date: Wed, 1 Jul 2020 16:58:17 +0200 Subject: [PATCH 030/116] added checking if message correct in the test --- sklearn/linear_model/tests/test_base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index d2a082b87cbb5..509da8baa3108 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -172,6 +172,8 @@ def test_assure_warning_when_normalize(normalize, n_warnings, warning): with pytest.warns(warning) as record: model.fit(X, y) assert len(record) == n_warnings + if n_warnings: + assert "'normalize' was 
deprecated" in str(record[0].message) # FIXME: 'normalize' to be removed in 0.26 From 6d3ee7272d5b67a550f15026f38799568a53be9a Mon Sep 17 00:00:00 2001 From: maikia Date: Thu, 2 Jul 2020 14:25:56 +0200 Subject: [PATCH 031/116] Update sklearn/linear_model/tests/test_base.py Co-authored-by: Nicolas Hug --- sklearn/linear_model/tests/test_base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index 509da8baa3108..571321f0307fe 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -159,6 +159,7 @@ def test_linear_regression_sparse(random_state=0): (False, 1, FutureWarning), ("deprecated", 0, None)] ) +# FIXME remove test in 0.26 def test_assure_warning_when_normalize(normalize, n_warnings, warning): # check that we issue a FutureWarning when normalize was set rng = check_random_state(0) From ad7a508ded3f72915573f9be058859ef62198bb0 Mon Sep 17 00:00:00 2001 From: maikia Date: Thu, 2 Jul 2020 14:30:41 +0200 Subject: [PATCH 032/116] updated the whats new file --- doc/whats_new/v0.24.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst index 251d2005749d1..fbabf165a16b2 100644 --- a/doc/whats_new/v0.24.rst +++ b/doc/whats_new/v0.24.rst @@ -150,8 +150,10 @@ Changelog :mod:`sklearn.linear_model` ........................... -- |API|: The parameter ``normalize`` of :class:`linear_model.LinearRegression` is deprecated and will be removed - in 0.26. The use of :class:`~sklearn.pipeline.Pipeline` with :class:`~sklearn.preprocessing.StandardScaler` is recommended instead. +- |API|: The parameter ``normalize`` of :class:`linear_model.LinearRegression` + is deprecated and will be removed in 0.26. The use of + :class:`~sklearn.pipeline.Pipeline` with + :class:`~sklearn.preprocessing.StandardScaler` is recommended instead. 
``normalize`` parameter did not take any effect if ``fit_intercept`` was set to False and therefore was deemed confusing. :pr:`17743` by :user:`Maria Telenczuk ` and :user:`Guillaume Lemaitre From 9cceafedc4c3732ed0e64fd13d662528592e0529 Mon Sep 17 00:00:00 2001 From: maikia Date: Thu, 2 Jul 2020 14:32:25 +0200 Subject: [PATCH 033/116] updated warning message --- sklearn/linear_model/_base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 06c4b0274f492..3bbe0bee1c1c5 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -64,7 +64,8 @@ def _deprecate_normalize(normalize, default): elif (normalize != 'deprecated' and not normalize): warnings.warn( "'normalize' was deprecated in version 0.24 and will be" - " removed in 0.26.", FutureWarning + " removed in 0.26. Don't set 'normalize' parameter and leave it to + " its default value", FutureWarning ) return _normalize From 798efccf516fe40e49ae78219666bfe3307093f6 Mon Sep 17 00:00:00 2001 From: maikia Date: Thu, 2 Jul 2020 14:34:11 +0200 Subject: [PATCH 034/116] clean up --- sklearn/linear_model/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 3bbe0bee1c1c5..ba05abadaa1fd 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -64,7 +64,7 @@ def _deprecate_normalize(normalize, default): elif (normalize != 'deprecated' and not normalize): warnings.warn( "'normalize' was deprecated in version 0.24 and will be" - " removed in 0.26. Don't set 'normalize' parameter and leave it to + " removed in 0.26. 
Don't set 'normalize' parameter and leave it to" " its default value", FutureWarning ) From 3cbb7b08a4be2b7b1898f8ef92d9fa33f168b9e3 Mon Sep 17 00:00:00 2001 From: maikia Date: Wed, 22 Jul 2020 12:30:24 +0200 Subject: [PATCH 035/116] checking for the version sklearn and gives the appropriate message if default True --- sklearn/linear_model/_base.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index ba05abadaa1fd..44ce65e6706c3 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -22,6 +22,7 @@ from scipy import linalg from scipy import sparse from scipy.special import expit +import sklearn from joblib import Parallel, delayed from ..base import (BaseEstimator, ClassifierMixin, RegressorMixin, @@ -32,7 +33,7 @@ from ..utils import check_random_state from ..utils.extmath import safe_sparse_dot from ..utils.sparsefuncs import mean_variance_axis, inplace_column_scale -from ..utils.fixes import sparse_lsqr +from ..utils.fixes import sparse_lsqr, parse_version from ..utils._seq_dataset import ArrayDataset32, CSRDataset32 from ..utils._seq_dataset import ArrayDataset64, CSRDataset64 from ..utils.validation import check_is_fitted, _check_sample_weight @@ -52,7 +53,22 @@ def _deprecate_normalize(normalize, default): else: _normalize = normalize - if default or (normalize != 'deprecated' and normalize): + if default: + assert parse_version(sklearn.__version__) < parse_version('0.26'), ( + "default of 'normalize' should now be set to False" + ) + + if default and parse_version(sklearn.__version__) < parse_version('0.26'): + warnings.warn( + " default of 'normalize' will be set to False and it will be" + " deprecated in version 0.26. 
It will be removed in version 0.28" + " If you wish to normalize use Pipeline " + " with a StandardScaler in a" + " preprocessing stage:" + " model = make_pipeline(StandardScaler()," + " {type(self).__name__}())", FutureWarning + ) + elif normalize != 'deprecated' and normalize: warnings.warn( "'normalize' was deprecated in version 0.24 and will be" " removed in 0.26. If you still wish to normalize use Pipeline " @@ -61,7 +77,7 @@ def _deprecate_normalize(normalize, default): " model = make_pipeline(StandardScaler()," " {type(self).__name__}())", FutureWarning ) - elif (normalize != 'deprecated' and not normalize): + elif normalize != 'deprecated' and not normalize: warnings.warn( "'normalize' was deprecated in version 0.24 and will be" " removed in 0.26. Don't set 'normalize' parameter and leave it to" From d8ee96c771c9303f42cf7205b547b1e81203f66c Mon Sep 17 00:00:00 2001 From: maikia Date: Wed, 22 Jul 2020 15:10:55 +0200 Subject: [PATCH 036/116] added tests for test_deprecate_normalize --- sklearn/linear_model/_base.py | 31 ++++++++++---- sklearn/linear_model/tests/test_base.py | 55 +++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 9 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 44ce65e6706c3..610abae8f838e 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -47,6 +47,7 @@ # intercept oscillation. +# FIXME remove function in 0.28 def _deprecate_normalize(normalize, default): if normalize == 'deprecated': _normalize = default @@ -58,30 +59,42 @@ def _deprecate_normalize(normalize, default): "default of 'normalize' should now be set to False" ) - if default and parse_version(sklearn.__version__) < parse_version('0.26'): + if normalize != 'deprecated' and default and \ + parse_version(sklearn.__version__) < parse_version('0.26'): warnings.warn( " default of 'normalize' will be set to False and it will be" " deprecated in version 0.26. 
It will be removed in version 0.28" - " If you wish to normalize use Pipeline " + " If you wish to normalize use Pipeline" " with a StandardScaler in a" " preprocessing stage:" " model = make_pipeline(StandardScaler()," " {type(self).__name__}())", FutureWarning ) elif normalize != 'deprecated' and normalize: + if parse_version(sklearn.__version__) < parse_version('0.26'): + depr = '0.24' + remove = '0.26' + else: + depr = '0.26' + remove = '0.28' warnings.warn( - "'normalize' was deprecated in version 0.24 and will be" - " removed in 0.26. If you still wish to normalize use Pipeline " - " with a StandardScaler in a" - " preprocessing stage:" + f"'normalize' was deprecated in version {depr} and will be" + f" removed in {remove}. If you still wish to normalize use" + " Pipeline with a StandardScaler in a preprocessing stage:" " model = make_pipeline(StandardScaler()," " {type(self).__name__}())", FutureWarning ) elif normalize != 'deprecated' and not normalize: + if parse_version(sklearn.__version__) < parse_version('0.26'): + depr = '0.24' + remove = '0.26' + else: + depr = '0.26' + remove = '0.28' warnings.warn( - "'normalize' was deprecated in version 0.24 and will be" - " removed in 0.26. Don't set 'normalize' parameter and leave it to" - " its default value", FutureWarning + f"'normalize' was deprecated in version {depr} and will be" + f" removed in {remove}. 
Don't set 'normalize' parameter" + " and leave it to its default value", FutureWarning ) return _normalize diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index 571321f0307fe..5c844e6402538 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -9,6 +9,7 @@ from scipy import sparse from scipy import linalg +import sklearn from sklearn.utils._testing import assert_array_almost_equal from sklearn.utils._testing import assert_array_equal from sklearn.utils._testing import assert_almost_equal @@ -16,6 +17,7 @@ from sklearn.utils.fixes import parse_version from sklearn.linear_model import LinearRegression +from sklearn.linear_model._base import _deprecate_normalize from sklearn.linear_model._base import _preprocess_data from sklearn.linear_model._base import _rescale_data from sklearn.linear_model._base import make_dataset @@ -137,6 +139,59 @@ def test_fit_intercept(): lr3_without_intercept.coef_.ndim) +@pytest.fixture(scope='function') +# FIXME remove fixture in 0.28 +def set_correct_version(): + present_version = sklearn.__version__ + yield set_correct_version # provide the fixture value + sklearn.__version__ = present_version + + +@pytest.mark.parametrize('normalize', [True, False, 'deprecated']) +@pytest.mark.parametrize('default', [True, False]) +@pytest.mark.parametrize('version', ['0.24', '0.26']) +# FIXME remove test in 0.28 +def test_deprecate_normalize(set_correct_version, normalize, default, version): + + if version == '0.26' and default: + output = normalize + expected = AssertionError + warning_msg = 'should now be set to False' + elif normalize == 'deprecated': + output = default + expected = None + warning_msg = '' + elif (version == '0.24' and default) or \ + (not default and version == '0.26'): + output = normalize + expected = FutureWarning + warning_msg = '0.28' + elif not default and version == '0.24': + output = normalize + expected = FutureWarning + 
warning_msg = '0.24' + + sklearn.__version__ = version + + if expected == AssertionError: + with pytest.raises(AssertionError) as record: + normalize = _deprecate_normalize(normalize, default) + assert warning_msg in str(record.value) + assert normalize == output + else: + with pytest.warns(expected) as record: + normalize = _deprecate_normalize(normalize, default) + assert normalize == output + + if expected is None: + n_warnings = 0 + else: + n_warnings = 1 + assert len(record) == n_warnings + if n_warnings: + assert warning_msg in str(record[0].message) + + def test_linear_regression_sparse(random_state=0): # Test that linear regression also works with sparse data random_state = check_random_state(random_state) From 63424b77469d9a9eda6d3797a7bc1ac04fe60081 Mon Sep 17 00:00:00 2001 From: maikia Date: Fri, 24 Jul 2020 13:35:58 +0200 Subject: [PATCH 037/116] updated all the messages depending on the value of normalize, default and current sklearn version + the test --- sklearn/linear_model/_base.py | 14 +++--- sklearn/linear_model/tests/test_base.py | 59 ++++++++++++++++--------- 2 files changed, 44 insertions(+), 29 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 610abae8f838e..b2bfcefb62756 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -58,19 +58,17 @@ def _deprecate_normalize(normalize, default): assert parse_version(sklearn.__version__) < parse_version('0.26'), ( "default of 'normalize' should now be set to False" ) - - if normalize != 'deprecated' and default and \ + if default and normalize == 'deprecated' and \ parse_version(sklearn.__version__) < parse_version('0.26'): warnings.warn( - " default of 'normalize' will be set to False and it will be" - " deprecated in version 0.26. 
It will be removed in version 0.28" - " If you wish to normalize use Pipeline" - " with a StandardScaler in a" + " default of 'normalize' will be set to False in version 0.26 and" + " deprecated afterwards." + " Pass normalize=False and use Pipeline with a StandardScaler in a" " preprocessing stage:" " model = make_pipeline(StandardScaler()," " {type(self).__name__}())", FutureWarning ) - elif normalize != 'deprecated' and normalize: + elif normalize != 'deprecated' and normalize and not default: if parse_version(sklearn.__version__) < parse_version('0.26'): depr = '0.24' remove = '0.26' @@ -84,7 +82,7 @@ def _deprecate_normalize(normalize, default): " model = make_pipeline(StandardScaler()," " {type(self).__name__}())", FutureWarning ) - elif normalize != 'deprecated' and not normalize: + elif normalize != 'deprecated' and not normalize and not default: if parse_version(sklearn.__version__) < parse_version('0.26'): depr = '0.24' remove = '0.26' diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index 5c844e6402538..977f8df9473a5 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -152,36 +152,52 @@ def set_correct_version(): @pytest.mark.parametrize('version', ['0.24', '0.26']) # FIXME remove test in 0.28 def test_deprecate_normalize(set_correct_version, normalize, default, version): - - if version == '0.26' and default: + if not default: + if normalize == 'deprecated': + # no warning + output = default + expected = None + warning_msg = [] + else: + output = normalize + expected = FutureWarning + warning_msg = ['0.26'] + if not normalize: + warning_msg.append('default value') + else: + warning_msg.append('StandardScaler()') + if version == '0.24': + warning_msg.append('0.24') + elif version == '0.26': + warning_msg.append('0.28') + elif default and version == '0.24': + if normalize == 'deprecated': + # warning to pass False and use StandardScaler + output = default + 
expected = FutureWarning + warning_msg = ['False', '0.26', 'StandardScaler()'] + else: + # no warning + output = normalize + expected = None + warning_msg = [] + elif default and version == '0.26': + # assertion error. From v0.26 there should be no normalize set to True output = normalize expected = AssertionError warning_msg = 'should now be set to False' - elif normalize == 'deprecated': - output = default - expected = None - warning_msg = '' - elif (version == '0.24' and default) or \ - (not default and version == '0.26'): - output = normalize - expected = FutureWarning - warning_msg = '0.28' - elif not default and version == '0.24': - output = normalize - expected = FutureWarning - warning_msg = '0.24' sklearn.__version__ = version if expected == AssertionError: with pytest.raises(AssertionError) as record: - normalize = _deprecate_normalize(normalize, default) - assert warning_msg in str(record.value) - assert normalize == output + _normalize = _deprecate_normalize(normalize, default) + assert all(warning in str(record.value) for warning + in warning_msg) else: with pytest.warns(expected) as record: - normalize = _deprecate_normalize(normalize, default) - assert normalize == output + _normalize = _deprecate_normalize(normalize, default) + assert _normalize == output if expected is None: n_warnings = 0 @@ -189,7 +205,8 @@ def test_deprecate_normalize(set_correct_version, normalize, default, version): n_warnings = 1 assert len(record) == n_warnings if n_warnings: - assert warning_msg in str(record[0].message) + assert all([warning in str(record[0].message) for + warning in warning_msg]) def test_linear_regression_sparse(random_state=0): From 117059e7ee86c4745635214dfbdd22402e48eb89 Mon Sep 17 00:00:00 2001 From: maikia Date: Fri, 24 Jul 2020 14:29:23 +0200 Subject: [PATCH 038/116] Update sklearn/linear_model/_base.py Co-authored-by: Alexandre Gramfort --- sklearn/linear_model/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index b2bfcefb62756..8430a7dae34c2 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -62,7 +62,7 @@ def _deprecate_normalize(normalize, default): parse_version(sklearn.__version__) < parse_version('0.26'): warnings.warn( " default of 'normalize' will be set to False in version 0.26 and" - " deprecated afterwards." + " deprecated in version 0.28." " Pass normalize=False and use Pipeline with a StandardScaler in a" " preprocessing stage:" " model = make_pipeline(StandardScaler()," From 5ae8ece2c53123c036735d264ebf084682afaaf1 Mon Sep 17 00:00:00 2001 From: maikia Date: Fri, 24 Jul 2020 15:11:06 +0200 Subject: [PATCH 039/116] removed functionality from version 0.26 up\ ; --- sklearn/linear_model/_base.py | 29 +++++++------------------ sklearn/linear_model/tests/test_base.py | 15 ++++--------- 2 files changed, 12 insertions(+), 32 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 8430a7dae34c2..51cc01ee02c1c 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -47,17 +47,16 @@ # intercept oscillation. -# FIXME remove function in 0.28 +# FIXME in 0.26: variable 'normalize' should be removed from linear models +# where now normalize=False. 
default value of 'normalize' should be changed to +# False in linear models where now normalize=True and should be deprecated +# change this function accordingly def _deprecate_normalize(normalize, default): if normalize == 'deprecated': _normalize = default else: _normalize = normalize - if default: - assert parse_version(sklearn.__version__) < parse_version('0.26'), ( - "default of 'normalize' should now be set to False" - ) if default and normalize == 'deprecated' and \ parse_version(sklearn.__version__) < parse_version('0.26'): warnings.warn( @@ -69,29 +68,17 @@ def _deprecate_normalize(normalize, default): " {type(self).__name__}())", FutureWarning ) elif normalize != 'deprecated' and normalize and not default: - if parse_version(sklearn.__version__) < parse_version('0.26'): - depr = '0.24' - remove = '0.26' - else: - depr = '0.26' - remove = '0.28' warnings.warn( - f"'normalize' was deprecated in version {depr} and will be" - f" removed in {remove}. If you still wish to normalize use" + f"'normalize' was deprecated in version 0.24 and will be" + f" removed in 0.26. If you still wish to normalize use" " Pipeline with a StandardScaler in a preprocessing stage:" " model = make_pipeline(StandardScaler()," " {type(self).__name__}())", FutureWarning ) elif normalize != 'deprecated' and not normalize and not default: - if parse_version(sklearn.__version__) < parse_version('0.26'): - depr = '0.24' - remove = '0.26' - else: - depr = '0.26' - remove = '0.28' warnings.warn( - f"'normalize' was deprecated in version {depr} and will be" - f" removed in {remove}. Don't set 'normalize' parameter" + f"'normalize' was deprecated in version 0.24 and will be" + f" removed in 0.26. 
Don't set 'normalize' parameter" " and leave it to its default value", FutureWarning ) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index 977f8df9473a5..cefc5a56ec338 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -149,8 +149,8 @@ def set_correct_version(): @pytest.mark.parametrize('normalize', [True, False, 'deprecated']) @pytest.mark.parametrize('default', [True, False]) -@pytest.mark.parametrize('version', ['0.24', '0.26']) -# FIXME remove test in 0.28 +@pytest.mark.parametrize('version', ['0.24', '0.25']) +# FIXME update test in 0.26 for new versions def test_deprecate_normalize(set_correct_version, normalize, default, version): if not default: if normalize == 'deprecated': @@ -166,11 +166,9 @@ def test_deprecate_normalize(set_correct_version, normalize, default, version): warning_msg.append('default value') else: warning_msg.append('StandardScaler()') - if version == '0.24': + if version == '0.24' or version == '0.25': warning_msg.append('0.24') - elif version == '0.26': - warning_msg.append('0.28') - elif default and version == '0.24': + elif default and (version == '0.24' or version == '0.25'): if normalize == 'deprecated': # warning to pass False and use StandardScaler output = default @@ -181,11 +179,6 @@ def test_deprecate_normalize(set_correct_version, normalize, default, version): output = normalize expected = None warning_msg = [] - elif default and version == '0.26': - # assertion error. 
From v0.26 there should be no normalize set to True - output = normalize - expected = AssertionError - warning_msg = 'should now be set to False' sklearn.__version__ = version From 3aec6153dcda25e4ae9043e85bacb68898d6196d Mon Sep 17 00:00:00 2001 From: maikia Date: Fri, 24 Jul 2020 15:12:18 +0200 Subject: [PATCH 040/116] clean up --- sklearn/linear_model/_base.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 51cc01ee02c1c..5f97e2f6cff86 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -69,16 +69,16 @@ def _deprecate_normalize(normalize, default): ) elif normalize != 'deprecated' and normalize and not default: warnings.warn( - f"'normalize' was deprecated in version 0.24 and will be" - f" removed in 0.26. If you still wish to normalize use" + "'normalize' was deprecated in version 0.24 and will be" + " removed in 0.26. If you still wish to normalize use" " Pipeline with a StandardScaler in a preprocessing stage:" " model = make_pipeline(StandardScaler()," " {type(self).__name__}())", FutureWarning ) elif normalize != 'deprecated' and not normalize and not default: warnings.warn( - f"'normalize' was deprecated in version 0.24 and will be" - f" removed in 0.26. Don't set 'normalize' parameter" + "'normalize' was deprecated in version 0.24 and will be" + " removed in 0.26. 
Don't set 'normalize' parameter" " and leave it to its default value", FutureWarning ) From ff6bec1c9a0cdb38ff0b80c4db2909ff9c5d6627 Mon Sep 17 00:00:00 2001 From: maikia Date: Fri, 24 Jul 2020 16:17:21 +0200 Subject: [PATCH 041/116] added relative import to _base.py --- sklearn/linear_model/_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 5f97e2f6cff86..657812fc357a8 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -22,9 +22,9 @@ from scipy import linalg from scipy import sparse from scipy.special import expit -import sklearn from joblib import Parallel, delayed +from .. import __version__ from ..base import (BaseEstimator, ClassifierMixin, RegressorMixin, MultiOutputMixin) from ..utils import check_array @@ -58,7 +58,7 @@ def _deprecate_normalize(normalize, default): _normalize = normalize if default and normalize == 'deprecated' and \ - parse_version(sklearn.__version__) < parse_version('0.26'): + parse_version(__version__) < parse_version('0.26'): warnings.warn( " default of 'normalize' will be set to False in version 0.26 and" " deprecated in version 0.28." From 5170b2c1f013a9501e9d612fe0e0bc6b246c98c9 Mon Sep 17 00:00:00 2001 From: maikia Date: Wed, 5 Aug 2020 12:37:59 +0200 Subject: [PATCH 042/116] removed checking for version of sklearn in _base.py --- sklearn/linear_model/_base.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 657812fc357a8..e64ed3919ae04 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -24,7 +24,6 @@ from scipy.special import expit from joblib import Parallel, delayed -from .. 
import __version__ from ..base import (BaseEstimator, ClassifierMixin, RegressorMixin, MultiOutputMixin) from ..utils import check_array @@ -33,7 +32,7 @@ from ..utils import check_random_state from ..utils.extmath import safe_sparse_dot from ..utils.sparsefuncs import mean_variance_axis, inplace_column_scale -from ..utils.fixes import sparse_lsqr, parse_version +from ..utils.fixes import sparse_lsqr from ..utils._seq_dataset import ArrayDataset32, CSRDataset32 from ..utils._seq_dataset import ArrayDataset64, CSRDataset64 from ..utils.validation import check_is_fitted, _check_sample_weight @@ -48,17 +47,15 @@ # FIXME in 0.26: variable 'normalize' should be removed from linear models -# where now normalize=False. default value of 'normalize' should be changed to -# False in linear models where now normalize=True and should be deprecated -# change this function accordingly +# in cases where now normalize=False. default value of 'normalize' should be +# changed to False in linear models where now normalize=True def _deprecate_normalize(normalize, default): if normalize == 'deprecated': _normalize = default else: _normalize = normalize - if default and normalize == 'deprecated' and \ - parse_version(__version__) < parse_version('0.26'): + if default and normalize == 'deprecated': warnings.warn( " default of 'normalize' will be set to False in version 0.26 and" " deprecated in version 0.28." 
From ac18d63b49f684b1ffaa23664c094afb17262eec Mon Sep 17 00:00:00 2001 From: maikia Date: Wed, 5 Aug 2020 12:41:28 +0200 Subject: [PATCH 043/116] removed checking for the version from the test --- sklearn/linear_model/tests/test_base.py | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index cefc5a56ec338..44930ae77dc65 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -9,7 +9,6 @@ from scipy import sparse from scipy import linalg -import sklearn from sklearn.utils._testing import assert_array_almost_equal from sklearn.utils._testing import assert_array_equal from sklearn.utils._testing import assert_almost_equal @@ -139,19 +138,10 @@ def test_fit_intercept(): lr3_without_intercept.coef_.ndim) -@pytest.fixture(scope='function') -# FIXME remove fixture in 0.28 -def set_correct_version(): - present_version = sklearn.__version__ - yield set_correct_version # provide the fixture value - sklearn.__version__ = present_version - - @pytest.mark.parametrize('normalize', [True, False, 'deprecated']) @pytest.mark.parametrize('default', [True, False]) -@pytest.mark.parametrize('version', ['0.24', '0.25']) # FIXME update test in 0.26 for new versions -def test_deprecate_normalize(set_correct_version, normalize, default, version): +def test_deprecate_normalize(normalize, default): if not default: if normalize == 'deprecated': # no warning @@ -166,9 +156,7 @@ def test_deprecate_normalize(set_correct_version, normalize, default, version): warning_msg.append('default value') else: warning_msg.append('StandardScaler()') - if version == '0.24' or version == '0.25': - warning_msg.append('0.24') - elif default and (version == '0.24' or version == '0.25'): + elif default: if normalize == 'deprecated': # warning to pass False and use StandardScaler output = default @@ -180,8 +168,6 @@ def 
test_deprecate_normalize(set_correct_version, normalize, default, version): expected = None warning_msg = [] - sklearn.__version__ = version - if expected == AssertionError: with pytest.raises(AssertionError) as record: _normalize = _deprecate_normalize(normalize, default) From 3ee028ebb29a314be64345c708c4ef9a8e359a86 Mon Sep 17 00:00:00 2001 From: maikia Date: Wed, 28 Oct 2020 12:08:32 +0100 Subject: [PATCH 044/116] updated the message --- sklearn/linear_model/_base.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 5bc2ce012d9a5..dbcaa5f115118 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -11,6 +11,7 @@ # Lars Buitinck # Maryan Morel # Giorgio Patrini +# Maria Telenczuk # License: BSD 3 clause from abc import ABCMeta, abstractmethod @@ -51,7 +52,7 @@ # FIXME in 0.26: variable 'normalize' should be removed from linear models # in cases where now normalize=False. default value of 'normalize' should be # changed to False in linear models where now normalize=True -def _deprecate_normalize(normalize, default): +def _deprecate_normalize(normalize, default, est_name): if normalize == 'deprecated': _normalize = default else: @@ -61,18 +62,23 @@ def _deprecate_normalize(normalize, default): warnings.warn( " default of 'normalize' will be set to False in version 0.26 and" " deprecated in version 0.28." - " Pass normalize=False and use Pipeline with a StandardScaler in a" - " preprocessing stage:" + " \nPass normalize=False and use Pipeline with a StandardScaler in" + " a preprocessing stage:" " model = make_pipeline(StandardScaler()," - " {type(self).__name__}())", FutureWarning + f" {est_name}())", FutureWarning ) elif normalize != 'deprecated' and normalize and not default: warnings.warn( "'normalize' was deprecated in version 0.24 and will be" - " removed in 0.26. If you still wish to normalize use" + " removed in 0.26. 
\nIf you still wish to normalize use" " Pipeline with a StandardScaler in a preprocessing stage:" " model = make_pipeline(StandardScaler()," - " {type(self).__name__}())", FutureWarning + f" {est_name}()). \nIf you used sample_weight you can" + " now pass it as follows:" + " model = make_pipeline(StandardScaler(" + "sample_weight=your_sample_weight)," + f" {est_name}(" + "sample_weight=your_sample_weight))", FutureWarning ) elif normalize != 'deprecated' and not normalize and not default: warnings.warn( @@ -551,7 +557,8 @@ def fit(self, X, y, sample_weight=None): self : returns an instance of self. """ - _normalize = _deprecate_normalize(self.normalize, default=False) + _normalize = _deprecate_normalize(self.normalize, default=False, + est_name=type(self).__name__) n_jobs_ = self.n_jobs From 73c3f70f850e99be505e76d5ddb0635267d6f300 Mon Sep 17 00:00:00 2001 From: maikia Date: Wed, 28 Oct 2020 15:17:18 +0100 Subject: [PATCH 045/116] added parameter est_name --- sklearn/linear_model/tests/test_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index 6c7e9f79fc6fa..411f3ce81cc09 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -182,12 +182,12 @@ def test_deprecate_normalize(normalize, default): if expected == AssertionError: with pytest.raises(AssertionError) as record: - _normalize = _deprecate_normalize(normalize, default) + _normalize = _deprecate_normalize(normalize, default, 'estimator') assert all(warning in str(record.value) for warning in warning_msg) else: with pytest.warns(expected) as record: - _normalize = _deprecate_normalize(normalize, default) + _normalize = _deprecate_normalize(normalize, default, 'estimator') assert _normalize == output if expected is None: From 04dd2b25047672088a6f6760b8b6d1e1553c6da8 Mon Sep 17 00:00:00 2001 From: maikia Date: Wed, 28 Oct 2020 15:18:48 +0100 Subject: [PATCH 
046/116] clean up; --- sklearn/linear_model/tests/test_base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index 411f3ce81cc09..9d8edd0c78e8a 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -107,6 +107,7 @@ def test_raises_value_error_if_positive_and_sparse(): with pytest.raises(TypeError, match=error_msg): reg.fit(X, y) + def test_raises_value_error_if_sample_weights_greater_than_1d(): # Sample weights must be either scalar or 1D From 509c7c8ab98e0185f504e70ef7c4d826887ed7e6 Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 4 Jan 2021 13:50:38 +0100 Subject: [PATCH 047/116] updated the versioning --- sklearn/linear_model/_base.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 8925061ec4b52..a4afb98c7a444 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -49,7 +49,7 @@ # intercept oscillation. -# FIXME in 0.26: variable 'normalize' should be removed from linear models +# FIXME in 1.2: variable 'normalize' should be removed from linear models # in cases where now normalize=False. default value of 'normalize' should be # changed to False in linear models where now normalize=True def _deprecate_normalize(normalize, default, est_name): @@ -60,8 +60,8 @@ def _deprecate_normalize(normalize, default, est_name): if default and normalize == 'deprecated': warnings.warn( - " default of 'normalize' will be set to False in version 0.26 and" - " deprecated in version 0.28." + " default of 'normalize' will be set to False in version 1.2 and" + " deprecated in version 1.4." 
" \nPass normalize=False and use Pipeline with a StandardScaler in" " a preprocessing stage:" " model = make_pipeline(StandardScaler()," @@ -69,8 +69,8 @@ def _deprecate_normalize(normalize, default, est_name): ) elif normalize != 'deprecated' and normalize and not default: warnings.warn( - "'normalize' was deprecated in version 0.24 and will be" - " removed in 0.26. \nIf you still wish to normalize use" + "'normalize' was deprecated in version 1.0 and will be" + " removed in 1.2. \nIf you still wish to normalize use" " Pipeline with a StandardScaler in a preprocessing stage:" " model = make_pipeline(StandardScaler()," f" {est_name}()). \nIf you used sample_weight you can" @@ -82,8 +82,8 @@ def _deprecate_normalize(normalize, default, est_name): ) elif normalize != 'deprecated' and not normalize and not default: warnings.warn( - "'normalize' was deprecated in version 0.24 and will be" - " removed in 0.26. Don't set 'normalize' parameter" + "'normalize' was deprecated in version 1.0 and will be" + " removed in 1.2. Don't set 'normalize' parameter" " and leave it to its default value", FutureWarning ) @@ -448,9 +448,9 @@ class LinearRegression(MultiOutputMixin, RegressorMixin, LinearModel): :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``. - .. deprecated:: 0.24 - ``normalize`` was deprecated in version 0.24 and will be removed in - 0.26. + .. deprecated:: 1.0 + ``normalize`` was deprecated in version 1.0 and will be removed in + 1.2. copy_X : bool, default=True If True, X will be copied; else, it may be overwritten. 
@@ -645,7 +645,7 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy, if sample_weight is not None: X, y = _rescale_data(X, y, sample_weight=sample_weight) - # FIXME: 'normalize' to be removed in 0.26 + # FIXME: 'normalize' to be removed in 1.2 if hasattr(precompute, '__array__') and ( fit_intercept and not np.allclose(X_offset, np.zeros(n_features)) or normalize and not np.allclose(X_scale, np.ones(n_features))): From 1ae7eadc6912750a697326dcd9b43860e64cb4bb Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 4 Jan 2021 13:53:05 +0100 Subject: [PATCH 048/116] update versioning in test_base --- sklearn/linear_model/tests/test_base.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index 9d8edd0c78e8a..271e2bb7cc222 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -153,7 +153,7 @@ def test_fit_intercept(): @pytest.mark.parametrize('normalize', [True, False, 'deprecated']) @pytest.mark.parametrize('default', [True, False]) -# FIXME update test in 0.26 for new versions +# FIXME update test in 1.2 for new versions def test_deprecate_normalize(normalize, default): if not default: if normalize == 'deprecated': @@ -164,7 +164,7 @@ def test_deprecate_normalize(normalize, default): else: output = normalize expected = FutureWarning - warning_msg = ['0.26'] + warning_msg = ['1.2'] if not normalize: warning_msg.append('default value') else: @@ -174,7 +174,7 @@ def test_deprecate_normalize(normalize, default): # warning to pass False and use StandardScaler output = default expected = FutureWarning - warning_msg = ['False', '0.26', 'StandardScaler()'] + warning_msg = ['False', '1.2', 'StandardScaler()'] else: # no warning output = normalize @@ -223,7 +223,7 @@ def test_linear_regression_sparse(random_state=0): (False, 1, FutureWarning), ("deprecated", 0, None)] ) -# FIXME remove test in 0.26 +# FIXME 
remove test in 1.6 def test_assure_warning_when_normalize(normalize, n_warnings, warning): # check that we issue a FutureWarning when normalize was set rng = check_random_state(0) @@ -241,7 +241,7 @@ def test_assure_warning_when_normalize(normalize, n_warnings, warning): assert "'normalize' was deprecated" in str(record[0].message) -# FIXME: 'normalize' to be removed in 0.26 +# FIXME: 'normalize' to be removed in 1.2 in LinearRegression @pytest.mark.filterwarnings("ignore:'normalize' was deprecated") @pytest.mark.parametrize('normalize', [True, False]) @pytest.mark.parametrize('fit_intercept', [True, False]) From 011d751d5ed5e74e07dd06ec9d9294b7484835b9 Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 4 Jan 2021 14:08:55 +0100 Subject: [PATCH 049/116] updated the doc --- sklearn/linear_model/_base.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index a4afb98c7a444..28afd4d308384 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -53,6 +53,16 @@ # in cases where now normalize=False. default value of 'normalize' should be # changed to False in linear models where now normalize=True def _deprecate_normalize(normalize, default, est_name): + """ + Normalize is to be deprecated from linear models and a use of a pipeline + with a StandardScaler is to be recommended instead. + Here the appropriate message is selected to be displayed to the user + depending on the default normalize value (as it varies between the linear + models and normalize value selected by the user). Est_name is the name of + the linear estimator which calls this function. 
+ + TODO: it should be updated in v1.2 and removed completely in v1.4 + """ if normalize == 'deprecated': _normalize = default else: From 79a82e9ff750d4c76a50fd12d6462442e149aa83 Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 4 Jan 2021 14:13:40 +0100 Subject: [PATCH 050/116] clean up --- sklearn/linear_model/tests/test_base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index 271e2bb7cc222..32e86b26a499f 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -223,7 +223,7 @@ def test_linear_regression_sparse(random_state=0): (False, 1, FutureWarning), ("deprecated", 0, None)] ) -# FIXME remove test in 1.6 +# FIXME remove test in 1.4 def test_assure_warning_when_normalize(normalize, n_warnings, warning): # check that we issue a FutureWarning when normalize was set rng = check_random_state(0) @@ -392,7 +392,6 @@ def test_linear_regression_pd_sparse_dataframe_warning(): with pytest.warns(None) as record: reg.fit(df.iloc[:, 0:2], df.iloc[:, 3]) - assert not record From 91fd4359b9673412ec7ca161b3bd400a2459baef Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 4 Jan 2021 14:14:22 +0100 Subject: [PATCH 051/116] update versioning --- sklearn/linear_model/tests/test_coordinate_descent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index b59d81ef3cac6..f4b68abb615a0 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -283,7 +283,7 @@ def test_lasso_cv_positive_constraint(): assert min(clf_constrained.coef_) >= 0 -# FIXME: 'normalize' to be removed in 0.26 +# FIXME: 'normalize' to be removed in 1.2 @pytest.mark.filterwarnings("ignore:'normalize' was deprecated") @pytest.mark.parametrize( "LinearModel, params", @@ 
-368,7 +368,7 @@ def test_model_pipeline_same_as_normalize_true(LinearModel, params): assert_allclose(y_pred_normalize, y_pred_standardize) -# FIXME: 'normalize' to be removed in 0.26 +# FIXME: 'normalize' to be removed in 1.2 @pytest.mark.filterwarnings("ignore:'normalize' was deprecated") @pytest.mark.parametrize( "LinearModel, params", From 1911725a533751e236e756a3613d7d674f23ffd1 Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 4 Jan 2021 14:34:30 +0100 Subject: [PATCH 052/116] cleanup --- sklearn/linear_model/_base.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 28afd4d308384..16597c309eb56 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -83,12 +83,7 @@ def _deprecate_normalize(normalize, default, est_name): " removed in 1.2. \nIf you still wish to normalize use" " Pipeline with a StandardScaler in a preprocessing stage:" " model = make_pipeline(StandardScaler()," - f" {est_name}()). \nIf you used sample_weight you can" - " now pass it as follows:" - " model = make_pipeline(StandardScaler(" - "sample_weight=your_sample_weight)," - f" {est_name}(" - "sample_weight=your_sample_weight))", FutureWarning + f" {est_name}()). 
\n", FutureWarning ) elif normalize != 'deprecated' and not normalize and not default: warnings.warn( From 8c3998289cab702ba63260d83acf48dea4fd875c Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 4 Jan 2021 15:41:54 +0100 Subject: [PATCH 053/116] added test for sample_weights with normalize and in a pipeline --- .../tests/test_coordinate_descent.py | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index f4b68abb615a0..dd1fe60aa3ca8 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -368,6 +368,49 @@ def test_model_pipeline_same_as_normalize_true(LinearModel, params): assert_allclose(y_pred_normalize, y_pred_standardize) +# FIXME: 'normalize' to be removed in 1.2 +@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") +@pytest.mark.parametrize( + "estimator, params, is_sparse", + [(LinearRegression, {}, True), + (LinearRegression, {}, False) + ] +) +def test_linear_model_sample_weights_normalize_in_pipeline(estimator, + params, + is_sparse): + rng = np.random.RandomState(0) + + # It would not work with under-determined systems + for n_samples, n_features in ((6, 5), ): + + y = rng.randn(n_samples) + X = rng.randn(n_samples, n_features) + X_test = rng.randn(n_samples, n_features) + if is_sparse: + X = sparse.csr_matrix(X) + X_test = sparse.csr_matrix(X) + + sample_weight = 1.0 + rng.rand(n_samples) + + # linear estimator with explicit sample_weight + reg = estimator(normalize=True, **params) + reg.fit(X, y, sample_weight=sample_weight) + + # linear estimator in a pipeline + reg_pip = make_pipeline(StandardScaler(with_mean=False), + estimator(normalize=False, **params)) + kwargs = {reg_pip.steps[-1][0] + '__sample_weight': sample_weight} + reg_pip.fit(X, y, **kwargs) + + y_pred_norm = reg.predict(X_test) + y_pred_pip = reg_pip.predict(X_test) + + 
+ assert_allclose( + reg.coef_ * reg_pip[0].scale_, reg_pip[1].coef_) + assert_allclose(y_pred_norm, y_pred_pip) + + # FIXME: 'normalize' to be removed in 1.2 @pytest.mark.filterwarnings("ignore:'normalize' was deprecated") @pytest.mark.parametrize( From b49d4cc92a4670beb33fee9b85ed297576ef3bc6 Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 4 Jan 2021 16:00:00 +0100 Subject: [PATCH 054/116] updated the deprecate message to suggest the use of the fit params in a pipeline --- sklearn/linear_model/_base.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 16597c309eb56..88c3d356da72c 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -75,7 +75,12 @@ def _deprecate_normalize(normalize, default, est_name): " \nPass normalize=False and use Pipeline with a StandardScaler in" " a preprocessing stage:" " model = make_pipeline(StandardScaler()," - f" {est_name}())", FutureWarning + f" {est_name}(normalize=False))" + "\nIf you wish to use additional parameters in" + " the fit() you can include them as follows:" + " kwargs = {model.steps[-1][0] + " + "'__': }\n" + "model.fit(X, y, **kwargs)", FutureWarning ) elif normalize != 'deprecated' and normalize and not default: warnings.warn( @@ -83,7 +88,11 @@ def _deprecate_normalize(normalize, default, est_name): " removed in 1.2. \nIf you still wish to normalize use" " Pipeline with a StandardScaler in a preprocessing stage:" " model = make_pipeline(StandardScaler()," - f" {est_name}()). \n", FutureWarning + f" {est_name}()). 
\nIf you wish to use additional parameters in" + " the fit() you can include them as follows:" + " kwargs = {model.steps[-1][0] + " + "'__': }\n" + "model.fit(X, y, **kwargs)", FutureWarning ) elif normalize != 'deprecated' and not normalize and not default: warnings.warn( From f0d81fa472a031063c5634818ebef83cc1b89525 Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 4 Jan 2021 16:00:57 +0100 Subject: [PATCH 055/116] test updated --- sklearn/linear_model/tests/test_coordinate_descent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index dd1fe60aa3ca8..0ac5d29481315 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -369,7 +369,7 @@ def test_model_pipeline_same_as_normalize_true(LinearModel, params): # FIXME: 'normalize' to be removed in 1.2 -@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") +#@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") @pytest.mark.parametrize( "estimator, params, is_sparse", [(LinearRegression, {}, True), From 0208ee9c2224e0951c41c87afeaa4518e361074d Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 4 Jan 2021 17:42:59 +0100 Subject: [PATCH 056/116] cleanup; --- sklearn/linear_model/tests/test_coordinate_descent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index 0ac5d29481315..dd1fe60aa3ca8 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -369,7 +369,7 @@ def test_model_pipeline_same_as_normalize_true(LinearModel, params): # FIXME: 'normalize' to be removed in 1.2 -#@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") +@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") 
@pytest.mark.parametrize( "estimator, params, is_sparse", [(LinearRegression, {}, True), From 01fbdd11fe383d95a391aa66ee9eeaaf5c0ac035 Mon Sep 17 00:00:00 2001 From: maikia Date: Tue, 5 Jan 2021 12:18:28 +0100 Subject: [PATCH 057/116] update versioning --- sklearn/linear_model/_base.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 6c81bb4b92a75..3165feb5ba353 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -50,7 +50,7 @@ # intercept oscillation. -# FIXME in 1.2: variable 'normalize' should be removed from linear models +# FIXME in v1.2.0: variable 'normalize' should be removed from linear models # in cases where now normalize=False. default value of 'normalize' should be # changed to False in linear models where now normalize=True def _deprecate_normalize(normalize, default, est_name): @@ -62,7 +62,7 @@ def _deprecate_normalize(normalize, default, est_name): models and normalize value selected by the user). Est_name is the name of the linear estimator which calls this function. - TODO: it should be updated in v1.2 and removed completely in v1.4 + TODO: it should be updated in v1.2.0 and removed completely in v1.4.0 """ if normalize == 'deprecated': _normalize = default @@ -71,8 +71,8 @@ def _deprecate_normalize(normalize, default, est_name): if default and normalize == 'deprecated': warnings.warn( - " default of 'normalize' will be set to False in version 1.2 and" - " deprecated in version 1.4." 
+ " default of 'normalize' will be set to False in version 1.2.0 and" + " deprecated in version 1.4.0" " \nPass normalize=False and use Pipeline with a StandardScaler in" " a preprocessing stage:" " model = make_pipeline(StandardScaler()," @@ -85,8 +85,8 @@ def _deprecate_normalize(normalize, default, est_name): ) elif normalize != 'deprecated' and normalize and not default: warnings.warn( - "'normalize' was deprecated in version 1.0 and will be" - " removed in 1.2. \nIf you still wish to normalize use" + "'normalize' was deprecated in version 1.0.0 and will be" + " removed in 1.2.0 \nIf you still wish to normalize use" " Pipeline with a StandardScaler in a preprocessing stage:" " model = make_pipeline(StandardScaler()," f" {est_name}()). \nIf you wish to use additional parameters in" @@ -97,8 +97,8 @@ def _deprecate_normalize(normalize, default, est_name): ) elif normalize != 'deprecated' and not normalize and not default: warnings.warn( - "'normalize' was deprecated in version 1.0 and will be" - " removed in 1.2. Don't set 'normalize' parameter" + "'normalize' was deprecated in version 1.0.0 and will be" + " removed in 1.2.0 Don't set 'normalize' parameter" " and leave it to its default value", FutureWarning ) @@ -463,9 +463,9 @@ class LinearRegression(MultiOutputMixin, RegressorMixin, LinearModel): :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``. - .. deprecated:: 1.0 - ``normalize`` was deprecated in version 1.0 and will be removed in - 1.2. + .. deprecated:: 1.0.0 + ``normalize`` was deprecated in version 1.0.0 and will be removed + in v1.2.0 copy_X : bool, default=True If True, X will be copied; else, it may be overwritten. 
@@ -715,7 +715,7 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy, if sample_weight is not None: X, y = _rescale_data(X, y, sample_weight=sample_weight) - # FIXME: 'normalize' to be removed in 1.2 + # FIXME: 'normalize' to be removed in v1.2.0 if hasattr(precompute, '__array__'): if (fit_intercept and not np.allclose(X_offset, np.zeros(n_features)) or normalize and not np.allclose(X_scale, np.ones(n_features) From 54e71972f94a4ee6e94c172920bd791358841105 Mon Sep 17 00:00:00 2001 From: maikia Date: Tue, 5 Jan 2021 13:21:17 +0100 Subject: [PATCH 058/116] updated the test --- sklearn/linear_model/tests/test_base.py | 38 +++++++++++-------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index 32e86b26a499f..dbd37f4886863 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -153,7 +153,7 @@ def test_fit_intercept(): @pytest.mark.parametrize('normalize', [True, False, 'deprecated']) @pytest.mark.parametrize('default', [True, False]) -# FIXME update test in 1.2 for new versions +# FIXME update test in 1.2.0 for new versions def test_deprecate_normalize(normalize, default): if not default: if normalize == 'deprecated': @@ -164,7 +164,7 @@ def test_deprecate_normalize(normalize, default): else: output = normalize expected = FutureWarning - warning_msg = ['1.2'] + warning_msg = ['1.2.0'] if not normalize: warning_msg.append('default value') else: @@ -174,31 +174,25 @@ def test_deprecate_normalize(normalize, default): # warning to pass False and use StandardScaler output = default expected = FutureWarning - warning_msg = ['False', '1.2', 'StandardScaler()'] + warning_msg = ['False', '1.2.0', 'StandardScaler()'] else: # no warning output = normalize expected = None warning_msg = [] - if expected == AssertionError: - with pytest.raises(AssertionError) as record: - _normalize = _deprecate_normalize(normalize, 
default, 'estimator') - assert all(warning in str(record.value) for warning - in warning_msg) - else: - with pytest.warns(expected) as record: - _normalize = _deprecate_normalize(normalize, default, 'estimator') - assert _normalize == output + with pytest.warns(expected) as record: + _normalize = _deprecate_normalize(normalize, default, 'estimator') + assert _normalize == output - if expected is None: - n_warnings = 0 - else: - n_warnings = 1 - assert len(record) == n_warnings - if n_warnings: - assert all([warning in str(record[0].message) for - warning in warning_msg]) + if expected is None: + n_warnings = 0 + else: + n_warnings = 1 + assert len(record) == n_warnings + if n_warnings: + assert all([warning in str(record[0].message) for + warning in warning_msg]) def test_linear_regression_sparse(random_state=0): @@ -223,7 +217,7 @@ def test_linear_regression_sparse(random_state=0): (False, 1, FutureWarning), ("deprecated", 0, None)] ) -# FIXME remove test in 1.4 +# FIXME remove test in 1.4.0 def test_assure_warning_when_normalize(normalize, n_warnings, warning): # check that we issue a FutureWarning when normalize was set rng = check_random_state(0) @@ -241,7 +235,7 @@ def test_assure_warning_when_normalize(normalize, n_warnings, warning): assert "'normalize' was deprecated" in str(record[0].message) -# FIXME: 'normalize' to be removed in 1.2 in LinearRegression +# FIXME: 'normalize' to be removed in 1.2.0 in LinearRegression @pytest.mark.filterwarnings("ignore:'normalize' was deprecated") @pytest.mark.parametrize('normalize', [True, False]) @pytest.mark.parametrize('fit_intercept', [True, False]) From eb913f6fb0a837c0d6ba13b4a88359312642630c Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Tue, 12 Jan 2021 12:48:09 +0100 Subject: [PATCH 059/116] Update sklearn/linear_model/_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/_base.py 
b/sklearn/linear_model/_base.py index 3165feb5ba353..459c35378a5d8 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -50,7 +50,7 @@ # intercept oscillation. -# FIXME in v1.2.0: variable 'normalize' should be removed from linear models +# FIXME in v1.2.0: parameter 'normalize' should be removed from linear models # in cases where now normalize=False. default value of 'normalize' should be # changed to False in linear models where now normalize=True def _deprecate_normalize(normalize, default, est_name): From 053ceaa8968fef3e9949218b9bdf06d02aaa5da3 Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Tue, 12 Jan 2021 12:48:39 +0100 Subject: [PATCH 060/116] Update sklearn/linear_model/_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 459c35378a5d8..5ff2706dc0e4b 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -51,7 +51,7 @@ # FIXME in v1.2.0: parameter 'normalize' should be removed from linear models -# in cases where now normalize=False. default value of 'normalize' should be +# in cases where now normalize=False. 
The default value of 'normalize' should be # changed to False in linear models where now normalize=True def _deprecate_normalize(normalize, default, est_name): """ From 4a6d7bc273cd27a9ef4c4df33bca2741ff3c217a Mon Sep 17 00:00:00 2001 From: maikia Date: Tue, 12 Jan 2021 12:59:04 +0100 Subject: [PATCH 061/116] updated the docstring --- sklearn/linear_model/_base.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 5ff2706dc0e4b..fdc52461435ab 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -53,15 +53,32 @@ # FIXME in v1.2.0: parameter 'normalize' should be removed from linear models # in cases where now normalize=False. The default value of 'normalize' should be # changed to False in linear models where now normalize=True -def _deprecate_normalize(normalize, default, est_name): - """ - Normalize is to be deprecated from linear models and a use of a pipeline +def _deprecate_normalize(normalize, default, estimator_name): + """ Normalize is to be deprecated from linear models and a use of a pipeline with a StandardScaler is to be recommended instead. Here the appropriate message is selected to be displayed to the user depending on the default normalize value (as it varies between the linear models and normalize value selected by the user). Est_name is the name of the linear estimator which calls this function. + Parameters + ---------- + normalize : bool, + normalize value passed by the user + + default : bool, + default normalize value used by the estimator + + estimator_name : string, + name of the linear estimator which calls this function. 
+ The name will be used for writing the deprecation warnings + + Returns + ------- + normalize : bool, + normalize value which should further be used by the estimator at this + stage of the depreciation process + TODO: it should be updated in v1.2.0 and removed completely in v1.4.0 """ if normalize == 'deprecated': From 10e738026fb44e84c53c52bb2956ff99e8d9e07c Mon Sep 17 00:00:00 2001 From: maikia Date: Tue, 12 Jan 2021 13:24:25 +0100 Subject: [PATCH 062/116] clean up --- sklearn/linear_model/_base.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index fdc52461435ab..18d1b7ee658e9 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -51,15 +51,14 @@ # FIXME in v1.2.0: parameter 'normalize' should be removed from linear models -# in cases where now normalize=False. The default value of 'normalize' should be -# changed to False in linear models where now normalize=True +# in cases where now normalize=False. The default value of 'normalize' should +# be changed to False in linear models where now normalize=True def _deprecate_normalize(normalize, default, estimator_name): - """ Normalize is to be deprecated from linear models and a use of a pipeline - with a StandardScaler is to be recommended instead. + """ Normalize is to be deprecated from linear models and a use of + a pipeline with a StandardScaler is to be recommended instead. Here the appropriate message is selected to be displayed to the user depending on the default normalize value (as it varies between the linear - models and normalize value selected by the user). Est_name is the name of - the linear estimator which calls this function. + models and normalize value selected by the user). 
Parameters ---------- @@ -93,7 +92,7 @@ def _deprecate_normalize(normalize, default, estimator_name): " \nPass normalize=False and use Pipeline with a StandardScaler in" " a preprocessing stage:" " model = make_pipeline(StandardScaler()," - f" {est_name}(normalize=False))" + f" {estimator_name}(normalize=False))" "\nIf you wish to use additional parameters in" " the fit() you can include them as follows:" " kwargs = {model.steps[-1][0] + " @@ -102,13 +101,13 @@ def _deprecate_normalize(normalize, default, estimator_name): ) elif normalize != 'deprecated' and normalize and not default: warnings.warn( - "'normalize' was deprecated in version 1.0.0 and will be" - " removed in 1.2.0 \nIf you still wish to normalize use" - " Pipeline with a StandardScaler in a preprocessing stage:" - " model = make_pipeline(StandardScaler()," - f" {est_name}()). \nIf you wish to use additional parameters in" - " the fit() you can include them as follows:" - " kwargs = {model.steps[-1][0] + " + "'normalize' was deprecated in version 1.0.0 and will be " + "removed in 1.2.0 \nIf you still wish to normalize use " + "Pipeline with a StandardScaler in a preprocessing stage: " + "model = make_pipeline(StandardScaler(), " + f"{estimator_name}()). 
\nIf you wish to use additional " + "parameters in the fit() you can include them as follows: " + "kwargs = {model.steps[-1][0] + " "'__': }\n" "model.fit(X, y, **kwargs)", FutureWarning ) From 4a078f486ff3f8324ef2ee9f96f54cb34e2b70ea Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Tue, 12 Jan 2021 13:25:48 +0100 Subject: [PATCH 063/116] Update sklearn/linear_model/tests/test_coordinate_descent.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/tests/test_coordinate_descent.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index 2ca803092fc82..b4f15032e609a 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -398,8 +398,10 @@ def test_linear_model_sample_weights_normalize_in_pipeline(estimator, reg.fit(X, y, sample_weight=sample_weight) # linear estimator in a pipeline - reg_pip = make_pipeline(StandardScaler(with_mean=False), - estimator(normalize=False, **params)) + reg_pip = make_pipeline( + StandardScaler(with_mean=False), + estimator(normalize=False, **params) + ) kwargs = {reg_pip.steps[-1][0] + '__sample_weight': sample_weight} reg_pip.fit(X, y, **kwargs) From a66a14ee980ce9f1af34c2cdb820a18686142744 Mon Sep 17 00:00:00 2001 From: maikia Date: Tue, 12 Jan 2021 13:28:04 +0100 Subject: [PATCH 064/116] update variable name --- sklearn/linear_model/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 18d1b7ee658e9..f3683d3a26a29 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -584,7 +584,7 @@ def fit(self, X, y, sample_weight=None): """ _normalize = _deprecate_normalize(self.normalize, default=False, - est_name=type(self).__name__) + estimator_name=type(self).__name__) n_jobs_ = self.n_jobs From 
9cd41564e9c6e2b9fbb3ee3bb2e9c2952a187810 Mon Sep 17 00:00:00 2001 From: maikia Date: Tue, 12 Jan 2021 13:28:24 +0100 Subject: [PATCH 065/116] update params in the test --- sklearn/linear_model/tests/test_coordinate_descent.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index b4f15032e609a..eb3eff0850e67 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -371,13 +371,13 @@ def test_model_pipeline_same_as_normalize_true(LinearModel, params): # FIXME: 'normalize' to be removed in 1.2 @pytest.mark.filterwarnings("ignore:'normalize' was deprecated") @pytest.mark.parametrize( - "estimator, params, is_sparse", + "estimator, estimator_params, is_sparse", [(LinearRegression, {}, True), (LinearRegression, {}, False) ] ) def test_linear_model_sample_weights_normalize_in_pipeline(estimator, - params, + estimator_params, is_sparse): rng = np.random.RandomState(0) @@ -394,13 +394,13 @@ def test_linear_model_sample_weights_normalize_in_pipeline(estimator, sample_weight = 1.0 + rng.rand(n_samples) # linear estimator with explicit sample_weight - reg = estimator(normalize=True, **params) + reg = estimator(normalize=True, **estimator_params) reg.fit(X, y, sample_weight=sample_weight) # linear estimator in a pipeline reg_pip = make_pipeline( StandardScaler(with_mean=False), - estimator(normalize=False, **params) + estimator(normalize=False, **estimator_params) ) kwargs = {reg_pip.steps[-1][0] + '__sample_weight': sample_weight} reg_pip.fit(X, y, **kwargs) From 787fe674f703eb2223c8ba16b99ca0e1a0480d03 Mon Sep 17 00:00:00 2001 From: maikia Date: Tue, 12 Jan 2021 13:29:44 +0100 Subject: [PATCH 066/116] update param names --- sklearn/linear_model/tests/test_coordinate_descent.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index eb3eff0850e67..c5d17c01f55f0 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -394,8 +394,8 @@ def test_linear_model_sample_weights_normalize_in_pipeline(estimator, sample_weight = 1.0 + rng.rand(n_samples) # linear estimator with explicit sample_weight - reg = estimator(normalize=True, **estimator_params) - reg.fit(X, y, sample_weight=sample_weight) + reg_with_normalize = estimator(normalize=True, **estimator_params) + reg_with_normalize.fit(X, y, sample_weight=sample_weight) # linear estimator in a pipeline reg_pip = make_pipeline( @@ -405,11 +405,11 @@ def test_linear_model_sample_weights_normalize_in_pipeline(estimator, kwargs = {reg_pip.steps[-1][0] + '__sample_weight': sample_weight} reg_pip.fit(X, y, **kwargs) - y_pred_norm = reg.predict(X_test) + y_pred_norm = reg_with_normalize.predict(X_test) y_pred_pip = reg_pip.predict(X_test) assert_allclose( - reg.coef_ * reg_pip[0].scale_, reg_pip[1].coef_) + reg_with_normalize.coef_ * reg_pip[0].scale_, reg_pip[1].coef_) assert_allclose(y_pred_norm, y_pred_pip) From b0c7bc8bb453a2facbb6e52ef680e517a88f4bd4 Mon Sep 17 00:00:00 2001 From: maikia Date: Tue, 12 Jan 2021 13:30:44 +0100 Subject: [PATCH 067/116] update param names --- .../linear_model/tests/test_coordinate_descent.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index c5d17c01f55f0..38cc7aacf1251 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -398,18 +398,20 @@ def test_linear_model_sample_weights_normalize_in_pipeline(estimator, reg_with_normalize.fit(X, y, sample_weight=sample_weight) # linear estimator in a pipeline - reg_pip = 
make_pipeline( + reg_with_scalar = make_pipeline( StandardScaler(with_mean=False), estimator(normalize=False, **estimator_params) ) - kwargs = {reg_pip.steps[-1][0] + '__sample_weight': sample_weight} - reg_pip.fit(X, y, **kwargs) + kwargs = {reg_with_scalar.steps[-1][0] + '__sample_weight': + sample_weight} + reg_with_scalar.fit(X, y, **kwargs) y_pred_norm = reg_with_normalize.predict(X_test) - y_pred_pip = reg_pip.predict(X_test) + y_pred_pip = reg_with_scalar.predict(X_test) assert_allclose( - reg_with_normalize.coef_ * reg_pip[0].scale_, reg_pip[1].coef_) + reg_with_normalize.coef_ * reg_with_scalar[0].scale_, + reg_with_scalar[1].coef_) assert_allclose(y_pred_norm, y_pred_pip) From b01c01dfc1c8adb72933b392cb6d89b3b2fc6fd8 Mon Sep 17 00:00:00 2001 From: maikia Date: Tue, 12 Jan 2021 13:32:18 +0100 Subject: [PATCH 068/116] cleanup --- sklearn/linear_model/tests/test_coordinate_descent.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index 38cc7aacf1251..8c697468a8179 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -411,7 +411,8 @@ def test_linear_model_sample_weights_normalize_in_pipeline(estimator, assert_allclose( reg_with_normalize.coef_ * reg_with_scalar[0].scale_, - reg_with_scalar[1].coef_) + reg_with_scalar[1].coef_ + ) assert_allclose(y_pred_norm, y_pred_pip) From 3d14885c37e69de6c6a24ea07a3d9696db5cfffe Mon Sep 17 00:00:00 2001 From: maikia Date: Thu, 14 Jan 2021 13:17:06 +0100 Subject: [PATCH 069/116] update the whatsnew --- doc/whats_new/v1.0.rst | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 7e18e1e1d81f5..06969c57e2d11 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -88,13 +88,16 @@ Changelog :pr:`18842` by :user:`Hong Shao Yang 
`. - |API|: The parameter ``normalize`` of :class:`linear_model.LinearRegression` - is deprecated and will be removed in 1.2. The use of - :class:`~sklearn.pipeline.Pipeline` with - :class:`~sklearn.preprocessing.StandardScaler` is recommended instead. - ``normalize`` parameter did not take any effect if ``fit_intercept`` was set - to False and therefore was deemed confusing. - :pr:`17743` by :user:`Maria Telenczuk ` and :user:`Guillaume Lemaitre - ` and :user:`Alexandre Gramfort `. + is deprecated and will be removed in 1.2. + Motivation for this deprecation: ``normalize`` parameter did not take any + effect if ``fit_intercept`` was set to False and therefore was deemed + confusing. + The behavior of the deprecated LinearRegression(normalize=True) can be + reproduced with :class:`~sklearn.pipeline.Pipeline` with + :class:`~sklearn.preprocessing.StandardScaler`as follows: + make_pipeline(StandardScaler(with_mean=False), LinearRegression()). + :pr:`17743` by :user:`Maria Telenczuk ` and + :user:`Alexandre Gramfort `. Code and Documentation Contributors ----------------------------------- From 169dae51367399a1c65510aab082522f671041fa Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Thu, 14 Jan 2021 13:19:37 +0100 Subject: [PATCH 070/116] Update sklearn/linear_model/_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/_base.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 1a13ec100eb76..b23e7e898413d 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -77,7 +77,21 @@ def _deprecate_normalize(normalize, default, estimator_name): normalize : bool, normalize value which should further be used by the estimator at this stage of the depreciation process - + Parameters + ---------- + normalize : bool or str + The value of `normalize` set in the estimator. 
+ + default : bool + The previous default value of `normalize` in the estimator. + + estimator_name : str + The name of the linear model estimator. + + Returns + ------- + _normalize : bool + The validated `normalize` value. TODO: it should be updated in v1.2.0 and removed completely in v1.4.0 """ if normalize == 'deprecated': From d61aad0cfc7ea9a502fe00682da56f3ba4579d4d Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Thu, 14 Jan 2021 13:20:47 +0100 Subject: [PATCH 071/116] Update sklearn/linear_model/_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/_base.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index b23e7e898413d..2a5ec55b7e7d6 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -92,7 +92,13 @@ def _deprecate_normalize(normalize, default, estimator_name): ------- _normalize : bool The validated `normalize` value. - TODO: it should be updated in v1.2.0 and removed completely in v1.4.0 + Notes + ----- + This function should be update in 1.2: + + - explain the changes to do + + This function should be completely removed in 1.4. """ if normalize == 'deprecated': _normalize = default From 511261ed2a30189043a9400eb7e8af8c9a26c035 Mon Sep 17 00:00:00 2001 From: maikia Date: Thu, 14 Jan 2021 13:37:43 +0100 Subject: [PATCH 072/116] add value error on wrong normalize --- sklearn/linear_model/_base.py | 16 ++++++++++------ sklearn/linear_model/tests/test_base.py | 9 +++++++++ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 2a5ec55b7e7d6..89311422b723e 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -81,13 +81,13 @@ def _deprecate_normalize(normalize, default, estimator_name): ---------- normalize : bool or str The value of `normalize` set in the estimator. 
- + default : bool The previous default value of `normalize` in the estimator. - + estimator_name : str The name of the linear model estimator. - + Returns ------- _normalize : bool @@ -95,11 +95,15 @@ def _deprecate_normalize(normalize, default, estimator_name): Notes ----- This function should be update in 1.2: - - - explain the changes to do - + + (TODO: - explain the changes to do) + This function should be completely removed in 1.4. """ + if normalize not in [True, False, 'deprecated']: + raise ValueError("Leave 'normalize' to its default value or set it " + "to True or False") + if normalize == 'deprecated': _normalize = default else: diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index dbd37f4886863..083a4cb2a553d 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -151,6 +151,15 @@ def test_fit_intercept(): lr3_without_intercept.coef_.ndim) +def test_error_on_wrong_normalize(): + normalize = 'wrong' + default = True + error_msg = "Leave 'normalize' to its default" + with pytest.raises(ValueError, match=error_msg): + _deprecate_normalize(normalize, default, 'estimator') + ValueError + + @pytest.mark.parametrize('normalize', [True, False, 'deprecated']) @pytest.mark.parametrize('default', [True, False]) # FIXME update test in 1.2.0 for new versions From 86b1c219f051044f5cbaee978bcf7d815b97da92 Mon Sep 17 00:00:00 2001 From: maikia Date: Thu, 14 Jan 2021 13:56:38 +0100 Subject: [PATCH 073/116] update the info on what to do in v1.2 --- sklearn/linear_model/_base.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 89311422b723e..2558028ef2fe6 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -95,11 +95,17 @@ def _deprecate_normalize(normalize, default, estimator_name): Notes ----- This function should be update in 1.2: - - (TODO: - 
explain the changes to do) - + depending on value of `normalize`: + - True, warning: `normalize` was deprecated in 1.2 and will be removed in + 1.4. Suggest to use pipeline instead + - False, `normalize` was deprecated in 1.2 and it will be removed in 1.4. + Leave normalize to its default value + - `deprecated` - this should only be possible with default == False as from + 1.2 `normalize` in all the linear models should be either removed or the + default should be set to False This function should be completely removed in 1.4. """ + if normalize not in [True, False, 'deprecated']: raise ValueError("Leave 'normalize' to its default value or set it " "to True or False") From 80eb9e5d26f76fef70ddeb881ea12b5ae621a36f Mon Sep 17 00:00:00 2001 From: maikia Date: Thu, 14 Jan 2021 13:57:50 +0100 Subject: [PATCH 074/116] remove patch version from warnings --- sklearn/linear_model/_base.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 2558028ef2fe6..2310f0a7a2f79 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -117,8 +117,8 @@ def _deprecate_normalize(normalize, default, estimator_name): if default and normalize == 'deprecated': warnings.warn( - " default of 'normalize' will be set to False in version 1.2.0 and" - " deprecated in version 1.4.0" + " default of 'normalize' will be set to False in version 1.2 and" + " deprecated in version 1.4" " \nPass normalize=False and use Pipeline with a StandardScaler in" " a preprocessing stage:" " model = make_pipeline(StandardScaler()," @@ -131,8 +131,8 @@ def _deprecate_normalize(normalize, default, estimator_name): ) elif normalize != 'deprecated' and normalize and not default: warnings.warn( - "'normalize' was deprecated in version 1.0.0 and will be " - "removed in 1.2.0 \nIf you still wish to normalize use " + "'normalize' was deprecated in version 1.0 and will be " + "removed in 1.2 \nIf you still 
wish to normalize use " "Pipeline with a StandardScaler in a preprocessing stage: " "model = make_pipeline(StandardScaler(), " f"{estimator_name}()). \nIf you wish to use additional " @@ -143,8 +143,8 @@ def _deprecate_normalize(normalize, default, estimator_name): ) elif normalize != 'deprecated' and not normalize and not default: warnings.warn( - "'normalize' was deprecated in version 1.0.0 and will be" - " removed in 1.2.0 Don't set 'normalize' parameter" + "'normalize' was deprecated in version 1.0 and will be" + " removed in 1.2 Don't set 'normalize' parameter" " and leave it to its default value", FutureWarning ) From 7aee6ea0308e6ff0a18fc08e3df1e6ada3d7d3f8 Mon Sep 17 00:00:00 2001 From: maikia Date: Thu, 14 Jan 2021 14:01:12 +0100 Subject: [PATCH 075/116] update warning msg --- sklearn/linear_model/_base.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 2310f0a7a2f79..8ba46c7e7d402 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -117,15 +117,14 @@ def _deprecate_normalize(normalize, default, estimator_name): if default and normalize == 'deprecated': warnings.warn( - " default of 'normalize' will be set to False in version 1.2 and" - " deprecated in version 1.4" - " \nPass normalize=False and use Pipeline with a StandardScaler in" - " a preprocessing stage:" - " model = make_pipeline(StandardScaler()," - f" {estimator_name}(normalize=False))" - "\nIf you wish to use additional parameters in" - " the fit() you can include them as follows:" - " kwargs = {model.steps[-1][0] + " + "The default of 'normalize' will be set to False in version 1.2 " + "and deprecated in version 1.4. 
\nPass normalize=False and use " + "Pipeline with a StandardScaler in a preprocessing stage: " + "model = make_pipeline(StandardScaler(), \n" + f"{estimator_name}(normalize=False))\n" + "If you wish to use additional parameters in " + "the fit() you can include them as follows:\n" + "kwargs = {model.steps[-1][0] + " "'__': }\n" "model.fit(X, y, **kwargs)", FutureWarning ) From 64cf6aa12e4ed7908d40b5ac891f19121bb91284 Mon Sep 17 00:00:00 2001 From: maikia Date: Thu, 14 Jan 2021 14:04:50 +0100 Subject: [PATCH 076/116] update the warning message --- sklearn/linear_model/_base.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 8ba46c7e7d402..5c0c77efc040e 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -119,8 +119,9 @@ def _deprecate_normalize(normalize, default, estimator_name): warnings.warn( "The default of 'normalize' will be set to False in version 1.2 " "and deprecated in version 1.4. \nPass normalize=False and use " - "Pipeline with a StandardScaler in a preprocessing stage: " - "model = make_pipeline(StandardScaler(), \n" + "Pipeline with a StandardScaler in a preprocessing stage if you " + "wish to reproduce the previous behavior:\n" + "model = make_pipeline(StandardScaler(with_mean=False), \n" f"{estimator_name}(normalize=False))\n" "If you wish to use additional parameters in " "the fit() you can include them as follows:\n" @@ -132,8 +133,9 @@ def _deprecate_normalize(normalize, default, estimator_name): warnings.warn( "'normalize' was deprecated in version 1.0 and will be " "removed in 1.2 \nIf you still wish to normalize use " - "Pipeline with a StandardScaler in a preprocessing stage: " - "model = make_pipeline(StandardScaler(), " + "Pipeline with a StandardScaler in a preprocessing stage if you " + "wish to reproduce the previous behavior:\n" + "model = make_pipeline(StandardScaler(with_mean=False), " f"{estimator_name}()). 
\nIf you wish to use additional " "parameters in the fit() you can include them as follows: " "kwargs = {model.steps[-1][0] + " From c8f045626b739d9a09fad088f262384f8623100a Mon Sep 17 00:00:00 2001 From: maikia Date: Thu, 14 Jan 2021 14:17:35 +0100 Subject: [PATCH 077/116] update the if statement --- sklearn/linear_model/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 5c0c77efc040e..e96e27faa9043 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -142,7 +142,7 @@ def _deprecate_normalize(normalize, default, estimator_name): "'__': }\n" "model.fit(X, y, **kwargs)", FutureWarning ) - elif normalize != 'deprecated' and not normalize and not default: + elif not normalize and not default: warnings.warn( "'normalize' was deprecated in version 1.0 and will be" " removed in 1.2 Don't set 'normalize' parameter" From 866ced7451dfcd1ae8d39d9cc19ad4f126d6ff8b Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Thu, 14 Jan 2021 14:18:36 +0100 Subject: [PATCH 078/116] Update sklearn/linear_model/_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/_base.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 8ba46c7e7d402..4ff2196bfe7e9 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -508,9 +508,8 @@ class LinearRegression(MultiOutputMixin, RegressorMixin, LinearModel): :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``. - .. deprecated:: 1.0.0 - ``normalize`` was deprecated in version 1.0.0 and will be removed - in v1.2.0 + .. deprecated:: 1.0 + `normalize` was deprecated in version 1.0 and will be removed in 1.2. copy_X : bool, default=True If True, X will be copied; else, it may be overwritten. 
From 505bcd9b8b3af3bd2c0ee26846c03ae92244f5ab Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Thu, 14 Jan 2021 14:18:58 +0100 Subject: [PATCH 079/116] Update sklearn/linear_model/_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 4ff2196bfe7e9..5fb6e2a87fe29 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -759,7 +759,7 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy, if sample_weight is not None: X, y = _rescale_data(X, y, sample_weight=sample_weight) - # FIXME: 'normalize' to be removed in v1.2.0 + # FIXME: 'normalize' to be removed in 1.2 if hasattr(precompute, '__array__'): if (fit_intercept and not np.allclose(X_offset, np.zeros(n_features)) or normalize and not np.allclose(X_scale, np.ones(n_features) From f969696cc625b527f4bcf67542d3e143c4caf597 Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Thu, 14 Jan 2021 14:19:09 +0100 Subject: [PATCH 080/116] Update sklearn/linear_model/_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 5fb6e2a87fe29..a789ba347c693 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -762,8 +762,8 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy, # FIXME: 'normalize' to be removed in 1.2 if hasattr(precompute, '__array__'): if (fit_intercept and not np.allclose(X_offset, np.zeros(n_features)) - or normalize and not np.allclose(X_scale, np.ones(n_features) - )): + or normalize and not np.allclose(X_scale, np.ones(n_features)) + ): warnings.warn("Gram matrix was provided but X was centered" " to fit intercept, " "or X was normalized : recomputing Gram matrix.", From 828a3e904c8b3170bccc2fb57783809f0baf4b15 
Mon Sep 17 00:00:00 2001 From: maikia Date: Thu, 14 Jan 2021 14:27:06 +0100 Subject: [PATCH 081/116] improve formatting --- sklearn/linear_model/_base.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index e96e27faa9043..e1acfec0fc620 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -767,10 +767,10 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy, if (fit_intercept and not np.allclose(X_offset, np.zeros(n_features)) or normalize and not np.allclose(X_scale, np.ones(n_features) )): - warnings.warn("Gram matrix was provided but X was centered" - " to fit intercept, " - "or X was normalized : recomputing Gram matrix.", - UserWarning) + warnings.warn("Gram matrix was provided but X was centered " + "to fit intercept, or X was normalized : " + "recomputing Gram matrix.", UserWarning + ) # recompute Gram precompute = 'auto' Xy = None From a9403c5a19a2a5beb674c40f8db61c1cfec84893 Mon Sep 17 00:00:00 2001 From: maikia Date: Thu, 14 Jan 2021 14:30:34 +0100 Subject: [PATCH 082/116] cleanup --- sklearn/linear_model/_base.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index e5ebf15fe4f72..ba43f3f0fd6db 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -766,10 +766,11 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy, if (fit_intercept and not np.allclose(X_offset, np.zeros(n_features)) or normalize and not np.allclose(X_scale, np.ones(n_features) )): - warnings.warn("Gram matrix was provided but X was centered " - "to fit intercept, or X was normalized : " - "recomputing Gram matrix.", UserWarning - ) + warnings.warn( + "Gram matrix was provided but X was centered to fit " + "intercept, or X was normalized : recomputing Gram matrix.", + UserWarning + ) # recompute Gram precompute = 'auto' Xy = 
None From 4683f62f57898e1dedf9381796160f826017dd7e Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Thu, 14 Jan 2021 14:31:14 +0100 Subject: [PATCH 083/116] Update sklearn/linear_model/tests/test_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/tests/test_base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index 083a4cb2a553d..ec81c41693ca5 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -164,6 +164,7 @@ def test_error_on_wrong_normalize(): @pytest.mark.parametrize('default', [True, False]) # FIXME update test in 1.2.0 for new versions def test_deprecate_normalize(normalize, default): + # test all possible case of the normalize parameter deprecation if not default: if normalize == 'deprecated': # no warning From 1a0dfe184303a3f3d3cf60d593aa8f8b25757642 Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Thu, 14 Jan 2021 14:31:23 +0100 Subject: [PATCH 084/116] Update sklearn/linear_model/tests/test_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/tests/test_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index ec81c41693ca5..c19de3525af93 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -174,7 +174,7 @@ def test_deprecate_normalize(normalize, default): else: output = normalize expected = FutureWarning - warning_msg = ['1.2.0'] + warning_msg = ['1.2'] if not normalize: warning_msg.append('default value') else: From f1f21f0a65d3544a01d0ec5ed3ea2c672544ac8d Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Thu, 14 Jan 2021 14:31:34 +0100 Subject: [PATCH 085/116] Update sklearn/linear_model/tests/test_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/tests/test_base.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index c19de3525af93..dbfd2132ca2eb 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -184,7 +184,7 @@ def test_deprecate_normalize(normalize, default): # warning to pass False and use StandardScaler output = default expected = FutureWarning - warning_msg = ['False', '1.2.0', 'StandardScaler()'] + warning_msg = ['False', '1.2', 'StandardScaler()'] else: # no warning output = normalize From 620d011323ab51ca591550b031808d1e95d3fac0 Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Thu, 14 Jan 2021 14:32:00 +0100 Subject: [PATCH 086/116] Update sklearn/linear_model/tests/test_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/tests/test_base.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index dbfd2132ca2eb..fe58ba3953e1c 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -195,10 +195,7 @@ def test_deprecate_normalize(normalize, default): _normalize = _deprecate_normalize(normalize, default, 'estimator') assert _normalize == output - if expected is None: - n_warnings = 0 - else: - n_warnings = 1 + n_warnings = 0 if expected is None else 1 assert len(record) == n_warnings if n_warnings: assert all([warning in str(record[0].message) for From 270a946f7598e1b2410f9e0fca3c9e996b2414e4 Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Thu, 14 Jan 2021 14:32:21 +0100 Subject: [PATCH 087/116] Update sklearn/linear_model/tests/test_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/tests/test_base.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index fe58ba3953e1c..98157220a3cdf 100644 --- 
a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -198,8 +198,10 @@ def test_deprecate_normalize(normalize, default): n_warnings = 0 if expected is None else 1 assert len(record) == n_warnings if n_warnings: - assert all([warning in str(record[0].message) for - warning in warning_msg]) + assert all([ + warning in str(record[0].message) + for warning in warning_msg + ]) def test_linear_regression_sparse(random_state=0): From 48c704e80e5af43526b5fec0adfc00303bcea9ea Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Thu, 14 Jan 2021 14:32:29 +0100 Subject: [PATCH 088/116] Update sklearn/linear_model/tests/test_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/tests/test_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index 98157220a3cdf..38b12549403cc 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -226,7 +226,7 @@ def test_linear_regression_sparse(random_state=0): (False, 1, FutureWarning), ("deprecated", 0, None)] ) -# FIXME remove test in 1.4.0 +# FIXME remove test in 1.4 def test_assure_warning_when_normalize(normalize, n_warnings, warning): # check that we issue a FutureWarning when normalize was set rng = check_random_state(0) From 2afefc5da7642fc55ad5862072296ecd3937ab48 Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Thu, 14 Jan 2021 14:33:03 +0100 Subject: [PATCH 089/116] Update sklearn/linear_model/tests/test_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/tests/test_base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index 38b12549403cc..c95bbbe49ba9a 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -228,7 +228,8 @@ def 
test_linear_regression_sparse(random_state=0): ) # FIXME remove test in 1.4 def test_assure_warning_when_normalize(normalize, n_warnings, warning): - # check that we issue a FutureWarning when normalize was set + # check that we issue a FutureWarning when normalize was set in + # LinearRegression rng = check_random_state(0) n_samples = 200 n_features = 2 From 68aa605c81118b78cf9639014bd01fdc2a8f2033 Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Thu, 14 Jan 2021 14:42:46 +0100 Subject: [PATCH 090/116] Update sklearn/linear_model/tests/test_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/tests/test_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index c95bbbe49ba9a..cdd19fdd83219 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -227,7 +227,7 @@ def test_linear_regression_sparse(random_state=0): ("deprecated", 0, None)] ) # FIXME remove test in 1.4 -def test_assure_warning_when_normalize(normalize, n_warnings, warning): +def test_linear_regression_normalize_deprecation(normalize, n_warnings, warning): # check that we issue a FutureWarning when normalize was set in # LinearRegression rng = check_random_state(0) From c97a6be38005c1bfffd42e2a98fd7bfd372462ea Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Thu, 14 Jan 2021 14:43:22 +0100 Subject: [PATCH 091/116] Update sklearn/linear_model/tests/test_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/tests/test_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index cdd19fdd83219..e11c828d3f0a4 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -234,7 +234,7 @@ def test_linear_regression_normalize_deprecation(normalize, n_warnings, warning) n_samples = 200 
n_features = 2 X = rng.randn(n_samples, n_features) - X[X < 0.1] = 0. + X[X < 0.1] = 0.0 y = rng.rand(n_samples) model = LinearRegression(normalize=normalize) From 0b035a3ae092a1662dff2fc620f72607e8ec702f Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Thu, 14 Jan 2021 14:43:59 +0100 Subject: [PATCH 092/116] Update sklearn/linear_model/tests/test_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/tests/test_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index e11c828d3f0a4..221f745f16488 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -245,7 +245,7 @@ def test_linear_regression_normalize_deprecation(normalize, n_warnings, warning) assert "'normalize' was deprecated" in str(record[0].message) -# FIXME: 'normalize' to be removed in 1.2.0 in LinearRegression +# FIXME: 'normalize' to be removed in 1.2 in LinearRegression @pytest.mark.filterwarnings("ignore:'normalize' was deprecated") @pytest.mark.parametrize('normalize', [True, False]) @pytest.mark.parametrize('fit_intercept', [True, False]) From 48f3b87376aa97d170d9264ef464244aadbdc381 Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Thu, 14 Jan 2021 14:53:25 +0100 Subject: [PATCH 093/116] Update sklearn/linear_model/tests/test_coordinate_descent.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/tests/test_coordinate_descent.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index 8c697468a8179..e62ae2d843d9a 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -373,8 +373,7 @@ def test_model_pipeline_same_as_normalize_true(LinearModel, params): @pytest.mark.parametrize( "estimator, estimator_params, is_sparse", 
[(LinearRegression, {}, True), - (LinearRegression, {}, False) - ] + (LinearRegression, {}, False)] ) def test_linear_model_sample_weights_normalize_in_pipeline(estimator, estimator_params, From 57b88195f745be9b9c1ebf697ccd9ca084734f17 Mon Sep 17 00:00:00 2001 From: maikia Date: Thu, 14 Jan 2021 15:02:53 +0100 Subject: [PATCH 094/116] update the test --- .../tests/test_coordinate_descent.py | 61 +++++++++---------- 1 file changed, 29 insertions(+), 32 deletions(-) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index 8c697468a8179..52218fd084bf5 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -371,49 +371,46 @@ def test_model_pipeline_same_as_normalize_true(LinearModel, params): # FIXME: 'normalize' to be removed in 1.2 @pytest.mark.filterwarnings("ignore:'normalize' was deprecated") @pytest.mark.parametrize( - "estimator, estimator_params, is_sparse", - [(LinearRegression, {}, True), - (LinearRegression, {}, False) + "estimator, is_sparse", + [(LinearRegression, True), + (LinearRegression, False) ] ) def test_linear_model_sample_weights_normalize_in_pipeline(estimator, - estimator_params, is_sparse): rng = np.random.RandomState(0) + n_samples, n_features = 6, 5 - # It would not work with under-determined systems - for n_samples, n_features in ((6, 5), ): + y = rng.randn(n_samples) + X = rng.randn(n_samples, n_features) + X_test = rng.randn(n_samples, n_features) + if is_sparse: + X = sparse.csr_matrix(X) + X_test = sparse.csr_matrix(X) - y = rng.randn(n_samples) - X = rng.randn(n_samples, n_features) - X_test = rng.randn(n_samples, n_features) - if is_sparse: - X = sparse.csr_matrix(X) - X_test = sparse.csr_matrix(X) + sample_weight = 1.0 + rng.rand(n_samples) - sample_weight = 1.0 + rng.rand(n_samples) + # linear estimator with explicit sample_weight + reg_with_normalize = estimator(normalize=True) + 
reg_with_normalize.fit(X, y, sample_weight=sample_weight) - # linear estimator with explicit sample_weight - reg_with_normalize = estimator(normalize=True, **estimator_params) - reg_with_normalize.fit(X, y, sample_weight=sample_weight) + # linear estimator in a pipeline + reg_with_scalar = make_pipeline( + StandardScaler(with_mean=False), + estimator(normalize=False) + ) + kwargs = {reg_with_scalar.steps[-1][0] + '__sample_weight': + sample_weight} + reg_with_scalar.fit(X, y, **kwargs) - # linear estimator in a pipeline - reg_with_scalar = make_pipeline( - StandardScaler(with_mean=False), - estimator(normalize=False, **estimator_params) + y_pred_norm = reg_with_normalize.predict(X_test) + y_pred_pip = reg_with_scalar.predict(X_test) + + assert_allclose( + reg_with_normalize.coef_ * reg_with_scalar[0].scale_, + reg_with_scalar[1].coef_ ) - kwargs = {reg_with_scalar.steps[-1][0] + '__sample_weight': - sample_weight} - reg_with_scalar.fit(X, y, **kwargs) - - y_pred_norm = reg_with_normalize.predict(X_test) - y_pred_pip = reg_with_scalar.predict(X_test) - - assert_allclose( - reg_with_normalize.coef_ * reg_with_scalar[0].scale_, - reg_with_scalar[1].coef_ - ) - assert_allclose(y_pred_norm, y_pred_pip) + assert_allclose(y_pred_norm, y_pred_pip) # FIXME: 'normalize' to be removed in 1.2 From 46d8606da161e04b43b288b8eaa11a1d5b1f4a56 Mon Sep 17 00:00:00 2001 From: maikia Date: Thu, 14 Jan 2021 15:08:19 +0100 Subject: [PATCH 095/116] update tests --- sklearn/linear_model/tests/test_coordinate_descent.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index 52218fd084bf5..46d04a8967376 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -26,6 +26,7 @@ from sklearn.utils._testing import assert_warns_message from sklearn.utils._testing import ignore_warnings from 
sklearn.utils._testing import assert_array_equal +from sklearn.utils._testing import _convert_container from sklearn.utils._testing import TempMemmap from sklearn.utils.fixes import parse_version @@ -384,9 +385,10 @@ def test_linear_model_sample_weights_normalize_in_pipeline(estimator, y = rng.randn(n_samples) X = rng.randn(n_samples, n_features) X_test = rng.randn(n_samples, n_features) + if is_sparse: X = sparse.csr_matrix(X) - X_test = sparse.csr_matrix(X) + X_test = _convert_container(X, 'sparse') sample_weight = 1.0 + rng.rand(n_samples) From 45d9026d855e359b4eac08f87034979ed64a83ca Mon Sep 17 00:00:00 2001 From: maikia Date: Thu, 14 Jan 2021 15:10:17 +0100 Subject: [PATCH 096/116] cleanup the test --- sklearn/linear_model/tests/test_coordinate_descent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index 46d04a8967376..af1af5a5d5931 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -390,7 +390,7 @@ def test_linear_model_sample_weights_normalize_in_pipeline(estimator, X = sparse.csr_matrix(X) X_test = _convert_container(X, 'sparse') - sample_weight = 1.0 + rng.rand(n_samples) + sample_weight = rng.rand(n_samples) # linear estimator with explicit sample_weight reg_with_normalize = estimator(normalize=True) @@ -402,7 +402,7 @@ def test_linear_model_sample_weights_normalize_in_pipeline(estimator, estimator(normalize=False) ) kwargs = {reg_with_scalar.steps[-1][0] + '__sample_weight': - sample_weight} + sample_weight} reg_with_scalar.fit(X, y, **kwargs) y_pred_norm = reg_with_normalize.predict(X_test) From 4a21c229ff9809e84b18601ba19b0d168775e4f5 Mon Sep 17 00:00:00 2001 From: maikia Date: Thu, 14 Jan 2021 15:11:20 +0100 Subject: [PATCH 097/116] typo --- .../linear_model/tests/test_coordinate_descent.py | 12 ++++++------ 1 file changed, 6 insertions(+), 
6 deletions(-) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index af1af5a5d5931..e97cd4778f781 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -397,20 +397,20 @@ def test_linear_model_sample_weights_normalize_in_pipeline(estimator, reg_with_normalize.fit(X, y, sample_weight=sample_weight) # linear estimator in a pipeline - reg_with_scalar = make_pipeline( + reg_with_scaler = make_pipeline( StandardScaler(with_mean=False), estimator(normalize=False) ) - kwargs = {reg_with_scalar.steps[-1][0] + '__sample_weight': + kwargs = {reg_with_scaler.steps[-1][0] + '__sample_weight': sample_weight} - reg_with_scalar.fit(X, y, **kwargs) + reg_with_scaler.fit(X, y, **kwargs) y_pred_norm = reg_with_normalize.predict(X_test) - y_pred_pip = reg_with_scalar.predict(X_test) + y_pred_pip = reg_with_scaler.predict(X_test) assert_allclose( - reg_with_normalize.coef_ * reg_with_scalar[0].scale_, - reg_with_scalar[1].coef_ + reg_with_normalize.coef_ * reg_with_scaler[0].scale_, + reg_with_scaler[1].coef_ ) assert_allclose(y_pred_norm, y_pred_pip) From 8609a9e084b8233c951edf00113b0368d083c4cb Mon Sep 17 00:00:00 2001 From: maikia Date: Thu, 14 Jan 2021 15:25:46 +0100 Subject: [PATCH 098/116] correct the test --- sklearn/linear_model/tests/test_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index 221f745f16488..f69ec7823d3a1 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -178,13 +178,13 @@ def test_deprecate_normalize(normalize, default): if not normalize: warning_msg.append('default value') else: - warning_msg.append('StandardScaler()') + warning_msg.append('StandardScaler(') elif default: if normalize == 'deprecated': # warning to pass False and use StandardScaler 
output = default expected = FutureWarning - warning_msg = ['False', '1.2', 'StandardScaler()'] + warning_msg = ['False', '1.2', 'StandardScaler('] else: # no warning output = normalize From 84f7be71b4add931c25ce92dd41abebd88662847 Mon Sep 17 00:00:00 2001 From: maikia Date: Thu, 14 Jan 2021 15:26:55 +0100 Subject: [PATCH 099/116] update getting the class name --- sklearn/linear_model/_base.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index ba43f3f0fd6db..d3f360569c317 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -613,8 +613,10 @@ def fit(self, X, y, sample_weight=None): self : returns an instance of self. """ - _normalize = _deprecate_normalize(self.normalize, default=False, - estimator_name=type(self).__name__) + _normalize = _deprecate_normalize( + self.normalize, default=False, + estimator_name=self.__class__.__name__ + ) n_jobs_ = self.n_jobs From 81f276d1170208ed7a371e34fc3e311d6126ddd8 Mon Sep 17 00:00:00 2001 From: maikia Date: Thu, 14 Jan 2021 15:28:48 +0100 Subject: [PATCH 100/116] cleanup --- sklearn/linear_model/tests/test_coordinate_descent.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index e97cd4778f781..a7d20cb308a7b 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -377,8 +377,9 @@ def test_model_pipeline_same_as_normalize_true(LinearModel, params): (LinearRegression, False) ] ) -def test_linear_model_sample_weights_normalize_in_pipeline(estimator, - is_sparse): +def test_linear_model_sample_weights_normalize_in_pipeline( + estimator, is_sparse +): rng = np.random.RandomState(0) n_samples, n_features = 6, 5 From 065371483624417091ee63d227d9c7b81f98a851 Mon Sep 17 00:00:00 2001 From: maikia Date: Thu, 14 Jan 
2021 15:30:49 +0100 Subject: [PATCH 101/116] sklearn/linear_model/tests/test_base.py --- sklearn/linear_model/tests/test_base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index f69ec7823d3a1..abcf6a43fc25d 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -227,7 +227,9 @@ def test_linear_regression_sparse(random_state=0): ("deprecated", 0, None)] ) # FIXME remove test in 1.4 -def test_linear_regression_normalize_deprecation(normalize, n_warnings, warning): +def test_linear_regression_normalize_deprecation( + normalize, n_warnings, warning + ): # check that we issue a FutureWarning when normalize was set in # LinearRegression rng = check_random_state(0) From 80822f7b555ff8596466950abf8128030a187cc2 Mon Sep 17 00:00:00 2001 From: maikia Date: Thu, 14 Jan 2021 15:40:28 +0100 Subject: [PATCH 102/116] cleanup --- sklearn/linear_model/_base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index d3f360569c317..db8b036e9d52f 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -511,7 +511,8 @@ class LinearRegression(MultiOutputMixin, RegressorMixin, LinearModel): on an estimator with ``normalize=False``. .. deprecated:: 1.0 - `normalize` was deprecated in version 1.0 and will be removed in 1.2. + `normalize` was deprecated in version 1.0 and will be + removed in 1.2. copy_X : bool, default=True If True, X will be copied; else, it may be overwritten. 
From db71adf4ecc60b8ebabcf9b6832dc1aa92028938 Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Mon, 18 Jan 2021 14:04:23 +0100 Subject: [PATCH 103/116] Update sklearn/linear_model/_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index db8b036e9d52f..b92cc91a9b685 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -50,7 +50,7 @@ # intercept oscillation. -# FIXME in v1.2.0: parameter 'normalize' should be removed from linear models +# FIXME in 1.2: parameter 'normalize' should be removed from linear models # in cases where now normalize=False. The default value of 'normalize' should # be changed to False in linear models where now normalize=True def _deprecate_normalize(normalize, default, estimator_name): From e8d7396abccdb7115def635de184fa42552c4ee1 Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Mon, 18 Jan 2021 14:04:44 +0100 Subject: [PATCH 104/116] Update sklearn/linear_model/_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/_base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index b92cc91a9b685..a80b63eb2ed0d 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -92,6 +92,7 @@ def _deprecate_normalize(normalize, default, estimator_name): ------- _normalize : bool The validated `normalize` value. 
+ Notes ----- This function should be update in 1.2: From 26b729a40269ca817d405b4b4f9074ff12f61d3f Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Mon, 18 Jan 2021 14:05:07 +0100 Subject: [PATCH 105/116] Update sklearn/linear_model/_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/_base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index a80b63eb2ed0d..5704ae6c9628c 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -95,8 +95,7 @@ def _deprecate_normalize(normalize, default, estimator_name): Notes ----- - This function should be update in 1.2: - depending on value of `normalize`: + This function should be updated in 1.2 depending on the value of `normalize`: - True, warning: `normalize` was deprecated in 1.2 and will be removed in 1.4. Suggest to use pipeline instead - False, `normalize` was deprecated in 1.2 and it will be removed in 1.4. From ba547a01995c8644797db4b0d1eca628a366479a Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Mon, 18 Jan 2021 14:05:28 +0100 Subject: [PATCH 106/116] Update sklearn/linear_model/tests/test_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/tests/test_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index abcf6a43fc25d..5149549020687 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -162,7 +162,7 @@ def test_error_on_wrong_normalize(): @pytest.mark.parametrize('normalize', [True, False, 'deprecated']) @pytest.mark.parametrize('default', [True, False]) -# FIXME update test in 1.2.0 for new versions +# FIXME update test in 1.2 for new versions def test_deprecate_normalize(normalize, default): # test all possible case of the normalize parameter deprecation if not default: From 
4bd9b63193db6de6e0b211fd1652fcee57110f14 Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Mon, 18 Jan 2021 14:05:37 +0100 Subject: [PATCH 107/116] Update sklearn/linear_model/tests/test_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/tests/test_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index 5149549020687..75cc9dd5fd8f1 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -229,7 +229,7 @@ def test_linear_regression_sparse(random_state=0): # FIXME remove test in 1.4 def test_linear_regression_normalize_deprecation( normalize, n_warnings, warning - ): +): # check that we issue a FutureWarning when normalize was set in # LinearRegression rng = check_random_state(0) From 32447a864a1398db3bcc71295a0ca1f4f63384f1 Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Mon, 18 Jan 2021 14:05:52 +0100 Subject: [PATCH 108/116] Update sklearn/linear_model/tests/test_coordinate_descent.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/tests/test_coordinate_descent.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index a7d20cb308a7b..4983175430bb3 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -374,8 +374,7 @@ def test_model_pipeline_same_as_normalize_true(LinearModel, params): @pytest.mark.parametrize( "estimator, is_sparse", [(LinearRegression, True), - (LinearRegression, False) - ] + (LinearRegression, False)] ) def test_linear_model_sample_weights_normalize_in_pipeline( estimator, is_sparse From 5efe0de66af7d934c0cf4ef7db6660137118f909 Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Mon, 18 Jan 2021 14:06:04 +0100 Subject: [PATCH 109/116] Update 
sklearn/linear_model/tests/test_coordinate_descent.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/tests/test_coordinate_descent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index 4983175430bb3..ecbc16125f0d2 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -411,7 +411,7 @@ def test_linear_model_sample_weights_normalize_in_pipeline( assert_allclose( reg_with_normalize.coef_ * reg_with_scaler[0].scale_, reg_with_scaler[1].coef_ - ) + ) assert_allclose(y_pred_norm, y_pred_pip) From e9527e53a198527d8eba8649bdc007901ac79adb Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Mon, 18 Jan 2021 14:06:22 +0100 Subject: [PATCH 110/116] Update sklearn/linear_model/_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 5704ae6c9628c..bc3d4c63ac66b 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -97,7 +97,7 @@ def _deprecate_normalize(normalize, default, estimator_name): ----- This function should be updated in 1.2 depending on the value of `normalize`: - True, warning: `normalize` was deprecated in 1.2 and will be removed in - 1.4. Suggest to use pipeline instead + 1.4. Suggest to use pipeline instead. - False, `normalize` was deprecated in 1.2 and it will be removed in 1.4. 
Leave normalize to its default value - `deprecated` - this should only be possible with default == False as from From 66868007da3676ddab864d2fe67cdcebf947bb55 Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Mon, 18 Jan 2021 14:06:36 +0100 Subject: [PATCH 111/116] Update sklearn/linear_model/_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index bc3d4c63ac66b..3619264ccdd1b 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -99,7 +99,7 @@ def _deprecate_normalize(normalize, default, estimator_name): - True, warning: `normalize` was deprecated in 1.2 and will be removed in 1.4. Suggest to use pipeline instead. - False, `normalize` was deprecated in 1.2 and it will be removed in 1.4. - Leave normalize to its default value + Leave normalize to its default value. - `deprecated` - this should only be possible with default == False as from 1.2 `normalize` in all the linear models should be either removed or the default should be set to False From 69b0080f7f8834b060a6423e0d68e3c8a48b3cba Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 18 Jan 2021 14:10:29 +0100 Subject: [PATCH 112/116] update a doc --- sklearn/linear_model/_base.py | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 3619264ccdd1b..2d54400394b95 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -77,25 +77,11 @@ def _deprecate_normalize(normalize, default, estimator_name): normalize : bool, normalize value which should further be used by the estimator at this stage of the depreciation process - Parameters - ---------- - normalize : bool or str - The value of `normalize` set in the estimator. - - default : bool - The previous default value of `normalize` in the estimator. 
- estimator_name : str - The name of the linear model estimator. - - Returns - ------- - _normalize : bool - The validated `normalize` value. - Notes ----- - This function should be updated in 1.2 depending on the value of `normalize`: + This function should be updated in 1.2 depending on the value of + `normalize`: - True, warning: `normalize` was deprecated in 1.2 and will be removed in 1.4. Suggest to use pipeline instead. - False, `normalize` was deprecated in 1.2 and it will be removed in 1.4. From a149dcc3f31094d4afa3fe9a4b8c99048acbd10f Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 18 Jan 2021 15:04:31 +0100 Subject: [PATCH 113/116] add the doc to the test --- sklearn/linear_model/tests/test_coordinate_descent.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index ecbc16125f0d2..4138b11fc6827 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -379,6 +379,10 @@ def test_model_pipeline_same_as_normalize_true(LinearModel, params): def test_linear_model_sample_weights_normalize_in_pipeline( estimator, is_sparse ): + # Test that the results for running linear regression LinearRegression with + # sample_weight set and with normalize set to True gives similar results as + # LinearRegression with no normalize in a pipeline with a StandardScaler + # and set sample_weight. 
rng = np.random.RandomState(0) n_samples, n_features = 6, 5 From 00367783ddb6c383ba16c87ca3dbdd20f2db8d29 Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Mon, 18 Jan 2021 15:05:22 +0100 Subject: [PATCH 114/116] Update sklearn/linear_model/_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 2d54400394b95..c7db0901f4153 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -88,7 +88,7 @@ def _deprecate_normalize(normalize, default, estimator_name): Leave normalize to its default value. - `deprecated` - this should only be possible with default == False as from 1.2 `normalize` in all the linear models should be either removed or the - default should be set to False + default should be set to False. This function should be completely removed in 1.4. """ From 7ae55a340c642c31eccef15fa2047a5f514470d1 Mon Sep 17 00:00:00 2001 From: Maria Telenczuk Date: Mon, 18 Jan 2021 15:05:40 +0100 Subject: [PATCH 115/116] Update sklearn/linear_model/_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/linear_model/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index c7db0901f4153..f84d4234c193c 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -603,7 +603,7 @@ def fit(self, X, y, sample_weight=None): _normalize = _deprecate_normalize( self.normalize, default=False, estimator_name=self.__class__.__name__ - ) + ) n_jobs_ = self.n_jobs From 81d34b4507fd93d7ff220c0a42721548cc6fcc5f Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 20 Jan 2021 19:50:10 +0100 Subject: [PATCH 116/116] Extend test_linear_model_sample_weights_normalize_in_pipeline to better test the impact of with_mean --- .../tests/test_coordinate_descent.py | 34 ++++++++++--------- 1 file changed, 18 
insertions(+), 16 deletions(-) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index 4138b11fc6827..062edf1db595c 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -372,42 +372,44 @@ def test_model_pipeline_same_as_normalize_true(LinearModel, params): # FIXME: 'normalize' to be removed in 1.2 @pytest.mark.filterwarnings("ignore:'normalize' was deprecated") @pytest.mark.parametrize( - "estimator, is_sparse", - [(LinearRegression, True), - (LinearRegression, False)] + "estimator, is_sparse, with_mean", + [(LinearRegression, True, False), + (LinearRegression, False, True), + (LinearRegression, False, False)] ) def test_linear_model_sample_weights_normalize_in_pipeline( - estimator, is_sparse + estimator, is_sparse, with_mean ): # Test that the results for running linear regression LinearRegression with # sample_weight set and with normalize set to True gives similar results as # LinearRegression with no normalize in a pipeline with a StandardScaler # and set sample_weight. 
rng = np.random.RandomState(0) - n_samples, n_features = 6, 5 - - y = rng.randn(n_samples) - X = rng.randn(n_samples, n_features) - X_test = rng.randn(n_samples, n_features) - + X, y = make_regression(n_samples=20, n_features=5, noise=1e-2, + random_state=rng) + # make sure the data is not centered to make the problem more + # difficult + X += 10 + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, + random_state=rng) if is_sparse: - X = sparse.csr_matrix(X) - X_test = _convert_container(X, 'sparse') + X_train = sparse.csr_matrix(X_train) + X_test = _convert_container(X_train, 'sparse') - sample_weight = rng.rand(n_samples) + sample_weight = rng.rand(X_train.shape[0]) # linear estimator with explicit sample_weight reg_with_normalize = estimator(normalize=True) - reg_with_normalize.fit(X, y, sample_weight=sample_weight) + reg_with_normalize.fit(X_train, y_train, sample_weight=sample_weight) # linear estimator in a pipeline reg_with_scaler = make_pipeline( - StandardScaler(with_mean=False), + StandardScaler(with_mean=with_mean), estimator(normalize=False) ) kwargs = {reg_with_scaler.steps[-1][0] + '__sample_weight': sample_weight} - reg_with_scaler.fit(X, y, **kwargs) + reg_with_scaler.fit(X_train, y_train, **kwargs) y_pred_norm = reg_with_normalize.predict(X_test) y_pred_pip = reg_with_scaler.predict(X_test)