From eea6fa7f18146c20f261fd609ef2de2bf366d072 Mon Sep 17 00:00:00 2001 From: Kevin Roice Date: Wed, 22 Dec 2021 01:45:08 +0000 Subject: [PATCH 01/10] renamed base_estimator for CalibratedClassifierCV --- sklearn/calibration.py | 81 +++++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 37 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index cbdb88e1647d3..1ad2151f213c0 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -72,13 +72,13 @@ class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator) for model fitting and calibration are disjoint. The calibration is based on the :term:`decision_function` method of the - `base_estimator` if it exists, else on :term:`predict_proba`. + `estimator` if it exists, else on :term:`predict_proba`. Read more in the :ref:`User Guide `. Parameters ---------- - base_estimator : estimator instance, default=None + estimator : estimator instance, default=None The classifier whose output need to be calibrated to provide more accurate `predict_proba` outputs. The default classifier is a :class:`~sklearn.svm.LinearSVC`. @@ -108,7 +108,7 @@ class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator) Refer to the :ref:`User Guide ` for the various cross-validation strategies that can be used here. - If "prefit" is passed, it is assumed that `base_estimator` has been + If "prefit" is passed, it is assumed that `estimator` has been fitted already and all data is used for calibration. .. versionchanged:: 0.22 @@ -130,7 +130,7 @@ class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator) Determines how the calibrator is fitted when `cv` is not `'prefit'`. Ignored if `cv='prefit'`. - If `True`, the `base_estimator` is fitted using training data, and + If `True`, the `estimator` is fitted using training data, and calibrated using testing data, for each `cv` fold. The final estimator is an ensemble of `n_cv` fitted classifier and calibrator pairs, where `n_cv` is the number of cross-validation folds. The output is the @@ -139,7 +139,7 @@ class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator) If `False`, `cv` is used to compute unbiased predictions, via :func:`~sklearn.model_selection.cross_val_predict`, which are then used for calibration. At prediction time, the classifier used is the - `base_estimator` trained on all the data. + `estimator` trained on all the data. Note that this method is also internally implemented in :mod:`sklearn.svm` estimators with the `probabilities=True` parameter. @@ -152,13 +152,13 @@ class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator) n_features_in_ : int Number of features seen during :term:`fit`. Only defined if the - underlying base_estimator exposes such an attribute when fit. + underlying estimator exposes such an attribute when fit. .. versionadded:: 0.24 feature_names_in_ : ndarray of shape (`n_features_in_`,) Names of features seen during :term:`fit`. Only defined if the - underlying base_estimator exposes such an attribute when fit. + underlying estimator exposes such an attribute when fit. .. versionadded:: 1.0 @@ -166,12 +166,12 @@ class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator) or `ensemble=False`) The list of classifier and calibrator pairs. - - When `cv="prefit"`, the fitted `base_estimator` and fitted + - When `cv="prefit"`, the fitted `estimator` and fitted calibrator. - When `cv` is not "prefit" and `ensemble=True`, `n_cv` fitted - `base_estimator` and calibrator pairs. `n_cv` is the number of + `estimator` and calibrator pairs. `n_cv` is the number of cross-validation folds. - - When `cv` is not "prefit" and `ensemble=False`, the `base_estimator`, + - When `cv` is not "prefit" and `ensemble=False`, the `estimator`, fitted on all the data, and fitted calibrator. .. versionchanged:: 0.24 @@ -204,9 +204,9 @@ class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator) >>> X, y = make_classification(n_samples=100, n_features=2, ... n_redundant=0, random_state=42) >>> base_clf = GaussianNB() - >>> calibrated_clf = CalibratedClassifierCV(base_estimator=base_clf, cv=3) + >>> calibrated_clf = CalibratedClassifierCV(estimator=base_clf, cv=3) >>> calibrated_clf.fit(X, y) - CalibratedClassifierCV(base_estimator=GaussianNB(), cv=3) + CalibratedClassifierCV(estimator=GaussianNB(), cv=3) >>> len(calibrated_clf.calibrated_classifiers_) 3 >>> calibrated_clf.predict_proba(X)[:5, :] @@ -225,11 +225,11 @@ class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator) >>> base_clf.fit(X_train, y_train) GaussianNB() >>> calibrated_clf = CalibratedClassifierCV( - ... base_estimator=base_clf, + ... estimator=base_clf, ... cv="prefit" ... ) >>> calibrated_clf.fit(X_calib, y_calib) - CalibratedClassifierCV(base_estimator=GaussianNB(), cv='prefit') + CalibratedClassifierCV(estimator=GaussianNB(), cv='prefit') >>> len(calibrated_clf.calibrated_classifiers_) 1 >>> calibrated_clf.predict_proba([[-0.5, 0.5]]) @@ -238,19 +238,26 @@ class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator) def __init__( self, - base_estimator=None, + estimator=None, *, method="sigmoid", cv=None, n_jobs=None, ensemble=True, + base_estimator="deprecated" ): - self.base_estimator = base_estimator + self.estimator = estimator self.method = method self.cv = cv self.n_jobs = n_jobs self.ensemble = ensemble + if base_estimator != "deprecated": + warnings.warn("'base_estimator' was renamed to estimator in version 1.0 and " + "will be removed in 1.2.", + FutureWarning) + self.estimator = base_estimator + def fit(self, X, y, sample_weight=None, **fit_params): """Fit the calibrated model. @@ -282,25 +289,25 @@ def fit(self, X, y, sample_weight=None, **fit_params): for sample_aligned_params in fit_params.values(): check_consistent_length(y, sample_aligned_params) - if self.base_estimator is None: + if self.estimator is None: # we want all classifiers that don't expose a random_state # to be deterministic (and we don't want to expose this one). - base_estimator = LinearSVC(random_state=0) + estimator = LinearSVC(random_state=0) else: - base_estimator = self.base_estimator + estimator = self.estimator self.calibrated_classifiers_ = [] if self.cv == "prefit": - # `classes_` should be consistent with that of base_estimator - check_is_fitted(self.base_estimator, attributes=["classes_"]) - self.classes_ = self.base_estimator.classes_ + # `classes_` should be consistent with that of estimator + check_is_fitted(self.estimator, attributes=["classes_"]) + self.classes_ = self.estimator.classes_ - pred_method, method_name = _get_prediction_method(base_estimator) + pred_method, method_name = _get_prediction_method(estimator) n_classes = len(self.classes_) predictions = _compute_predictions(pred_method, method_name, X, n_classes) calibrated_classifier = _fit_calibrator( - base_estimator, + estimator, predictions, y, self.classes_, @@ -315,10 +322,10 @@ def fit(self, X, y, sample_weight=None, **fit_params): n_classes = len(self.classes_) # sample_weight checks - fit_parameters = signature(base_estimator.fit).parameters + fit_parameters = signature(estimator.fit).parameters supports_sw = "sample_weight" in fit_parameters if sample_weight is not None and not supports_sw: - estimator_name = type(base_estimator).__name__ + estimator_name = type(estimator).__name__ warnings.warn( f"Since {estimator_name} does not appear to accept sample_weight, " "sample weights will only be used for the calibration itself. This " @@ -351,7 +358,7 @@ def fit(self, X, y, sample_weight=None, **fit_params): parallel = Parallel(n_jobs=self.n_jobs) self.calibrated_classifiers_ = parallel( delayed(_fit_classifier_calibrator_pair)( - clone(base_estimator), + clone(estimator), X, y, train=train, @@ -365,7 +372,7 @@ def fit(self, X, y, sample_weight=None, **fit_params): for train, test in cv.split(X, y) ) else: - this_estimator = clone(base_estimator) + this_estimator = clone(estimator) _, method_name = _get_prediction_method(this_estimator) fit_params = ( {"sample_weight": sample_weight} @@ -402,7 +409,7 @@ def fit(self, X, y, sample_weight=None, **fit_params): ) self.calibrated_classifiers_.append(calibrated_classifier) - first_clf = self.calibrated_classifiers_[0].base_estimator + first_clf = self.calibrated_classifiers_[0].estimator if hasattr(first_clf, "n_features_in_"): self.n_features_in_ = first_clf.n_features_in_ if hasattr(first_clf, "feature_names_in_"): @@ -418,7 +425,7 @@ def predict_proba(self, X): Parameters ---------- X : array-like of shape (n_samples, n_features) - The samples, as accepted by `base_estimator.predict_proba`. + The samples, as accepted by `estimator.predict_proba`. Returns ------- @@ -446,7 +453,7 @@ def predict(self, X): Parameters ---------- X : array-like of shape (n_samples, n_features) - The samples, as accepted by `base_estimator.predict`. + The samples, as accepted by `estimator.predict`. Returns ------- @@ -570,7 +577,7 @@ def _get_prediction_method(clf): return method, "predict_proba" else: raise RuntimeError( - "'base_estimator' has no 'decision_function' or 'predict_proba' method." + "'estimator' has no 'decision_function' or 'predict_proba' method." ) @@ -669,7 +676,7 @@ class _CalibratedClassifier: Parameters ---------- - base_estimator : estimator instance + estimator : estimator instance Fitted classifier. calibrators : list of fitted estimator instances @@ -687,8 +694,8 @@ class _CalibratedClassifier: non-parametric approach based on isotonic regression. """ - def __init__(self, base_estimator, calibrators, *, classes, method="sigmoid"): - self.base_estimator = base_estimator + def __init__(self, estimator, calibrators, *, classes, method="sigmoid"): + self.estimator = estimator self.calibrators = calibrators self.classes = classes self.method = method @@ -710,11 +717,11 @@ def predict_proba(self, X): The predicted probabilities. Can be exact zeros. """ n_classes = len(self.classes) - pred_method, method_name = _get_prediction_method(self.base_estimator) + pred_method, method_name = _get_prediction_method(self.estimator) predictions = _compute_predictions(pred_method, method_name, X, n_classes) label_encoder = LabelEncoder().fit(self.classes) - pos_class_indices = label_encoder.transform(self.base_estimator.classes_) + pos_class_indices = label_encoder.transform(self.estimator.classes_) proba = np.zeros((_num_samples(X), n_classes)) for class_idx, this_pred, calibrator in zip( From 8bc6acd3f48ca7ceebfab71cb4af91f8faaf43ac Mon Sep 17 00:00:00 2001 From: Kevin Roice Date: Wed, 22 Dec 2021 01:54:15 +0000 Subject: [PATCH 02/10] updated change log --- doc/whats_new/v1.0.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 3d9e9e3784c66..58ce4549a21f9 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -19,6 +19,13 @@ Version 1.0.2 Changelog --------- +:mod:`sklearn.calibration` +...................... + +- |Enhancement| Rename variable names in :class:`CalibratedClassifierCV` and + :class:`_CalibratedClassifier` to improve readability. `base_estimator` + isrenamed to `estimator`. :pr:`9104` by :user:`Kevin Roice `. + :mod:`sklearn.cluster` ...................... From 780a1dd8427f23d2c7298d600120701974a31547 Mon Sep 17 00:00:00 2001 From: Kevin Roice Date: Wed, 22 Dec 2021 02:33:16 +0000 Subject: [PATCH 03/10] fixed typo --- doc/whats_new/v1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 58ce4549a21f9..a147b1ecce7d2 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -24,7 +24,7 @@ Changelog - |Enhancement| Rename variable names in :class:`CalibratedClassifierCV` and :class:`_CalibratedClassifier` to improve readability. `base_estimator` - isrenamed to `estimator`. :pr:`9104` by :user:`Kevin Roice `. + is renamed to `estimator`. :pr:`9104` by :user:`Kevin Roice `. :mod:`sklearn.cluster` ...................... From 4c7f2ff9909b30abbb2eece14ee7e2e9733b67b7 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 19 May 2022 19:04:13 +0200 Subject: [PATCH 04/10] fixes --- doc/modules/grid_search.rst | 6 +-- doc/whats_new/v1.0.rst | 7 ---- doc/whats_new/v1.2.rst | 8 ++++ sklearn/calibration.py | 31 ++++++++++++---- sklearn/tests/test_calibration.py | 61 ++++++++++++++++++++----------- 5 files changed, 75 insertions(+), 38 deletions(-) diff --git a/doc/modules/grid_search.rst b/doc/modules/grid_search.rst index 9128c7d3c9841..2daf820ef5112 100644 --- a/doc/modules/grid_search.rst +++ b/doc/modules/grid_search.rst @@ -602,9 +602,9 @@ parameters of composite or nested estimators such as >>> from sklearn.datasets import make_moons >>> X, y = make_moons() >>> calibrated_forest = CalibratedClassifierCV( - ... base_estimator=RandomForestClassifier(n_estimators=10)) + ... estimator=RandomForestClassifier(n_estimators=10)) >>> param_grid = { - ... 'base_estimator__max_depth': [2, 4, 6, 8]} + ... 'estimator__max_depth': [2, 4, 6, 8]} >>> search = GridSearchCV(calibrated_forest, param_grid, cv=5) >>> search.fit(X, y) GridSearchCV(cv=5, @@ -612,7 +612,7 @@ parameters of composite or nested estimators such as param_grid={'base_estimator__max_depth': [2, 4, 6, 8]}) Here, ```` is the parameter name of the nested estimator, -in this case ``base_estimator``. +in this case ``estimator``. If the meta-estimator is constructed as a collection of estimators as in `pipeline.Pipeline`, then ```` refers to the name of the estimator, see :ref:`pipeline_nested_parameters`. In practice, there can be several diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index b2fbe000bc1ec..6ece2f16b6e93 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -19,13 +19,6 @@ Version 1.0.2 Changelog --------- -:mod:`sklearn.calibration` -...................... - -- |Enhancement| Rename variable names in :class:`CalibratedClassifierCV` and - :class:`_CalibratedClassifier` to improve readability. `base_estimator` - is renamed to `estimator`. :pr:`9104` by :user:`Kevin Roice `. - :mod:`sklearn.cluster` ...................... diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index 8e5851cca632f..7e6e805d77f99 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -33,6 +33,14 @@ Changelog :pr:`123456` by :user:`Joe Bloggs `. where 123456 is the *pull request* number, not the issue number. +:mod:`sklearn.calibration` +.......................... + +- |API| Rename `base_estimator` to `estimator` in + :class:`CalibratedClassifierCV` to improve readability and consistency. The + parameter `base_estimator` is deprecated and will be removed in 1.4. + :pr:`9104` by :user:`Kevin Roice `. + :mod:`sklearn.cluster` ...................... diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 0236163e6185d..f253629fc45fd 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -83,6 +83,8 @@ class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator) accurate `predict_proba` outputs. The default classifier is a :class:`~sklearn.svm.LinearSVC`. + .. versionadded:: 1.2 + method : {'sigmoid', 'isotonic'}, default='sigmoid' The method to use for calibration. Can be 'sigmoid' which corresponds to Platt's method (i.e. a logistic regression model) or @@ -145,6 +147,13 @@ class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator) .. versionadded:: 0.24 + base_estimator: estimator instance + This parameter is deprecated. Use `estimator` instead. + + .. deprecated:: 1.2 + The parameter `base_estimator` is deprecated in 1.2 and will be + removed in 1.4. Use `estimator` instead. + Attributes ---------- classes_ : ndarray of shape (n_classes,) @@ -244,19 +253,14 @@ def __init__( cv=None, n_jobs=None, ensemble=True, - base_estimator="deprecated" + base_estimator="deprecated", ): self.estimator = estimator self.method = method self.cv = cv self.n_jobs = n_jobs self.ensemble = ensemble - - if base_estimator != "deprecated": - warnings.warn("'base_estimator' was renamed to estimator in version 1.0 and " - "will be removed in 1.2.", - FutureWarning) - self.estimator = base_estimator + self.base_estimator = base_estimator def fit(self, X, y, sample_weight=None, **fit_params): """Fit the calibrated model. @@ -289,6 +293,19 @@ def fit(self, X, y, sample_weight=None, **fit_params): for sample_aligned_params in fit_params.values(): check_consistent_length(y, sample_aligned_params) + if self.base_estimator != "deprecated": + if self.estimator is not None: + raise ValueError( + "Both `base_estimator` and `estimator` are set. Only set " + "`estimator` since `base_estimator` is deprecated." + ) + warnings.warn( + "`base_estimator` was renamed to `estimator` in version 1.2 and " + "will be removed in 1.4.", + FutureWarning, + ) + self.estimator = self.base_estimator + if self.estimator is None: # we want all classifiers that don't expose a random_state # to be deterministic (and we don't want to expose this one). diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index fb8a6d4f344b2..c9a31438dafd6 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -140,12 +140,12 @@ def test_calibration_regressor(data, ensemble): def test_calibration_default_estimator(data): - # Check base_estimator default is LinearSVC + # Check estimator default is LinearSVC X, y = data calib_clf = CalibratedClassifierCV(cv=2) calib_clf.fit(X, y) - base_est = calib_clf.calibrated_classifiers_[0].base_estimator + base_est = calib_clf.calibrated_classifiers_[0].estimator assert isinstance(base_est, LinearSVC) @@ -175,10 +175,8 @@ def test_sample_weight(data, method, ensemble): X_train, y_train, sw_train = X[:n_samples], y[:n_samples], sample_weight[:n_samples] X_test = X[n_samples:] - base_estimator = LinearSVC(random_state=42) - calibrated_clf = CalibratedClassifierCV( - base_estimator, method=method, ensemble=ensemble - ) + estimator = LinearSVC(random_state=42) + calibrated_clf = CalibratedClassifierCV(estimator, method=method, ensemble=ensemble) calibrated_clf.fit(X_train, y_train, sample_weight=sw_train) probs_with_sw = calibrated_clf.predict_proba(X_test) @@ -198,16 +196,16 @@ def test_parallel_execution(data, method, ensemble): X, y = data X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) - base_estimator = LinearSVC(random_state=42) + estimator = LinearSVC(random_state=42) cal_clf_parallel = CalibratedClassifierCV( - base_estimator, method=method, n_jobs=2, ensemble=ensemble + estimator, method=method, n_jobs=2, ensemble=ensemble ) cal_clf_parallel.fit(X_train, y_train) probs_parallel = cal_clf_parallel.predict_proba(X_test) cal_clf_sequential = CalibratedClassifierCV( - base_estimator, method=method, n_jobs=1, ensemble=ensemble + estimator, method=method, n_jobs=1, ensemble=ensemble ) cal_clf_sequential.fit(X_train, y_train) probs_sequential = cal_clf_sequential.predict_proba(X_test) @@ -297,7 +295,7 @@ def predict(self, X): clf = DummyClassifier().fit(X, y) calibrator = ZeroCalibrator() cal_clf = _CalibratedClassifier( - base_estimator=clf, calibrators=[calibrator], classes=clf.classes_ + estimator=clf, calibrators=[calibrator], classes=clf.classes_ ) probas = cal_clf.predict_proba(X) @@ -624,7 +622,7 @@ def test_calibration_votingclassifier(): ) vote.fit(X, y) - calib_clf = CalibratedClassifierCV(base_estimator=vote, cv="prefit") + calib_clf = CalibratedClassifierCV(estimator=vote, cv="prefit") # smoke test: should not raise an error calib_clf.fit(X, y) @@ -911,9 +909,9 @@ def test_calibrated_classifier_cv_double_sample_weights_equivalence(method, ense y_twice[::2] = y y_twice[1::2] = y - base_estimator = LogisticRegression() + estimator = LogisticRegression() calibrated_clf_without_weights = CalibratedClassifierCV( - base_estimator, + estimator, method=method, ensemble=ensemble, cv=2, @@ -929,8 +927,8 @@ def test_calibrated_classifier_cv_double_sample_weights_equivalence(method, ense calibrated_clf_without_weights.calibrated_classifiers_, ): assert_allclose( - est_with_weights.base_estimator.coef_, - est_without_weights.base_estimator.coef_, + est_with_weights.estimator.coef_, + est_without_weights.estimator.coef_, ) # Check that the predictions are the same @@ -978,11 +976,11 @@ def test_calibration_with_sample_weight_base_estimator(sample_weight, data): def test_calibration_without_sample_weight_base_estimator(data): - """Check that even if the base_estimator doesn't support + """Check that even if the estimator doesn't support sample_weight, fitting with sample_weight still works. There should be a warning, since the sample_weight is not passed - on to the base_estimator. + on to the estimator. """ X, y = data sample_weight = np.ones_like(y) @@ -1031,9 +1029,9 @@ def test_calibrated_classifier_cv_zeros_sample_weights_equivalence(method, ensem sample_weight = np.zeros_like(y) sample_weight[::2] = 1 - base_estimator = LogisticRegression() + estimator = LogisticRegression() calibrated_clf_without_weights = CalibratedClassifierCV( - base_estimator, + estimator, method=method, ensemble=ensemble, cv=2, @@ -1049,8 +1047,8 @@ def test_calibrated_classifier_cv_zeros_sample_weights_equivalence(method, ensem calibrated_clf_without_weights.calibrated_classifiers_, ): assert_allclose( - est_with_weights.base_estimator.coef_, - est_without_weights.base_estimator.coef_, + est_with_weights.estimator.coef_, + est_without_weights.estimator.coef_, ) # Check that the predictions are the same @@ -1058,3 +1056,24 @@ def test_calibrated_classifier_cv_zeros_sample_weights_equivalence(method, ensem y_pred_without_weights = calibrated_clf_without_weights.predict_proba(X) assert_allclose(y_pred_with_weights, y_pred_without_weights) + + +# TODO (remove in 1.4) +def test_calibrated_classifier_error_base_estimator(data): + """Check that we raise an error is a user set both `base_estimator` and + `estimator`.""" + calibrated_classifier = CalibratedClassifierCV( + base_estimator=LogisticRegression(), estimator=LogisticRegression() + ) + with pytest.raises(ValueError, match="Both `base_estimator` and `estimator`"): + calibrated_classifier.fit(*data) + + +# TODO (remove in 1.4) +def test_calibrated_classifier_deprecation_base_estimator(data): + """Check that we raise a warning regarding the deprecation of + `base_estimator`.""" + calibrated_classifier = CalibratedClassifierCV(base_estimator=LogisticRegression()) + warn_msg = "`base_estimator` was renamed to `estimator`" + with pytest.warns(FutureWarning, match=warn_msg): + calibrated_classifier.fit(*data) From a89627c97275720d333372c123f6242bea9563b4 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 19 May 2022 19:05:38 +0200 Subject: [PATCH 05/10] fix pr number --- doc/whats_new/v1.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index 7e6e805d77f99..e2376bfda19fc 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -39,7 +39,7 @@ Changelog - |API| Rename `base_estimator` to `estimator` in :class:`CalibratedClassifierCV` to improve readability and consistency. The parameter `base_estimator` is deprecated and will be removed in 1.4. - :pr:`9104` by :user:`Kevin Roice `. + :pr:`22054` by :user:`Kevin Roice `. :mod:`sklearn.cluster` ...................... From dfd8bf2f93a89f59c8a9fb6c7a4d66e54a1f2224 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 19 May 2022 19:25:23 +0200 Subject: [PATCH 06/10] use ellipsis in docstring --- sklearn/calibration.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index f253629fc45fd..9b54ad754a14a 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -213,9 +213,9 @@ class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator) >>> X, y = make_classification(n_samples=100, n_features=2, ... n_redundant=0, random_state=42) >>> base_clf = GaussianNB() - >>> calibrated_clf = CalibratedClassifierCV(estimator=base_clf, cv=3) + >>> calibrated_clf = CalibratedClassifierCV(base_clf, cv=3) >>> calibrated_clf.fit(X, y) - CalibratedClassifierCV(estimator=GaussianNB(), cv=3) + CalibratedClassifierCV(...) >>> len(calibrated_clf.calibrated_classifiers_) 3 >>> calibrated_clf.predict_proba(X)[:5, :] @@ -233,12 +233,9 @@ class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator) >>> base_clf = GaussianNB() >>> base_clf.fit(X_train, y_train) GaussianNB() - >>> calibrated_clf = CalibratedClassifierCV( - ... estimator=base_clf, - ... cv="prefit" - ... ) + >>> calibrated_clf = CalibratedClassifierCV(base_clf, cv="prefit") >>> calibrated_clf.fit(X_calib, y_calib) - CalibratedClassifierCV(estimator=GaussianNB(), cv='prefit') + CalibratedClassifierCV(...) >>> len(calibrated_clf.calibrated_classifiers_) 1 >>> calibrated_clf.predict_proba([[-0.5, 0.5]]) From 051dc47451c22446e06a3ead51ccd2c954d59dea Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 19 May 2022 20:14:39 +0200 Subject: [PATCH 07/10] DOC fixes missing space in docstring --- doc/modules/grid_search.rst | 6 +++--- sklearn/calibration.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/modules/grid_search.rst b/doc/modules/grid_search.rst index 2daf820ef5112..34728e5db9617 100644 --- a/doc/modules/grid_search.rst +++ b/doc/modules/grid_search.rst @@ -608,8 +608,8 @@ parameters of composite or nested estimators such as >>> search = GridSearchCV(calibrated_forest, param_grid, cv=5) >>> search.fit(X, y) GridSearchCV(cv=5, - estimator=CalibratedClassifierCV(...), - param_grid={'base_estimator__max_depth': [2, 4, 6, 8]}) + estimator=CalibratedClassifierCV(...), + param_grid={'estimator__max_depth': [2, 4, 6, 8]}) Here, ```` is the parameter name of the nested estimator, in this case ``estimator``. @@ -625,7 +625,7 @@ levels of nesting:: ... ('model', calibrated_forest)]) >>> param_grid = { ... 'select__k': [1, 2], - ... 'model__base_estimator__max_depth': [2, 4, 6, 8]} + ... 'model__estimator__max_depth': [2, 4, 6, 8]} >>> search = GridSearchCV(pipe, param_grid, cv=5).fit(X, y) Please refer to :ref:`pipeline` for performing parameter searches over diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 9b54ad754a14a..3f56727c2fba7 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -147,7 +147,7 @@ class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator) .. versionadded:: 0.24 - base_estimator: estimator instance + base_estimator : estimator instance This parameter is deprecated. Use `estimator` instead. .. deprecated:: 1.2 From 2c6c58d212a481157df91493f3908c1450722bc5 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 30 May 2022 17:35:23 +0200 Subject: [PATCH 08/10] remove antipattern --- sklearn/calibration.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 3f56727c2fba7..f04fb8be47632 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -301,14 +301,14 @@ def fit(self, X, y, sample_weight=None, **fit_params): "will be removed in 1.4.", FutureWarning, ) - self.estimator = self.base_estimator + estimator = self.base_estimator + else: + estimator = self.estimator - if self.estimator is None: + if estimator is None: # we want all classifiers that don't expose a random_state # to be deterministic (and we don't want to expose this one). estimator = LinearSVC(random_state=0) - else: - estimator = self.estimator self.calibrated_classifiers_ = [] if self.cv == "prefit": From fe351eec81fa89e793e93403b6fea89e7afa1441 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Mon, 30 May 2022 13:53:51 -0400 Subject: [PATCH 09/10] ENH Minor nits --- doc/modules/grid_search.rst | 4 ++-- sklearn/tests/test_calibration.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/modules/grid_search.rst b/doc/modules/grid_search.rst index 34728e5db9617..280117c23d853 100644 --- a/doc/modules/grid_search.rst +++ b/doc/modules/grid_search.rst @@ -608,8 +608,8 @@ parameters of composite or nested estimators such as >>> search = GridSearchCV(calibrated_forest, param_grid, cv=5) >>> search.fit(X, y) GridSearchCV(cv=5, - estimator=CalibratedClassifierCV(...), - param_grid={'estimator__max_depth': [2, 4, 6, 8]}) + estimator=CalibratedClassifierCV(...), + param_grid={'estimator__max_depth': [2, 4, 6, 8]}) Here, ```` is the parameter name of the nested estimator, in this case ``estimator``. diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index c9a31438dafd6..cb404b4c73199 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -1058,7 +1058,7 @@ def test_calibrated_classifier_cv_zeros_sample_weights_equivalence(method, ensem assert_allclose(y_pred_with_weights, y_pred_without_weights) -# TODO (remove in 1.4) +# TODO(1.4): Remove def test_calibrated_classifier_error_base_estimator(data): """Check that we raise an error is a user set both `base_estimator` and `estimator`.""" @@ -1069,7 +1069,7 @@ def test_calibrated_classifier_error_base_estimator(data): calibrated_classifier.fit(*data) -# TODO (remove in 1.4) +# TODO(1.4): Remove def test_calibrated_classifier_deprecation_base_estimator(data): """Check that we raise a warning regarding the deprecation of `base_estimator`.""" From 3f12e3fea316f1cf44f06068f0644ce98cc2313d Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Mon, 30 May 2022 13:55:49 -0400 Subject: [PATCH 10/10] DOC Adds more comments --- sklearn/calibration.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index f04fb8be47632..d5eba5a761ebe 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -290,6 +290,7 @@ def fit(self, X, y, sample_weight=None, **fit_params): for sample_aligned_params in fit_params.values(): check_consistent_length(y, sample_aligned_params) + # TODO(1.4): Remove when base_estimator is removed if self.base_estimator != "deprecated": if self.estimator is not None: raise ValueError(