From 2f6de703d6ca3f3f1c1f9eae726d02ec436c047e Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 25 Feb 2021 10:49:14 +0100 Subject: [PATCH 01/15] Test and doc for n_features_in_ for sklearn.calibration --- sklearn/calibration.py | 3 +++ sklearn/tests/test_common.py | 1 - sklearn/tests/test_docstring_parameters.py | 1 - 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index b60a415b4419b..85decdd76ee84 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -141,6 +141,9 @@ class CalibratedClassifierCV(ClassifierMixin, classes_ : ndarray of shape (n_classes,) The class labels. + n_features_in_ : int + Number of features seen during :term:`fit`. + calibrated_classifiers_ : list (len() equal to cv or 1 if `cv="prefit"` \ or `ensemble=False`) The list of classifier and calibrator pairs. diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index bfd7f98268350..45943ad0b007c 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -261,7 +261,6 @@ def test_search_cv(estimator, check, request): # # check_classifiers_train would need to be updated with the error message N_FEATURES_IN_AFTER_FIT_MODULES_TO_IGNORE = { - 'calibration', 'compose', 'feature_extraction', 'mixture', diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py index cd2bdba449799..5c2e02bd3e247 100644 --- a/sklearn/tests/test_docstring_parameters.py +++ b/sklearn/tests/test_docstring_parameters.py @@ -176,7 +176,6 @@ def _construct_searchcv_instance(SearchCV): N_FEATURES_MODULES_TO_IGNORE = { - 'calibration', 'cluster', 'compose', 'covariance', From 24d8e90b3e494b87af8752a799c3631ee09a2911 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 17 Mar 2021 16:49:05 +0100 Subject: [PATCH 02/15] CalibratedClassifierCV delegates n_features_in_ validation to base_estimator --- sklearn/calibration.py | 40 +++++++++++++++++++++---------- sklearn/tests/test_calibration.py | 22 ++++++++++++++--- 2 files changed, 47 insertions(+), 15 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 85decdd76ee84..96c1331c381e5 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -33,6 +33,8 @@ from .utils.fixes import delayed from .utils.validation import check_is_fitted, check_consistent_length from .utils.validation import _check_sample_weight, _num_samples +from .utils.validation import _num_features +from .utils import _safe_indexing from .pipeline import Pipeline from .isotonic import IsotonicRegression from .svm import LinearSVC @@ -144,6 +146,8 @@ class CalibratedClassifierCV(ClassifierMixin, n_features_in_ : int Number of features seen during :term:`fit`. + .. versionadded:: 0.24 + calibrated_classifiers_ : list (len() equal to cv or 1 if `cv="prefit"` \ or `ensemble=False`) The list of classifier and calibrator pairs. @@ -259,8 +263,20 @@ def fit(self, X, y, sample_weight=None): check_is_fitted(self.base_estimator[-1]) else: check_is_fitted(self.base_estimator) + try: + self.n_features_in_ = _num_features(X) + except TypeError: + # X is not necessarily tabular as base_estimator might be able + # to accept non-tabular data. + pass with suppress(AttributeError): - self.n_features_in_ = base_estimator.n_features_in_ + if self.n_features_in_ != base_estimator.n_features_in_: + raise ValueError( + f"Base estimator {base_estimator.__class__.__name__} " + f"was prefit on {base_estimator.n_features_in_} " + f"features but CalibratedClassifierCV is fit " + f"with {self.n_features_in_} features." + ) self.classes_ = self.base_estimator.classes_ pred_method = _get_prediction_method(base_estimator) @@ -273,10 +289,6 @@ def fit(self, X, y, sample_weight=None): ) self.calibrated_classifiers_.append(calibrated_classifier) else: - X, y = self._validate_data( - X, y, accept_sparse=['csc', 'csr', 'coo'], - force_all_finite=False, allow_nd=True - ) # Set `classes_` using all `y` label_encoder_ = LabelEncoder().fit(y) self.classes_ = label_encoder_.classes_ @@ -337,6 +349,9 @@ def fit(self, X, y, sample_weight=None): ) self.calibrated_classifiers_.append(calibrated_classifier) + first_clf = next(iter(self.calibrated_classifiers_)).base_estimator + if hasattr(first_clf, "n_features_in_"): + self.n_features_in_ = first_clf.n_features_in_ return self def predict_proba(self, X): @@ -355,7 +370,6 @@ def predict_proba(self, X): The predicted probas. """ check_is_fitted(self) - # Compute the arithmetic mean of the predictions of the calibrated # classifiers mean_proba = np.zeros((_num_samples(X), len(self.classes_))) @@ -434,19 +448,21 @@ def _fit_classifier_calibrator_pair(estimator, X, y, train, test, supports_sw, ------- calibrated_classifier : _CalibratedClassifier instance """ + X_train, y_train = _safe_indexing(X, train), _safe_indexing(y, train) + X_test, y_test = _safe_indexing(X, test), _safe_indexing(y, test) if sample_weight is not None and supports_sw: - estimator.fit(X[train], y[train], - sample_weight=sample_weight[train]) + estimator.fit(X_train, y_train, + sample_weight=_safe_indexing(sample_weight, train)) else: - estimator.fit(X[train], y[train]) + estimator.fit(X_train, y_train) n_classes = len(classes) pred_method = _get_prediction_method(estimator) - predictions = _compute_predictions(pred_method, X[test], n_classes) + predictions = _compute_predictions(pred_method, X_test, n_classes) - sw = None if sample_weight is None else sample_weight[test] + sw = None if sample_weight is None else _safe_indexing(sample_weight, test) calibrated_classifier = _fit_calibrator( - estimator, predictions, y[test], classes, method, sample_weight=sw + estimator, predictions, y_test, classes, method, sample_weight=sw ) return calibrated_classifier diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 86a638c4a7679..c870a747c576e 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -2,6 +2,7 @@ # License: BSD 3 clause import pytest +import re import numpy as np from numpy.testing import assert_allclose from scipy import sparse @@ -14,6 +15,7 @@ assert_almost_equal, assert_array_equal, ignore_warnings) +from sklearn.utils.validation import _num_features from sklearn.utils.extmath import softmax from sklearn.exceptions import NotFittedError from sklearn.datasets import make_classification, make_blobs @@ -547,9 +549,7 @@ def test_calibration_pipeline(text_data, text_data_pipeline): calib_clf.fit(X, y) # Check attributes are obtained from fitted estimator assert_array_equal(calib_clf.classes_, clf.classes_) - msg = "'CalibratedClassifierCV' object has no attribute" - with pytest.raises(AttributeError, match=msg): - calib_clf.n_features_in_ + assert _num_features(X) == calib_clf.n_features_in_ # Ensure that no error is thrown with predict and predict_proba calib_clf.predict(X) @@ -578,6 +578,22 @@ def test_calibration_attributes(clf, cv): assert calib_clf.n_features_in_ == X.shape[1] +def test_calibration_inconsistent_prefit_n_features_in(): + # Check that `n_features_in_` from prefit base estimator + # is consistent with training set + X, y = make_classification(n_samples=10, n_features=5, + n_classes=2, random_state=7) + clf = LinearSVC(C=1).fit(X, y) + calib_clf = CalibratedClassifierCV(clf, cv='prefit') + + msg = re.escape( + "Base estimator LinearSVC was prefit on 5 features " + "but CalibratedClassifierCV is fit with 3 features." + ) + with pytest.raises(ValueError, match=msg): + calib_clf.fit(X[:, :3], y) + + # FIXME: remove in 1.1 def test_calibrated_classifier_cv_deprecation(data): # Check that we raise the proper deprecation warning if accessing From 4a2ce3f0dd98cf4d8e934b9efa32ead67fdb8689 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 17 Mar 2021 18:02:00 +0100 Subject: [PATCH 03/15] Better n_features_in_ handling --- sklearn/calibration.py | 21 ++++++++++---------- sklearn/tests/test_calibration.py | 33 ++++++++++++++++++++++--------- 2 files changed, 35 insertions(+), 19 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 96c1331c381e5..8ae9397466543 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -144,7 +144,8 @@ class CalibratedClassifierCV(ClassifierMixin, The class labels. n_features_in_ : int - Number of features seen during :term:`fit`. + Number of features seen during :term:`fit`. Only defined if the + underlying base_estimator exposes such an attribute when fit. .. versionadded:: 0.24 @@ -263,20 +264,20 @@ def fit(self, X, y, sample_weight=None): check_is_fitted(self.base_estimator[-1]) else: check_is_fitted(self.base_estimator) - try: - self.n_features_in_ = _num_features(X) - except TypeError: - # X is not necessarily tabular as base_estimator might be able - # to accept non-tabular data. - pass with suppress(AttributeError): - if self.n_features_in_ != base_estimator.n_features_in_: + # Only perform the check and set self.n_features_in_ if the + # base estimator has the attribute defined. + expected_n_features_in = base_estimator.n_features_in_ + actual_n_features_in = _num_features(X) + if expected_n_features_in != actual_n_features_in: raise ValueError( f"Base estimator {base_estimator.__class__.__name__} " - f"was prefit on {base_estimator.n_features_in_} " + f"was prefit on {expected_n_features_in} " f"features but CalibratedClassifierCV is fit " - f"with {self.n_features_in_} features." + f"with {actual_n_features_in} features." ) + else: + self.n_features_in_ = actual_n_features_in self.classes_ = self.base_estimator.classes_ pred_method = _get_prediction_method(base_estimator) diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index c870a747c576e..281ff240367aa 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -514,19 +514,19 @@ def decision_function(self, X): @pytest.fixture -def text_data(): - text_data = [ +def dict_data(): + dict_data = [ {'state': 'NY', 'age': 'adult'}, {'state': 'TX', 'age': 'adult'}, {'state': 'VT', 'age': 'child'}, ] text_labels = [1, 0, 1] - return text_data, text_labels + return dict_data, text_labels @pytest.fixture -def text_data_pipeline(text_data): - X, y = text_data +def dict_data_pipeline(dict_data): + X, y = dict_data pipeline_prefit = Pipeline([ ('vectorizer', DictVectorizer()), ('clf', RandomForestClassifier()) @@ -534,7 +534,7 @@ def text_data_pipeline(text_data): return pipeline_prefit.fit(X, y) -def test_calibration_pipeline(text_data, text_data_pipeline): +def test_calibration_dict_pipeline(dict_data, dict_data_pipeline): """Test that calibration works in prefit pipeline with transformer `X` is not array-like, sparse matrix or dataframe at the start. @@ -543,13 +543,19 @@ def test_calibration_pipeline(text_data, text_data_pipeline): Also test it can predict without running into validation errors. See https://github.com/scikit-learn/scikit-learn/issues/19637 """ - X, y = text_data - clf = text_data_pipeline + X, y = dict_data + clf = dict_data_pipeline calib_clf = CalibratedClassifierCV(clf, cv='prefit') calib_clf.fit(X, y) # Check attributes are obtained from fitted estimator assert_array_equal(calib_clf.classes_, clf.classes_) - assert _num_features(X) == calib_clf.n_features_in_ + + # Neither the pipeline nor the calibration meta-estimator + # expose the n_features_in_ check on this kind of data. + with pytest.raises(AttributeError): + clf.n_features_in_ + with pytest.raises(AttributeError): + calib_clf.n_features_in_ # Ensure that no error is thrown with predict and predict_proba calib_clf.predict(X) @@ -566,6 +572,7 @@ def test_calibration_attributes(clf, cv): n_classes=2, random_state=7) if cv == 'prefit': clf = clf.fit(X, y) + calib_clf = CalibratedClassifierCV(clf, cv=cv) calib_clf.fit(X, y) @@ -577,6 +584,14 @@ def test_calibration_attributes(clf, cv): assert_array_equal(calib_clf.classes_, classes) assert calib_clf.n_features_in_ == X.shape[1] + if cv == "prefit": + msg = ( + "Base estimator LinearSVC was prefit on 5 features but " + "CalibratedClassifierCV is fit with 10 features." + ) + with pytest.raises(ValueError, match=re.escape(msg)): + calib_clf.fit(np.concatenate([X, X], axis=1), y) + def test_calibration_inconsistent_prefit_n_features_in(): # Check that `n_features_in_` from prefit base estimator From 446aff17f1af3aeab62d7fec02e91e224a6097e6 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 17 Mar 2021 18:14:56 +0100 Subject: [PATCH 04/15] Useless import --- sklearn/tests/test_calibration.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 281ff240367aa..4ebe8e4f7eae0 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -15,7 +15,6 @@ assert_almost_equal, assert_array_equal, ignore_warnings) -from sklearn.utils.validation import _num_features from sklearn.utils.extmath import softmax from sklearn.exceptions import NotFittedError from sklearn.datasets import make_classification, make_blobs From d4f51187871591ad29f71386eb14556011699bae Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 17 Mar 2021 18:19:47 +0100 Subject: [PATCH 05/15] Update sklearn/tests/test_calibration.py --- sklearn/tests/test_calibration.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 4ebe8e4f7eae0..e25ea3ded4a60 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -571,7 +571,6 @@ def test_calibration_attributes(clf, cv): n_classes=2, random_state=7) if cv == 'prefit': clf = clf.fit(X, y) - calib_clf = CalibratedClassifierCV(clf, cv=cv) calib_clf.fit(X, y) From 8ccd12c180a135e3ec26c0bd32e31e0083d53f9b Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 17 Mar 2021 18:24:23 +0100 Subject: [PATCH 06/15] Update sklearn/tests/test_calibration.py --- sklearn/tests/test_calibration.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index e25ea3ded4a60..265cb0a6ca7d1 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -582,14 +582,6 @@ def test_calibration_attributes(clf, cv): assert_array_equal(calib_clf.classes_, classes) assert calib_clf.n_features_in_ == X.shape[1] - if cv == "prefit": - msg = ( - "Base estimator LinearSVC was prefit on 5 features but " - "CalibratedClassifierCV is fit with 10 features." - ) - with pytest.raises(ValueError, match=re.escape(msg)): - calib_clf.fit(np.concatenate([X, X], axis=1), y) - def test_calibration_inconsistent_prefit_n_features_in(): # Check that `n_features_in_` from prefit base estimator From 057a1f9b447d47840c1cc6ccff5580e4e3ae8dde Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 17 Mar 2021 19:27:41 +0100 Subject: [PATCH 07/15] Update check_complex_data to generate data that is valid for stratified 3-fold --- sklearn/utils/estimator_checks.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 7561c64abe6a8..71f5b3b42de42 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -959,10 +959,13 @@ def check_dtype_object(name, estimator_orig): def check_complex_data(name, estimator_orig): + rng = np.random.RandomState(42) # check that estimators raise an exception on providing complex data - X = np.random.sample(10) + 1j * np.random.sample(10) + X = rng.uniform(size=10) + 1j * rng.uniform(size=10) X = X.reshape(-1, 1) - y = np.random.sample(10) + 1j * np.random.sample(10) + + # Something both valid for classification and regression + y = rng.randint(low=0, high=2, size=10) + 1j estimator = clone(estimator_orig) with raises(ValueError, match="Complex data not supported"): estimator.fit(X, y) From af0f3a90ba7bc04a3701234ad20038d4528812b6 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 29 Mar 2021 16:08:47 +0200 Subject: [PATCH 08/15] Delegate n_features_in_ check even when prefit --- sklearn/calibration.py | 23 ++++------------------- sklearn/tests/test_calibration.py | 5 +---- 2 files changed, 5 insertions(+), 23 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 8ae9397466543..9cf65b730682c 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -258,26 +258,11 @@ def fit(self, X, y, sample_weight=None): self.calibrated_classifiers_ = [] if self.cv == "prefit": - # `classes_` and `n_features_in_` should be consistent with that - # of base_estimator + # `classes_` should be consistent with that of base_estimator if isinstance(self.base_estimator, Pipeline): check_is_fitted(self.base_estimator[-1]) else: check_is_fitted(self.base_estimator) - with suppress(AttributeError): - # Only perform the check and set self.n_features_in_ if the - # base estimator has the attribute defined. - expected_n_features_in = base_estimator.n_features_in_ - actual_n_features_in = _num_features(X) - if expected_n_features_in != actual_n_features_in: - raise ValueError( - f"Base estimator {base_estimator.__class__.__name__} " - f"was prefit on {expected_n_features_in} " - f"features but CalibratedClassifierCV is fit " - f"with {actual_n_features_in} features." - ) - else: - self.n_features_in_ = actual_n_features_in self.classes_ = self.base_estimator.classes_ pred_method = _get_prediction_method(base_estimator) @@ -350,9 +335,9 @@ def fit(self, X, y, sample_weight=None): ) self.calibrated_classifiers_.append(calibrated_classifier) - first_clf = next(iter(self.calibrated_classifiers_)).base_estimator - if hasattr(first_clf, "n_features_in_"): - self.n_features_in_ = first_clf.n_features_in_ + first_clf = next(iter(self.calibrated_classifiers_)).base_estimator + if hasattr(first_clf, "n_features_in_"): + self.n_features_in_ = first_clf.n_features_in_ return self def predict_proba(self, X): diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 265cb0a6ca7d1..9bab9a678a19e 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -591,10 +591,7 @@ def test_calibration_inconsistent_prefit_n_features_in(): clf = LinearSVC(C=1).fit(X, y) calib_clf = CalibratedClassifierCV(clf, cv='prefit') - msg = re.escape( - "Base estimator LinearSVC was prefit on 5 features " - "but CalibratedClassifierCV is fit with 3 features." - ) + msg = "X has 3 features, but LinearSVC is expecting 5 features as input." with pytest.raises(ValueError, match=msg): calib_clf.fit(X[:, :3], y) From 1425db0c2afb9dec3ebd4db254353de9c442cc18 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 29 Mar 2021 16:10:38 +0200 Subject: [PATCH 09/15] More intuitive check --- sklearn/tests/test_calibration.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 9bab9a678a19e..0b16fb6a16c90 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -551,10 +551,8 @@ def test_calibration_dict_pipeline(dict_data, dict_data_pipeline): # Neither the pipeline nor the calibration meta-estimator # expose the n_features_in_ check on this kind of data. - with pytest.raises(AttributeError): - clf.n_features_in_ - with pytest.raises(AttributeError): - calib_clf.n_features_in_ + assert not hasattr(clf, 'n_features_in_') + assert not hasattr(calib_clf, 'n_features_in_') # Ensure that no error is thrown with predict and predict_proba calib_clf.predict(X) From aab7e7f32269bf04ef3a4446de388c0d406726f7 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 29 Mar 2021 16:12:00 +0200 Subject: [PATCH 10/15] More intuitive first classifier lookup --- sklearn/calibration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 9cf65b730682c..18c615281e144 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -335,7 +335,7 @@ def fit(self, X, y, sample_weight=None): ) self.calibrated_classifiers_.append(calibrated_classifier) - first_clf = next(iter(self.calibrated_classifiers_)).base_estimator + first_clf = self.calibrated_classifiers_[0].base_estimator if hasattr(first_clf, "n_features_in_"): self.n_features_in_ = first_clf.n_features_in_ return self From cb1548e01a90ec70f745d2c98f33a86b290efae7 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 29 Mar 2021 16:14:08 +0200 Subject: [PATCH 11/15] Remove unused imports --- sklearn/calibration.py | 2 -- sklearn/tests/test_calibration.py | 1 - 2 files changed, 3 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 18c615281e144..4e4a9116eff90 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -9,7 +9,6 @@ import warnings from inspect import signature -from contextlib import suppress from functools import partial from math import log @@ -33,7 +32,6 @@ from .utils.fixes import delayed from .utils.validation import check_is_fitted, check_consistent_length from .utils.validation import _check_sample_weight, _num_samples -from .utils.validation import _num_features from .utils import _safe_indexing from .pipeline import Pipeline from .isotonic import IsotonicRegression diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 0b16fb6a16c90..53d620b41031c 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -2,7 +2,6 @@ # License: BSD 3 clause import pytest -import re import numpy as np from numpy.testing import assert_allclose from scipy import sparse From c52d2b7724cd6e943427ecb03ffe89f3f743b07e Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 29 Mar 2021 16:33:49 +0200 Subject: [PATCH 12/15] Simplify classes_ check on prefit pipeline --- sklearn/calibration.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 4e4a9116eff90..79d7f4b0961d0 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -257,10 +257,7 @@ def fit(self, X, y, sample_weight=None): self.calibrated_classifiers_ = [] if self.cv == "prefit": # `classes_` should be consistent with that of base_estimator - if isinstance(self.base_estimator, Pipeline): - check_is_fitted(self.base_estimator[-1]) - else: - check_is_fitted(self.base_estimator) + check_is_fitted(self.base_estimator, attributes=["classes_"]) self.classes_ = self.base_estimator.classes_ pred_method = _get_prediction_method(base_estimator) From d85441ae9b3dec2ce9cbd2870efaae006e3bda7f Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 29 Mar 2021 16:37:34 +0200 Subject: [PATCH 13/15] One more useless import --- sklearn/calibration.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 79d7f4b0961d0..da0c34d97dc01 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -33,7 +33,6 @@ from .utils.validation import check_is_fitted, check_consistent_length from .utils.validation import _check_sample_weight, _num_samples from .utils import _safe_indexing -from .pipeline import Pipeline from .isotonic import IsotonicRegression from .svm import LinearSVC from .model_selection import check_cv, cross_val_predict From e166e0e1c1cc51c6f31c5f10b31b49fd5fa55a64 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 29 Mar 2021 18:28:03 +0200 Subject: [PATCH 14/15] style in indexing sample_weight --- sklearn/calibration.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index da0c34d97dc01..e32788a16b399 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -430,9 +430,15 @@ def _fit_classifier_calibrator_pair(estimator, X, y, train, test, supports_sw, """ X_train, y_train = _safe_indexing(X, train), _safe_indexing(y, train) X_test, y_test = _safe_indexing(X, test), _safe_indexing(y, test) - if sample_weight is not None and supports_sw: - estimator.fit(X_train, y_train, - sample_weight=_safe_indexing(sample_weight, train)) + if sample_weight is not None: + sw_train = _safe_indexing(sample_weight, train) + sw_test = _safe_indexing(sample_weight, test) + else: + sw_train = None + sw_test = None + + if supports_sw: + estimator.fit(X_train, y_train, sample_weight=sw_train) else: estimator.fit(X_train, y_train) @@ -440,9 +446,8 @@ def _fit_classifier_calibrator_pair(estimator, X, y, train, test, supports_sw, pred_method = _get_prediction_method(estimator) predictions = _compute_predictions(pred_method, X_test, n_classes) - sw = None if sample_weight is None else _safe_indexing(sample_weight, test) calibrated_classifier = _fit_calibrator( - estimator, predictions, y_test, classes, method, sample_weight=sw + estimator, predictions, y_test, classes, method, sample_weight=sw_test ) return calibrated_classifier From 1ff64be6efc22d48abebf0c1020655a5a0e50a33 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Tue, 30 Mar 2021 17:46:23 +0200 Subject: [PATCH 15/15] Update sklearn/calibration.py Co-authored-by: Thomas J. Fan --- sklearn/calibration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index e32788a16b399..c6289d1df2936 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -430,7 +430,7 @@ def _fit_classifier_calibrator_pair(estimator, X, y, train, test, supports_sw, """ X_train, y_train = _safe_indexing(X, train), _safe_indexing(y, train) X_test, y_test = _safe_indexing(X, test), _safe_indexing(y, test) - if sample_weight is not None: + if supports_sw and sample_weight is not None: sw_train = _safe_indexing(sample_weight, train) sw_test = _safe_indexing(sample_weight, test) else: