
DEP loss_ attribute in gradient boosting #23079

Merged: 4 commits, Apr 12, 2022

doc/whats_new/v1.1.rst (5 additions, 0 deletions)

@@ -471,6 +471,11 @@ Changelog
     the output feature names.
     :pr:`21762` by :user:`Zhehao Liu <MaxwellLZH>` and `Thomas Fan`_.

+- |API| The attribute `loss_` of :class:`ensemble.GradientBoostingClassifier` and
+  :class:`ensemble.GradientBoostingRegressor` has been deprecated and will be removed
+  in version 1.3.
+  :pr:`23079` by :user:`Christian Lorentzen <lorentzenchr>`.
+
 :mod:`sklearn.feature_extraction`
 .................................

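Note: the user-visible effect of this deprecation, sketched minimally (assuming scikit-learn 1.1 with this change applied): reading `loss_` on a fitted estimator emits a `FutureWarning`, while fitting and prediction are unchanged.

    import warnings

    from sklearn.datasets import make_classification
    from sklearn.ensemble import GradientBoostingClassifier

    X, y = make_classification(random_state=0)
    clf = GradientBoostingClassifier(n_estimators=10, random_state=0).fit(X, y)

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        clf.loss_  # deprecated alias; still returns the internal loss object
    assert any(issubclass(w.category, FutureWarning) for w in caught)
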
sklearn/ensemble/_gb.py (34 additions, 17 deletions)

Expand Up @@ -207,7 +207,7 @@ def _fit_stage(
"""Fit another stage of ``_n_classes`` trees to the boosting model."""

assert sample_mask.dtype == bool
loss = self.loss_
loss = self._loss
original_y = y

# Need to pass a copy of raw_predictions to negative_gradient()
@@ -328,11 +328,11 @@ def _check_params(self):
         loss_class = _gb_losses.LOSS_FUNCTIONS[self.loss]

         if is_classifier(self):
-            self.loss_ = loss_class(self.n_classes_)
+            self._loss = loss_class(self.n_classes_)
         elif self.loss in ("huber", "quantile"):
-            self.loss_ = loss_class(self.alpha)
+            self._loss = loss_class(self.alpha)
         else:
-            self.loss_ = loss_class()
+            self._loss = loss_class()

         check_scalar(
             self.subsample,

@@ -346,7 +346,7 @@ def _check_params(self):
         if self.init is not None:
             # init must be an estimator or 'zero'
             if isinstance(self.init, BaseEstimator):
-                self.loss_.check_init_estimator(self.init)
+                self._loss.check_init_estimator(self.init)
             elif not (isinstance(self.init, str) and self.init == "zero"):
                 raise ValueError(
                     "The init parameter must be an estimator or 'zero'. "

@@ -439,9 +439,9 @@ def _init_state(self):

         self.init_ = self.init
         if self.init_ is None:
-            self.init_ = self.loss_.init_estimator()
+            self.init_ = self._loss.init_estimator()

-        self.estimators_ = np.empty((self.n_estimators, self.loss_.K), dtype=object)
+        self.estimators_ = np.empty((self.n_estimators, self._loss.K), dtype=object)
         self.train_score_ = np.zeros((self.n_estimators,), dtype=np.float64)
         # do oob?
         if self.subsample < 1.0:

@@ -471,7 +471,7 @@ def _resize_state(self):
         )

         self.estimators_ = np.resize(
-            self.estimators_, (total_n_estimators, self.loss_.K)
+            self.estimators_, (total_n_estimators, self._loss.K)
         )
         self.train_score_ = np.resize(self.train_score_, total_n_estimators)
         if self.subsample < 1 or hasattr(self, "oob_improvement_"):

@@ -607,7 +607,7 @@ def fit(self, X, y, sample_weight=None, monitor=None):
         # fit initial model and initialize raw predictions
         if self.init_ == "zero":
             raw_predictions = np.zeros(
-                shape=(X.shape[0], self.loss_.K), dtype=np.float64
+                shape=(X.shape[0], self._loss.K), dtype=np.float64
             )
         else:
             # XXX clean this once we have a support_sample_weight tag

@@ -634,7 +634,7 @@ def fit(self, X, y, sample_weight=None, monitor=None):
             else:  # regular estimator whose input checking failed
                 raise

-            raw_predictions = self.loss_.get_init_raw_predictions(X, self.init_)
+            raw_predictions = self._loss.get_init_raw_predictions(X, self.init_)

         begin_at_stage = 0

@@ -712,7 +712,7 @@ def _fit_stages(
         do_oob = self.subsample < 1.0
         sample_mask = np.ones((n_samples,), dtype=bool)
         n_inbag = max(1, int(self.subsample * n_samples))
-        loss_ = self.loss_
+        loss_ = self._loss

         if self.verbose:
             verbose_reporter = VerboseReporter(verbose=self.verbose)

@@ -804,10 +804,10 @@ def _raw_predict_init(self, X):
         X = self.estimators_[0, 0]._validate_X_predict(X, check_input=True)
         if self.init_ == "zero":
             raw_predictions = np.zeros(
-                shape=(X.shape[0], self.loss_.K), dtype=np.float64
+                shape=(X.shape[0], self._loss.K), dtype=np.float64
             )
         else:
-            raw_predictions = self.loss_.get_init_raw_predictions(X, self.init_).astype(
+            raw_predictions = self._loss.get_init_raw_predictions(X, self.init_).astype(
                 np.float64
             )
         return raw_predictions

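Note: as background for `_raw_predict_init`, raw predictions have shape `(n_samples, K)`, where `K` is 1 for regression and binary classification and `n_classes` for multiclass; they start either at zero or at the init estimator's predictions mapped into link space. A rough sketch of that contract for the `K == 1` case (hypothetical helper, not the sklearn API):

    import numpy as np

    def init_raw_predictions(X, init_estimator, K=1):
        # Hypothetical sketch for K == 1 only; in sklearn the loss object
        # handles the probability-to-link conversion needed for classifiers.
        if init_estimator == "zero":
            return np.zeros((X.shape[0], K), dtype=np.float64)
        return np.asarray(init_estimator.predict(X), dtype=np.float64).reshape(-1, K)
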
@@ -978,6 +978,15 @@ def apply(self, X):
     def n_features_(self):
         return self.n_features_in_

+    # TODO(1.3): Remove
+    # mypy error: Decorated property not supported
+    @deprecated(  # type: ignore
+        "Attribute `loss_` was deprecated in version 1.1 and will be removed in 1.3."
+    )
+    @property
+    def loss_(self):
+        return self._loss
+

 class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
     """Gradient Boosting for classification.

@@ -1214,6 +1223,10 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
     loss_ : LossFunction
         The concrete ``LossFunction`` object.

+        .. deprecated:: 1.1
+           Attribute `loss_` was deprecated in version 1.1 and will be
+           removed in 1.3.
+
     init_ : estimator
         The estimator that provides the initial predictions.
         Set via the ``init`` argument or ``loss.init_estimator``.

@@ -1434,7 +1447,7 @@ def predict(self, X):
             The predicted values.
         """
         raw_predictions = self.decision_function(X)
-        encoded_labels = self.loss_._raw_prediction_to_decision(raw_predictions)
+        encoded_labels = self._loss._raw_prediction_to_decision(raw_predictions)
         return self.classes_.take(encoded_labels, axis=0)

     def staged_predict(self, X):

@@ -1456,7 +1469,7 @@ def staged_predict(self, X):
             The predicted value of the input samples.
         """
         for raw_predictions in self._staged_raw_predict(X):
-            encoded_labels = self.loss_._raw_prediction_to_decision(raw_predictions)
+            encoded_labels = self._loss._raw_prediction_to_decision(raw_predictions)
             yield self.classes_.take(encoded_labels, axis=0)

     def predict_proba(self, X):

@@ -1482,7 +1495,7 @@ def predict_proba(self, X):
         """
         raw_predictions = self.decision_function(X)
         try:
-            return self.loss_._raw_prediction_to_proba(raw_predictions)
+            return self._loss._raw_prediction_to_proba(raw_predictions)
         except NotFittedError:
             raise
         except AttributeError as e:

@@ -1534,7 +1547,7 @@ def staged_predict_proba(self, X):
         """
         try:
             for raw_predictions in self._staged_raw_predict(X):
-                yield self.loss_._raw_prediction_to_proba(raw_predictions)
+                yield self._loss._raw_prediction_to_proba(raw_predictions)
         except NotFittedError:
             raise
         except AttributeError as e:

@@ -1781,6 +1794,10 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
     loss_ : LossFunction
         The concrete ``LossFunction`` object.

+        .. deprecated:: 1.1
+           Attribute `loss_` was deprecated in version 1.1 and will be
+           removed in 1.3.
+
     init_ : estimator
         The estimator that provides the initial predictions.
         Set via the ``init`` argument or ``loss.init_estimator``.

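Note: downstream code that evaluated the training objective through the public `loss_`, as in `clf.loss_(y, clf.decision_function(X))`, can switch to the public metrics instead. For the binomial deviance used by the binary classifier, the deviance equals twice the mean log-loss, so the following sketch (assuming binary targets) is an equivalent replacement up to that factor:

    from sklearn.datasets import make_classification
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.metrics import log_loss

    X, y = make_classification(random_state=0)
    clf = GradientBoostingClassifier(random_state=0).fit(X, y)

    # before: clf.loss_(y, clf.decision_function(X))
    # after: binomial deviance == 2 * mean log-loss for binary targets
    deviance = 2 * log_loss(y, clf.predict_proba(X))
    print("training deviance: %.4f" % deviance)
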
sklearn/ensemble/tests/test_gradient_boosting.py (16 additions, 1 deletion)

@@ -475,7 +475,7 @@ def test_max_feature_regression():
         random_state=1,
     )
     gbrt.fit(X_train, y_train)
-    log_loss = gbrt.loss_(y_test, gbrt.decision_function(X_test))
+    log_loss = gbrt._loss(y_test, gbrt.decision_function(X_test))
     assert log_loss < 0.5, "GB failed with deviance %.4f" % log_loss

@@ -1535,3 +1535,18 @@ def test_loss_deprecated(old_loss, new_loss, Estimator):
     est2 = Estimator(loss=new_loss, random_state=0)
     est2.fit(X, y)
     assert_allclose(est1.predict(X), est2.predict(X))
+
+
+# TODO(1.3): remove
+@pytest.mark.parametrize(
+    "Estimator", [GradientBoostingClassifier, GradientBoostingRegressor]
+)
+def test_loss_attribute_deprecation(Estimator):
+    # Check that we raise the proper deprecation warning if accessing
+    # `loss_`.
+    X = np.array([[1, 2], [3, 4]])
+    y = np.array([1, 0])
+    est = Estimator().fit(X, y)
+
+    with pytest.warns(FutureWarning, match="`loss_` was deprecated"):
+        est.loss_