DEP deprecate loss_function_ attribute in SGDClassifier and SGDOneClassSVM #27979

Merged
5 changes: 5 additions & 0 deletions doc/whats_new/v1.4.rst
@@ -547,6 +547,11 @@ Changelog
   type promotion rules of NumPy 2.
   :pr:`27899` by :user:`Olivier Grisel <ogrisel>`.
 
+- |API| The attribute `loss_function_` of :class:`linear_model.SGDClassifier` and
+  :class:`linear_model.SGDOneClassSVM` has been deprecated and will be removed in
+  version 1.6.
+  :pr:`27979` by :user:`Christian Lorentzen <lorentzenchr>`.
+
 :mod:`sklearn.metrics`
 ......................
 
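As a quick illustration of the user-facing effect of the changelog entry above, here is a minimal sketch (assuming scikit-learn 1.4 with this patch applied): the attribute still returns the fitted loss object, but every access now emits a `FutureWarning`.

```python
import warnings

from sklearn.linear_model import SGDClassifier

X = [[0.0, 1.0], [1.0, 0.0], [1.0, 1.0], [0.0, 0.0]]
y = [0, 1, 1, 0]
clf = SGDClassifier(loss="hinge", max_iter=5, tol=None).fit(X, y)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    clf.loss_function_  # deprecated in 1.4, scheduled for removal in 1.6
assert any(issubclass(w.category, FutureWarning) for w in caught)
```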
28 changes: 23 additions & 5 deletions sklearn/linear_model/_stochastic_gradient.py
@@ -22,7 +22,7 @@
 )
 from ..exceptions import ConvergenceWarning
 from ..model_selection import ShuffleSplit, StratifiedShuffleSplit
-from ..utils import check_random_state, compute_class_weight
+from ..utils import check_random_state, compute_class_weight, deprecated
 from ..utils._param_validation import Hidden, Interval, StrOptions
 from ..utils.extmath import safe_sparse_dot
 from ..utils.metaestimators import available_if
@@ -323,6 +323,16 @@ def _make_validation_score_cb(
             classes=classes,
         )
 
+    # TODO(1.6): Remove
+    # mypy error: Decorated property not supported
+    @deprecated(  # type: ignore
+        "Attribute `loss_function_` was deprecated in version 1.4 and will be removed "
+        "in 1.6."
+    )
+    @property
+    def loss_function_(self):
+        return self._loss_function_
+
 
 def _prepare_fit_binary(est, y, i, input_dtye):
     """Initialization for fit_binary.
@@ -455,7 +465,7 @@ def fit_binary(
         intercept,
         average_coef,
         average_intercept,
-        est.loss_function_,
+        est._loss_function_,
         penalty_type,
         alpha,
         C,
@@ -619,7 +629,7 @@ def _partial_fit(
                 % (n_features, self.coef_.shape[-1])
             )
 
-        self.loss_function_ = self._get_loss_function(loss)
+        self._loss_function_ = self._get_loss_function(loss)
         if not hasattr(self, "t_"):
             self.t_ = 1.0
 
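`_get_loss_function` itself is untouched by this PR; only the attribute that stores its result is renamed from `loss_function_` to `_loss_function_`. Conceptually it is a plain string-to-object dispatch. A hypothetical sketch (the class names and lookup table below are illustrative, not scikit-learn's real internals):

```python
# Illustrative only: stand-ins for the Cython loss objects that the real
# estimators resolve from the `loss` constructor string.
class Hinge:
    def loss(self, p, y):
        return max(0.0, 1.0 - p * y)


class SquaredHinge:
    def loss(self, p, y):
        return max(0.0, 1.0 - p * y) ** 2


LOSS_FUNCTIONS = {"hinge": Hinge, "squared_hinge": SquaredHinge}


def get_loss_function(loss):
    # Map the string to a concrete loss instance, mirroring the kind of
    # lookup the private helper performs.
    try:
        return LOSS_FUNCTIONS[loss]()
    except KeyError:
        raise ValueError(f"The loss {loss!r} is not supported.") from None
```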
@@ -1132,6 +1142,10 @@ class SGDClassifier(BaseSGDClassifier):
 
     loss_function_ : concrete ``LossFunction``
 
+        .. deprecated:: 1.4
+            Attribute `loss_function_` was deprecated in version 1.4 and will be
+            removed in 1.6.
+
     classes_ : array of shape (n_classes,)
 
     t_ : int
@@ -2122,6 +2136,10 @@ class SGDOneClassSVM(BaseSGD, OutlierMixin):
 
     loss_function_ : concrete ``LossFunction``
 
+        .. deprecated:: 1.4
+            ``loss_function_`` was deprecated in version 1.4 and will be removed in
+            1.6.
+
     n_features_in_ : int
         Number of features seen during :term:`fit`.
 
@@ -2260,7 +2278,7 @@ def _fit_one_class(self, X, alpha, C, sample_weight, learning_rate, max_iter):
             intercept[0],
             average_coef,
             average_intercept[0],
-            self.loss_function_,
+            self._loss_function_,
             penalty_type,
             alpha,
             C,
@@ -2354,7 +2372,7 @@ def _partial_fit(
             self._average_coef = np.zeros(n_features, dtype=X.dtype, order="C")
             self._average_intercept = np.zeros(1, dtype=X.dtype, order="C")
 
-        self.loss_function_ = self._get_loss_function(loss)
+        self._loss_function_ = self._get_loss_function(loss)
         if not hasattr(self, "t_"):
             self.t_ = 1.0
 
24 changes: 20 additions & 4 deletions sklearn/linear_model/tests/test_sgd.py
@@ -756,10 +756,13 @@ def test_sgd_proba(klass):
     p = clf.predict_proba([[-1, -1]])
     assert p[0, 1] < 0.5
 
-    p = clf.predict_log_proba([[3, 2]])
-    assert p[0, 1] > p[0, 0]
-    p = clf.predict_log_proba([[-1, -1]])
-    assert p[0, 1] < p[0, 0]
+    # If predict_proba is 0, we get "RuntimeWarning: divide by zero encountered
+    # in log". We avoid it here.
+    with np.errstate(divide="ignore"):
+        p = clf.predict_log_proba([[3, 2]])
+        assert p[0, 1] > p[0, 0]
+        p = clf.predict_log_proba([[-1, -1]])
+        assert p[0, 1] < p[0, 0]
 
     # log loss multiclass probability estimates
     clf = klass(loss="log_loss", alpha=0.01, max_iter=10).fit(X2, Y2)
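The `np.errstate` context manager used in the updated test silences NumPy's floating-point warnings locally without changing the computed values; `log(0)` still evaluates to `-inf`. A standalone illustration:

```python
import numpy as np

# np.log(0.0) normally triggers "RuntimeWarning: divide by zero encountered
# in log" and returns -inf; errstate suppresses the warning for this block.
with np.errstate(divide="ignore"):
    log_p = np.log(np.array([0.0, 0.5, 1.0]))

print(log_p)  # [-inf -0.69314718  0.]
```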
@@ -2196,3 +2199,16 @@ def test_sgd_numerical_consistency(SGDEstimator):
     sgd_32.fit(X_32, Y_32)
 
     assert_allclose(sgd_64.coef_, sgd_32.coef_)
+
+
+# TODO(1.6): remove
+@pytest.mark.parametrize("Estimator", [SGDClassifier, SGDOneClassSVM])
+def test_loss_attribute_deprecation(Estimator):
+    # Check that we raise the proper deprecation warning if accessing
+    # `loss_function_`.
+    X = np.array([[1, 2], [3, 4]])
+    y = np.array([1, 0])
+    est = Estimator().fit(X, y)
+
+    with pytest.warns(FutureWarning, match="`loss_function_` was deprecated"):
+        est.loss_function_
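Downstream code that still reads `loss_function_` will start seeing this warning under 1.4. One possible interim pattern for dependent code while it migrates, sketched under the assumption that the warning message matches the one introduced in this PR:

```python
import warnings

from sklearn.linear_model import SGDOneClassSVM

est = SGDOneClassSVM(max_iter=5, tol=None).fit([[0.0], [1.0], [2.0]])

# Silence only this specific deprecation while the dependent code migrates;
# the `loss` constructor parameter remains the supported way to know which
# loss is in use.
with warnings.catch_warnings():
    warnings.filterwarnings(
        "ignore",
        message=".*`loss_function_` was deprecated.*",
        category=FutureWarning,
    )
    legacy_loss = est.loss_function_
```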