
Commit 65b300e

Micky774, jjerphan, thomasjpfan, arka204, and Shao Yang Hong authored
ENH Adding variable force_alpha to classes in naive_bayes.py (#22269)
Co-authored-by: Julien Jerphanion <git@jjerphan.xyz>
Co-authored-by: Thomas J. Fan <thomasjpfan@gmail.com>
Co-authored-by: arka204 <kmichalik204@gmail.com>
Co-authored-by: Shao Yang Hong <shaoyang.hong@ninjavan.co>
Co-authored-by: Shao Yang Hong <hongsy2006@gmail.com>
1 parent 2e49193 commit 65b300e

6 files changed: +206 additions, -29 deletions


doc/whats_new/v1.2.rst

Lines changed: 12 additions & 0 deletions
@@ -282,6 +282,18 @@ Changelog
 :pr:`10805` by :user:`Mathias Andersen <MrMathias>` and
 :pr:`23471` by :user:`Meekail Zain <micky774>`
 
+:mod:`sklearn.naive_bayes`
+..........................
+
+- |Enhancement| A new parameter `force_alpha` was added to
+  :class:`naive_bayes.BernoulliNB`, :class:`naive_bayes.ComplementNB`,
+  :class:`naive_bayes.CategoricalNB`, and :class:`naive_bayes.MultinomialNB`,
+  allowing users to set `alpha` to a very small number, greater than or
+  equal to 0, which was previously changed to `1e-10` automatically.
+  :pr:`16747` by :user:`arka204`,
+  :pr:`18805` by :user:`hongshaoyang`,
+  :pr:`22269` by :user:`Meekail Zain <micky774>`.
+
 Code and Documentation Contributors
 -----------------------------------
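A minimal sketch of the behavior this changelog entry describes (assuming scikit-learn 1.2; the random data is illustrative only):

import warnings
import numpy as np
from sklearn.naive_bayes import MultinomialNB

rng = np.random.RandomState(0)
X = rng.randint(5, size=(6, 100))
y = np.array([1, 2, 3, 4, 5, 6])

# force_alpha=True keeps a tiny alpha as-is instead of clipping it to 1e-10.
clf = MultinomialNB(alpha=1e-12, force_alpha=True).fit(X, y)

# Leaving force_alpha unset still clips, but now also raises a FutureWarning
# announcing that the default becomes True in 1.4.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    MultinomialNB(alpha=1e-12).fit(X, y)
print([str(w.message) for w in caught])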

sklearn/naive_bayes.py

Lines changed: 122 additions & 27 deletions
@@ -30,7 +30,7 @@
 from .utils.multiclass import _check_partial_fit_first_call
 from .utils.validation import check_is_fitted, check_non_negative
 from .utils.validation import _check_sample_weight
-from .utils._param_validation import Interval
+from .utils._param_validation import Interval, Hidden, StrOptions
 
 __all__ = [
     "BernoulliNB",
@@ -549,12 +549,14 @@ class _BaseDiscreteNB(_BaseNB):
         "alpha": [Interval(Real, 0, None, closed="left"), "array-like"],
         "fit_prior": ["boolean"],
         "class_prior": ["array-like", None],
+        "force_alpha": ["boolean", Hidden(StrOptions({"warn"}))],
     }
 
-    def __init__(self, alpha=1.0, fit_prior=True, class_prior=None):
+    def __init__(self, alpha=1.0, fit_prior=True, class_prior=None, force_alpha="warn"):
        self.alpha = alpha
        self.fit_prior = fit_prior
        self.class_prior = class_prior
+        self.force_alpha = force_alpha
 
     @abstractmethod
     def _count(self, X, Y):
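The `Hidden(StrOptions({"warn"}))` constraint above is a common deprecation device: the documented default is `False`, but the real default is a private `"warn"` sentinel so the estimator can tell "left unset" apart from "explicitly passed `False`". A self-contained sketch of the idea (the class and helper names here are illustrative, not scikit-learn API):

import warnings

class TinyEstimator:
    """Illustrative only: mirrors the sentinel-default pattern above."""

    def __init__(self, force_alpha="warn"):  # documented default: False
        self.force_alpha = force_alpha

    def _resolved_force_alpha(self):
        if self.force_alpha == "warn":
            # Unset: behave like the old default for now, but warn about 1.4.
            warnings.warn(
                "The default value for `force_alpha` will change to `True` in 1.4.",
                FutureWarning,
            )
            return False
        return self.force_alpha

print(TinyEstimator()._resolved_force_alpha())                  # False, with warning
print(TinyEstimator(force_alpha=True)._resolved_force_alpha())  # True, silent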
@@ -622,22 +624,34 @@ def _check_alpha(self):
         alpha = (
             np.asarray(self.alpha) if not isinstance(self.alpha, Real) else self.alpha
         )
+        alpha_min = np.min(alpha)
         if isinstance(alpha, np.ndarray):
             if not alpha.shape[0] == self.n_features_in_:
                 raise ValueError(
                     "When alpha is an array, it should contains `n_features`. "
                     f"Got {alpha.shape[0]} elements instead of {self.n_features_in_}."
                 )
             # check that all alpha are positive
-            if np.min(alpha) < 0:
+            if alpha_min < 0:
                 raise ValueError("All values in alpha must be greater than 0.")
-        alpha_min = 1e-10
-        if np.min(alpha) < alpha_min:
+        alpha_lower_bound = 1e-10
+        # TODO(1.4): Replace w/ deprecation of self.force_alpha
+        # See gh #22269
+        _force_alpha = self.force_alpha
+        if _force_alpha == "warn" and alpha_min < alpha_lower_bound:
+            _force_alpha = False
+            warnings.warn(
+                "The default value for `force_alpha` will change to `True` in 1.4. To"
+                " suppress this warning, manually set the value of `force_alpha`.",
+                FutureWarning,
+            )
+        if alpha_min < alpha_lower_bound and not _force_alpha:
             warnings.warn(
                 "alpha too small will result in numeric errors, setting alpha ="
-                f" {alpha_min:.1e}"
+                f" {alpha_lower_bound:.1e}. Use `force_alpha=True` to keep alpha"
+                " unchanged."
             )
-            return np.maximum(alpha, alpha_min)
+            return np.maximum(alpha, alpha_lower_bound)
         return alpha
 
     def partial_fit(self, X, y, classes=None, sample_weight=None):
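Read as a standalone function, the control flow above amounts to the following (check_alpha is a hypothetical helper written for illustration, not a scikit-learn API):

import warnings
import numpy as np

def check_alpha(alpha, force_alpha=False, lower_bound=1e-10):
    alpha = np.asarray(alpha, dtype=float)
    if alpha.min() < 0:
        raise ValueError("All values in alpha must be greater than 0.")
    if alpha.min() < lower_bound and not force_alpha:
        warnings.warn(
            f"alpha too small will result in numeric errors, setting alpha"
            f" = {lower_bound:.1e}. Use `force_alpha=True` to keep alpha unchanged."
        )
        return np.maximum(alpha, lower_bound)  # clip tiny values
    return alpha  # force_alpha=True: the user's value survives untouched

print(check_alpha([1e-12, 1.0]))                    # clipped, with a warning
print(check_alpha([1e-12, 1.0], force_alpha=True))  # [1e-12, 1.0] unchanged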
@@ -812,7 +826,16 @@ class MultinomialNB(_BaseDiscreteNB):
     ----------
     alpha : float or array-like of shape (n_features,), default=1.0
         Additive (Laplace/Lidstone) smoothing parameter
-        (0 for no smoothing).
+        (set alpha=0 and force_alpha=True, for no smoothing).
+
+    force_alpha : bool, default=False
+        If False and alpha is less than 1e-10, it will set alpha to
+        1e-10. If True, alpha will remain unchanged. This may cause
+        numerical errors if alpha is too close to 0.
+
+        .. versionadded:: 1.2
+        .. deprecated:: 1.2
+           The default value of `force_alpha` will change to `True` in v1.4.
 
     fit_prior : bool, default=True
         Whether to learn class prior probabilities or not.
@@ -881,15 +904,22 @@ class MultinomialNB(_BaseDiscreteNB):
     >>> X = rng.randint(5, size=(6, 100))
     >>> y = np.array([1, 2, 3, 4, 5, 6])
     >>> from sklearn.naive_bayes import MultinomialNB
-    >>> clf = MultinomialNB()
+    >>> clf = MultinomialNB(force_alpha=True)
     >>> clf.fit(X, y)
-    MultinomialNB()
+    MultinomialNB(force_alpha=True)
     >>> print(clf.predict(X[2:3]))
     [3]
     """
 
-    def __init__(self, *, alpha=1.0, fit_prior=True, class_prior=None):
-        super().__init__(alpha=alpha, fit_prior=fit_prior, class_prior=class_prior)
+    def __init__(
+        self, *, alpha=1.0, force_alpha="warn", fit_prior=True, class_prior=None
+    ):
+        super().__init__(
+            alpha=alpha,
+            fit_prior=fit_prior,
+            class_prior=class_prior,
+            force_alpha=force_alpha,
+        )
 
     def _more_tags(self):
         return {"requires_positive_X": True}
@@ -928,7 +958,17 @@ class ComplementNB(_BaseDiscreteNB):
     Parameters
     ----------
     alpha : float or array-like of shape (n_features,), default=1.0
-        Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
+        Additive (Laplace/Lidstone) smoothing parameter
+        (set alpha=0 and force_alpha=True, for no smoothing).
+
+    force_alpha : bool, default=False
+        If False and alpha is less than 1e-10, it will set alpha to
+        1e-10. If True, alpha will remain unchanged. This may cause
+        numerical errors if alpha is too close to 0.
+
+        .. versionadded:: 1.2
+        .. deprecated:: 1.2
+           The default value of `force_alpha` will change to `True` in v1.4.
 
     fit_prior : bool, default=True
         Only used in edge case with a single class in the training set.
@@ -1005,9 +1045,9 @@ class ComplementNB(_BaseDiscreteNB):
     >>> X = rng.randint(5, size=(6, 100))
     >>> y = np.array([1, 2, 3, 4, 5, 6])
     >>> from sklearn.naive_bayes import ComplementNB
-    >>> clf = ComplementNB()
+    >>> clf = ComplementNB(force_alpha=True)
     >>> clf.fit(X, y)
-    ComplementNB()
+    ComplementNB(force_alpha=True)
     >>> print(clf.predict(X[2:3]))
     [3]
     """
@@ -1017,8 +1057,21 @@ class ComplementNB(_BaseDiscreteNB):
         "norm": ["boolean"],
     }
 
-    def __init__(self, *, alpha=1.0, fit_prior=True, class_prior=None, norm=False):
-        super().__init__(alpha=alpha, fit_prior=fit_prior, class_prior=class_prior)
+    def __init__(
+        self,
+        *,
+        alpha=1.0,
+        force_alpha="warn",
+        fit_prior=True,
+        class_prior=None,
+        norm=False,
+    ):
+        super().__init__(
+            alpha=alpha,
+            force_alpha=force_alpha,
+            fit_prior=fit_prior,
+            class_prior=class_prior,
+        )
         self.norm = norm
 
     def _more_tags(self):
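As documented above, `alpha` may also be array-like with one entry per feature; the validation added to `_check_alpha` requires its length to equal `n_features_in_`. A quick sketch under the same assumptions as the doctest data:

import numpy as np
from sklearn.naive_bayes import ComplementNB

rng = np.random.RandomState(0)
X = rng.randint(5, size=(6, 100))
y = np.array([1, 2, 3, 4, 5, 6])

alpha = np.full(X.shape[1], 0.5)  # one smoothing value per feature
clf = ComplementNB(alpha=alpha, force_alpha=True).fit(X, y)
print(clf.predict(X[2:3]))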
@@ -1064,7 +1117,16 @@ class BernoulliNB(_BaseDiscreteNB):
     ----------
     alpha : float or array-like of shape (n_features,), default=1.0
         Additive (Laplace/Lidstone) smoothing parameter
-        (0 for no smoothing).
+        (set alpha=0 and force_alpha=True, for no smoothing).
+
+    force_alpha : bool, default=False
+        If False and alpha is less than 1e-10, it will set alpha to
+        1e-10. If True, alpha will remain unchanged. This may cause
+        numerical errors if alpha is too close to 0.
+
+        .. versionadded:: 1.2
+        .. deprecated:: 1.2
+           The default value of `force_alpha` will change to `True` in v1.4.
 
     binarize : float or None, default=0.0
         Threshold for binarizing (mapping to booleans) of sample features.
@@ -1144,9 +1206,9 @@ class BernoulliNB(_BaseDiscreteNB):
     >>> X = rng.randint(5, size=(6, 100))
     >>> Y = np.array([1, 2, 3, 4, 4, 5])
     >>> from sklearn.naive_bayes import BernoulliNB
-    >>> clf = BernoulliNB()
+    >>> clf = BernoulliNB(force_alpha=True)
     >>> clf.fit(X, Y)
-    BernoulliNB()
+    BernoulliNB(force_alpha=True)
     >>> print(clf.predict(X[2:3]))
     [3]
     """
@@ -1156,8 +1218,21 @@ class BernoulliNB(_BaseDiscreteNB):
         "binarize": [None, Interval(Real, 0, None, closed="left")],
     }
 
-    def __init__(self, *, alpha=1.0, binarize=0.0, fit_prior=True, class_prior=None):
-        super().__init__(alpha=alpha, fit_prior=fit_prior, class_prior=class_prior)
+    def __init__(
+        self,
+        *,
+        alpha=1.0,
+        force_alpha="warn",
+        binarize=0.0,
+        fit_prior=True,
+        class_prior=None,
+    ):
+        super().__init__(
+            alpha=alpha,
+            fit_prior=fit_prior,
+            class_prior=class_prior,
+            force_alpha=force_alpha,
+        )
         self.binarize = binarize
 
     def _check_X(self, X):
@@ -1219,7 +1294,16 @@ class CategoricalNB(_BaseDiscreteNB):
     ----------
     alpha : float, default=1.0
         Additive (Laplace/Lidstone) smoothing parameter
-        (0 for no smoothing).
+        (set alpha=0 and force_alpha=True, for no smoothing).
+
+    force_alpha : bool, default=False
+        If False and alpha is less than 1e-10, it will set alpha to
+        1e-10. If True, alpha will remain unchanged. This may cause
+        numerical errors if alpha is too close to 0.
+
+        .. versionadded:: 1.2
+        .. deprecated:: 1.2
+           The default value of `force_alpha` will change to `True` in v1.4.
 
     fit_prior : bool, default=True
         Whether to learn class prior probabilities or not.
@@ -1301,9 +1385,9 @@ class CategoricalNB(_BaseDiscreteNB):
     >>> X = rng.randint(5, size=(6, 100))
     >>> y = np.array([1, 2, 3, 4, 5, 6])
     >>> from sklearn.naive_bayes import CategoricalNB
-    >>> clf = CategoricalNB()
+    >>> clf = CategoricalNB(force_alpha=True)
     >>> clf.fit(X, y)
-    CategoricalNB()
+    CategoricalNB(force_alpha=True)
     >>> print(clf.predict(X[2:3]))
     [3]
     """
@@ -1319,9 +1403,20 @@ class CategoricalNB(_BaseDiscreteNB):
     }
 
     def __init__(
-        self, *, alpha=1.0, fit_prior=True, class_prior=None, min_categories=None
+        self,
+        *,
+        alpha=1.0,
+        force_alpha="warn",
+        fit_prior=True,
+        class_prior=None,
+        min_categories=None,
     ):
-        super().__init__(alpha=alpha, fit_prior=fit_prior, class_prior=class_prior)
+        super().__init__(
+            alpha=alpha,
+            force_alpha=force_alpha,
+            fit_prior=fit_prior,
+            class_prior=class_prior,
+        )
         self.min_categories = min_categories
 
     def fit(self, X, y, sample_weight=None):

sklearn/tests/test_calibration.py

Lines changed: 2 additions & 2 deletions
@@ -71,7 +71,7 @@ def test_calibration(data, method, ensemble):
     X_test, y_test = X[n_samples:], y[n_samples:]
 
     # Naive-Bayes
-    clf = MultinomialNB().fit(X_train, y_train, sample_weight=sw_train)
+    clf = MultinomialNB(force_alpha=True).fit(X_train, y_train, sample_weight=sw_train)
     prob_pos_clf = clf.predict_proba(X_test)[:, 1]
 
     cal_clf = CalibratedClassifierCV(clf, cv=y.size + 1, ensemble=ensemble)
@@ -322,7 +322,7 @@ def test_calibration_prefit():
     X_test, y_test = X[2 * n_samples :], y[2 * n_samples :]
 
     # Naive-Bayes
-    clf = MultinomialNB()
+    clf = MultinomialNB(force_alpha=True)
     # Check error if clf not prefit
     unfit_clf = CalibratedClassifierCV(clf, cv="prefit")
     with pytest.raises(NotFittedError):

sklearn/tests/test_docstring_parameters.py

Lines changed: 8 additions & 0 deletions
@@ -268,6 +268,14 @@ def test_fit_docstring_attributes(name, Estimator):
         est.set_params(n_init="auto")
 
     # TODO(1.4): TO BE REMOVED for 1.4 (avoid FutureWarning)
+    if Estimator.__name__ in (
+        "MultinomialNB",
+        "ComplementNB",
+        "BernoulliNB",
+        "CategoricalNB",
+    ):
+        est.set_params(force_alpha=True)
+
     if Estimator.__name__ == "QuantileRegressor":
         solver = "highs" if sp_version >= parse_version("1.6.0") else "interior-point"
         est.set_params(solver=solver)

sklearn/tests/test_multiclass.py

Lines changed: 3 additions & 0 deletions
@@ -41,6 +41,9 @@
 from sklearn import datasets
 from sklearn.datasets import load_breast_cancer
 
+msg = "The default value for `force_alpha` will change"
+pytestmark = pytest.mark.filterwarnings(f"ignore:{msg}:FutureWarning")
+
 iris = datasets.load_iris()
 rng = np.random.RandomState(0)
 perm = rng.permutation(iris.target.size)
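For reference, a module-level `pytestmark` like the one added above applies a warning filter to every test in the file; the filter string follows pytest's `action:message:category` format, with the message matched as a prefix. A self-contained sketch:

import warnings
import pytest

msg = "The default value for `force_alpha` will change"
pytestmark = pytest.mark.filterwarnings(f"ignore:{msg}:FutureWarning")

def test_future_warning_is_silenced():
    # Matches the message prefix and category above, so strict warning
    # configurations elsewhere will not turn this into a test failure.
    warnings.warn(msg + " to `True` in 1.4.", FutureWarning)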
