scikit-learn · jmschrei · Jun 19, 2017 · Sep 23, 2016 · Sep 23, 2016 · Sep 23, 2016
diff --git a/doc/whats_new.rst b/doc/whats_new.rst
@@ -253,6 +253,10 @@ Bug fixes
    - Fixed a bug where :func:`linear_model.RANSACRegressor.fit` may run until
      ``max_iter`` if finds a large inlier group early. :issue:`8251` by :user:`aivision2020`.
 
+   - Fixed a bug where :class:`sklearn.naive_bayes.MultinomialNB` and :class:`sklearn.naive_bayes.BernoulliNB`
+     failed when ``alpha=0``. :issue:`5814` by :user:`Yichuan Liu <yl565>` and
+     :user:`Herilalaina Rakotoarison <herilalaina>`.
+
    - Fixed a bug where :func:`datasets.make_moons` gives an
      incorrect result when ``n_samples`` is odd.
      :issue:`8198` by :user:`Josh Levy <levy5674>`.

diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py
@@ -15,6 +15,7 @@
 #         (parts based on earlier work by Mathieu Blondel)
 #
 # License: BSD 3 clause
+import warnings
 
 from abc import ABCMeta, abstractmethod
 
@@ -436,6 +437,8 @@ def _joint_log_likelihood(self, X):
         joint_log_likelihood = np.array(joint_log_likelihood).T
         return joint_log_likelihood
 
+_ALPHA_MIN = 1e-10  # floor for alpha; _check_alpha substitutes it when lower
+
 
 class BaseDiscreteNB(BaseNB):
     """Abstract base class for naive Bayes on discrete/categorical data
@@ -460,6 +463,16 @@ def _update_class_log_prior(self, class_prior=None):
         else:
             self.class_log_prior_ = np.zeros(n_classes) - np.log(n_classes)
 
+    def _check_alpha(self):  # validate self.alpha, return the value to use
+        if self.alpha < 0:  # NOTE: alpha == 0 is allowed despite "> 0" below
+            raise ValueError('Smoothing parameter alpha = %.1e. '
+                             'alpha should be > 0.' % self.alpha)
+        if self.alpha < _ALPHA_MIN:  # covers alpha == 0: warn, then clamp
+            warnings.warn('alpha too small will result in numeric errors, '
+                          'setting alpha = %.1e' % _ALPHA_MIN)
+            return _ALPHA_MIN  # self.alpha itself is not modified
+        return self.alpha
+
     def partial_fit(self, X, y, classes=None, sample_weight=None):
         """Incremental fit on a batch of samples.
 
@@ -538,7 +551,8 @@ def partial_fit(self, X, y, classes=None, sample_weight=None):
         # be called by the user explicitly just once after several consecutive
         # calls to partial_fit and prior any call to predict[_[log_]proba]
         # to avoid computing the smooth log probas at each call to partial fit
-        self._update_feature_log_prob()
+        alpha = self._check_alpha()
+        self._update_feature_log_prob(alpha)
         self._update_class_log_prior(class_prior=class_prior)
         return self
 
@@ -588,7 +602,8 @@ def fit(self, X, y, sample_weight=None):
         self.feature_count_ = np.zeros((n_effective_classes, n_features),
                                        dtype=np.float64)
         self._count(X, Y)
-        self._update_feature_log_prob()
+        alpha = self._check_alpha()
+        self._update_feature_log_prob(alpha)
         self._update_class_log_prior(class_prior=class_prior)
         return self
 
@@ -694,9 +709,9 @@ def _count(self, X, Y):
         self.feature_count_ += safe_sparse_dot(Y.T, X)
         self.class_count_ += Y.sum(axis=0)
 
-    def _update_feature_log_prob(self):
+    def _update_feature_log_prob(self, alpha):
         """Apply smoothing to raw counts and recompute log probabilities"""
-        smoothed_fc = self.feature_count_ + self.alpha
+        smoothed_fc = self.feature_count_ + alpha
         smoothed_cc = smoothed_fc.sum(axis=1)
 
         self.feature_log_prob_ = (np.log(smoothed_fc) -
@@ -796,10 +811,10 @@ def _count(self, X, Y):
         self.feature_count_ += safe_sparse_dot(Y.T, X)
         self.class_count_ += Y.sum(axis=0)
 
-    def _update_feature_log_prob(self):
+    def _update_feature_log_prob(self, alpha):
         """Apply smoothing to raw counts and recompute log probabilities"""
-        smoothed_fc = self.feature_count_ + self.alpha
-        smoothed_cc = self.class_count_ + self.alpha * 2
+        smoothed_fc = self.feature_count_ + alpha
+        smoothed_cc = self.class_count_ + alpha * 2
 
         self.feature_log_prob_ = (np.log(smoothed_fc) -
                                   np.log(smoothed_cc.reshape(-1, 1)))

diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py
@@ -14,7 +14,9 @@
 from sklearn.utils.testing import assert_array_almost_equal
 from sklearn.utils.testing import assert_equal
 from sklearn.utils.testing import assert_raises
+from sklearn.utils.testing import assert_raise_message
 from sklearn.utils.testing import assert_greater
+from sklearn.utils.testing import assert_warns
 
 from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB
 
@@ -480,7 +482,7 @@ def test_feature_log_prob_bnb():
     denom = np.tile(np.log(clf.class_count_ + 2.0), (X.shape[1], 1)).T
 
     # Check manual estimate matches
-    assert_array_equal(clf.feature_log_prob_, (num - denom))
+    assert_array_almost_equal(clf.feature_log_prob_, (num - denom))
 
 
 def test_bnb():
@@ -536,3 +538,49 @@ def test_naive_bayes_scale_invariance():
               for f in [1E-10, 1, 1E10]]
     assert_array_equal(labels[0], labels[1])
     assert_array_equal(labels[1], labels[2])
+
+
+def test_alpha():
+    # alpha=0: fit/partial_fit warn; no nan in output even when p(x_i|y_j)=0
+    X = np.array([[1, 0], [1, 1]])
+    y = np.array([0, 1])
+    nb = BernoulliNB(alpha=0.)
+    assert_warns(UserWarning, nb.partial_fit, X, y, classes=[0, 1])
+    assert_warns(UserWarning, nb.fit, X, y)
+    prob = np.array([[1, 0], [0, 1]])  # expected predict_proba(X)
+    assert_array_almost_equal(nb.predict_proba(X), prob)
+
+    nb = MultinomialNB(alpha=0.)
+    assert_warns(UserWarning, nb.partial_fit, X, y, classes=[0, 1])
+    assert_warns(UserWarning, nb.fit, X, y)
+    prob = np.array([[2./3, 1./3], [0, 1]])  # expected predict_proba(X)
+    assert_array_almost_equal(nb.predict_proba(X), prob)
+
+    # Repeat the fit checks with X as a scipy.sparse CSR matrix
+    X = scipy.sparse.csr_matrix(X)
+    nb = BernoulliNB(alpha=0.)
+    assert_warns(UserWarning, nb.fit, X, y)
+    prob = np.array([[1, 0], [0, 1]])
+    assert_array_almost_equal(nb.predict_proba(X), prob)
+
+    nb = MultinomialNB(alpha=0.)
+    assert_warns(UserWarning, nb.fit, X, y)
+    prob = np.array([[2./3, 1./3], [0, 1]])
+    assert_array_almost_equal(nb.predict_proba(X), prob)
+
+    # alpha < 0 must raise ValueError in both fit and partial_fit
+    X = np.array([[1, 0], [1, 1]])  # back to dense input
+    y = np.array([0, 1])
+    expected_msg = ('Smoothing parameter alpha = -1.0e-01. '
+                    'alpha should be > 0.')
+    b_nb = BernoulliNB(alpha=-0.1)
+    m_nb = MultinomialNB(alpha=-0.1)
+    assert_raise_message(ValueError, expected_msg, b_nb.fit, X, y)
+    assert_raise_message(ValueError, expected_msg, m_nb.fit, X, y)
+
+    b_nb = BernoulliNB(alpha=-0.1)  # fresh estimators for partial_fit
+    m_nb = MultinomialNB(alpha=-0.1)
+    assert_raise_message(ValueError, expected_msg, b_nb.partial_fit,
+                         X, y, classes=[0, 1])
+    assert_raise_message(ValueError, expected_msg, m_nb.partial_fit,
+                         X, y, classes=[0, 1])