Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/whats_new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,10 @@ Bug fixes
- Fixed a bug where :func:`linear_model.RANSACRegressor.fit` may run until
``max_iter`` if it finds a large inlier group early. :issue:`8251` by :user:`aivision2020`.

- Fixed a bug where :class:`sklearn.naive_bayes.MultinomialNB` and :class:`sklearn.naive_bayes.BernoulliNB`
failed when ``alpha=0``. :issue:`5814` by :user:`Yichuan Liu <yl565>` and
:user:`Herilalaina Rakotoarison <herilalaina>`.

- Fixed a bug where :func:`datasets.make_moons` gives an
incorrect result when ``n_samples`` is odd.
:issue:`8198` by :user:`Josh Levy <levy5674>`.
Expand Down
29 changes: 22 additions & 7 deletions sklearn/naive_bayes.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# (parts based on earlier work by Mathieu Blondel)
#
# License: BSD 3 clause
import warnings

from abc import ABCMeta, abstractmethod

Expand Down Expand Up @@ -436,6 +437,8 @@ def _joint_log_likelihood(self, X):
joint_log_likelihood = np.array(joint_log_likelihood).T
return joint_log_likelihood

_ALPHA_MIN = 1e-10


class BaseDiscreteNB(BaseNB):
"""Abstract base class for naive Bayes on discrete/categorical data
Expand All @@ -460,6 +463,16 @@ def _update_class_log_prior(self, class_prior=None):
else:
self.class_log_prior_ = np.zeros(n_classes) - np.log(n_classes)

def _check_alpha(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe you went through this before, but why isn't alpha just initially set to an appropriate value and then used as before, instead of changing the code a lot as below?

if self.alpha < 0:
raise ValueError('Smoothing parameter alpha = %.1e. '
'alpha should be > 0.' % self.alpha)
if self.alpha < _ALPHA_MIN:
warnings.warn('alpha too small will result in numeric errors, '
'setting alpha = %.1e' % _ALPHA_MIN)
return _ALPHA_MIN
return self.alpha

def partial_fit(self, X, y, classes=None, sample_weight=None):
"""Incremental fit on a batch of samples.

Expand Down Expand Up @@ -538,7 +551,8 @@ def partial_fit(self, X, y, classes=None, sample_weight=None):
# be called by the user explicitly just once after several consecutive
# calls to partial_fit and prior to any call to predict[_[log_]proba]
# to avoid computing the smooth log probas at each call to partial fit
self._update_feature_log_prob()
alpha = self._check_alpha()
self._update_feature_log_prob(alpha)
self._update_class_log_prior(class_prior=class_prior)
return self

Expand Down Expand Up @@ -588,7 +602,8 @@ def fit(self, X, y, sample_weight=None):
self.feature_count_ = np.zeros((n_effective_classes, n_features),
dtype=np.float64)
self._count(X, Y)
self._update_feature_log_prob()
alpha = self._check_alpha()
self._update_feature_log_prob(alpha)
self._update_class_log_prior(class_prior=class_prior)
return self

Expand Down Expand Up @@ -694,9 +709,9 @@ def _count(self, X, Y):
self.feature_count_ += safe_sparse_dot(Y.T, X)
self.class_count_ += Y.sum(axis=0)

def _update_feature_log_prob(self):
def _update_feature_log_prob(self, alpha):
"""Apply smoothing to raw counts and recompute log probabilities"""
smoothed_fc = self.feature_count_ + self.alpha
smoothed_fc = self.feature_count_ + alpha
smoothed_cc = smoothed_fc.sum(axis=1)

self.feature_log_prob_ = (np.log(smoothed_fc) -
Expand Down Expand Up @@ -796,10 +811,10 @@ def _count(self, X, Y):
self.feature_count_ += safe_sparse_dot(Y.T, X)
self.class_count_ += Y.sum(axis=0)

def _update_feature_log_prob(self):
def _update_feature_log_prob(self, alpha):
"""Apply smoothing to raw counts and recompute log probabilities"""
smoothed_fc = self.feature_count_ + self.alpha
smoothed_cc = self.class_count_ + self.alpha * 2
smoothed_fc = self.feature_count_ + alpha
smoothed_cc = self.class_count_ + alpha * 2

self.feature_log_prob_ = (np.log(smoothed_fc) -
np.log(smoothed_cc.reshape(-1, 1)))
Expand Down
50 changes: 49 additions & 1 deletion sklearn/tests/test_naive_bayes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
from sklearn.utils.testing import assert_array_almost_equal
from sklearn.utils.testing import assert_equal
from sklearn.utils.testing import assert_raises
from sklearn.utils.testing import assert_raise_message
from sklearn.utils.testing import assert_greater
from sklearn.utils.testing import assert_warns

from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB

Expand Down Expand Up @@ -480,7 +482,7 @@ def test_feature_log_prob_bnb():
denom = np.tile(np.log(clf.class_count_ + 2.0), (X.shape[1], 1)).T

# Check manual estimate matches
assert_array_equal(clf.feature_log_prob_, (num - denom))
assert_array_almost_equal(clf.feature_log_prob_, (num - denom))


def test_bnb():
Expand Down Expand Up @@ -536,3 +538,49 @@ def test_naive_bayes_scale_invariance():
for f in [1E-10, 1, 1E10]]
assert_array_equal(labels[0], labels[1])
assert_array_equal(labels[1], labels[2])


def test_alpha():
    """Check how BernoulliNB and MultinomialNB handle alpha=0 and alpha<0.

    With ``alpha=0.`` both ``fit`` and ``partial_fit`` are expected to emit
    a ``UserWarning`` and ``predict_proba`` must match the hand-computed
    probabilities, for dense and sparse input alike. With a negative
    ``alpha`` both methods are expected to raise ``ValueError`` with a
    fixed message.
    """
    # alpha=0 must not produce nan results even when some p(x_i|y_j) is 0.
    X_dense = np.array([[1, 0], [1, 1]])
    y = np.array([0, 1])

    # Each estimator paired with the probabilities expected on the toy data.
    cases = [
        (BernoulliNB, np.array([[1, 0], [0, 1]])),
        (MultinomialNB, np.array([[2./3, 1./3], [0, 1]])),
    ]

    for Estimator, expected_proba in cases:
        nb = Estimator(alpha=0.)
        assert_warns(UserWarning, nb.partial_fit, X_dense, y, classes=[0, 1])
        assert_warns(UserWarning, nb.fit, X_dense, y)
        assert_array_almost_equal(nb.predict_proba(X_dense), expected_proba)

    # Same expectations when X is a sparse matrix.
    X_sparse = scipy.sparse.csr_matrix(X_dense)
    for Estimator, expected_proba in cases:
        nb = Estimator(alpha=0.)
        assert_warns(UserWarning, nb.fit, X_sparse, y)
        assert_array_almost_equal(nb.predict_proba(X_sparse), expected_proba)

    # A negative alpha must be rejected by fit and by partial_fit.
    expected_msg = ('Smoothing parameter alpha = -1.0e-01. '
                    'alpha should be > 0.')
    estimators = (BernoulliNB, MultinomialNB)

    for Estimator in estimators:
        nb = Estimator(alpha=-0.1)
        assert_raise_message(ValueError, expected_msg, nb.fit, X_dense, y)

    # Fresh instances, so partial_fit is checked on an unfitted estimator.
    for Estimator in estimators:
        nb = Estimator(alpha=-0.1)
        assert_raise_message(ValueError, expected_msg, nb.partial_fit,
                             X_dense, y, classes=[0, 1])