@@ -30,7 +30,7 @@
 from .utils.multiclass import _check_partial_fit_first_call
 from .utils.validation import check_is_fitted, check_non_negative
 from .utils.validation import _check_sample_weight
-from .utils._param_validation import Interval
+from .utils._param_validation import Interval, Hidden, StrOptions
 
 __all__ = [
     "BernoulliNB",
@@ -549,12 +549,14 @@ class _BaseDiscreteNB(_BaseNB):
         "alpha": [Interval(Real, 0, None, closed="left"), "array-like"],
         "fit_prior": ["boolean"],
         "class_prior": ["array-like", None],
+        "force_alpha": ["boolean", Hidden(StrOptions({"warn"}))],
     }
 
-    def __init__(self, alpha=1.0, fit_prior=True, class_prior=None):
+    def __init__(self, alpha=1.0, fit_prior=True, class_prior=None, force_alpha="warn"):
         self.alpha = alpha
         self.fit_prior = fit_prior
         self.class_prior = class_prior
+        self.force_alpha = force_alpha
 
     @abstractmethod
     def _count(self, X, Y):
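
The `Hidden(StrOptions({"warn"}))` constraint is what lets the temporary string sentinel coexist with the documented boolean type: validation accepts `"warn"`, but an invalid value produces an error that advertises only the boolean. A minimal sketch of the resulting behavior, not part of the patch (assumes scikit-learn >= 1.2, where parameters are validated at fit time):

import numpy as np
from sklearn.naive_bayes import MultinomialNB

X, y = np.eye(2), np.array([0, 1])

MultinomialNB(force_alpha=True).fit(X, y)    # documented boolean value: accepted
MultinomialNB(force_alpha="warn").fit(X, y)  # hidden sentinel default: accepted
try:
    MultinomialNB(force_alpha="always").fit(X, y)  # any other string is rejected
except ValueError as exc:  # InvalidParameterError subclasses ValueError
    print(exc)  # the message lists only the boolean option; "warn" stays hidden
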
@@ -622,22 +624,34 @@ def _check_alpha(self):
         alpha = (
             np.asarray(self.alpha) if not isinstance(self.alpha, Real) else self.alpha
         )
+        alpha_min = np.min(alpha)
         if isinstance(alpha, np.ndarray):
             if not alpha.shape[0] == self.n_features_in_:
                 raise ValueError(
                     "When alpha is an array, it should contains `n_features`. "
                     f"Got {alpha.shape[0]} elements instead of {self.n_features_in_}."
                 )
             # check that all alpha are positive
-            if np.min(alpha) < 0:
+            if alpha_min < 0:
                 raise ValueError("All values in alpha must be greater than 0.")
-        alpha_min = 1e-10
-        if np.min(alpha) < alpha_min:
+        alpha_lower_bound = 1e-10
+        # TODO(1.4): Replace w/ deprecation of self.force_alpha
+        # See gh #22269
+        _force_alpha = self.force_alpha
+        if _force_alpha == "warn" and alpha_min < alpha_lower_bound:
+            _force_alpha = False
+            warnings.warn(
+                "The default value for `force_alpha` will change to `True` in 1.4. To"
+                " suppress this warning, manually set the value of `force_alpha`.",
+                FutureWarning,
+            )
+        if alpha_min < alpha_lower_bound and not _force_alpha:
             warnings.warn(
                 "alpha too small will result in numeric errors, setting alpha ="
-                f" {alpha_min:.1e}"
+                f" {alpha_lower_bound:.1e}. Use `force_alpha=True` to keep alpha"
+                " unchanged."
             )
-            return np.maximum(alpha, alpha_min)
+            return np.maximum(alpha, alpha_lower_bound)
         return alpha
 
     def partial_fit(self, X, y, classes=None, sample_weight=None):
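
The net effect of the rewritten check: with the default `force_alpha="warn"`, a sub-1e-10 alpha now raises a `FutureWarning` about the upcoming default change and is then clipped exactly as before (which still emits the "alpha too small" warning), while `force_alpha=True` keeps alpha untouched. A quick sketch of the observable behavior on scikit-learn 1.2 (hypothetical data, not part of the patch):

import warnings
import numpy as np
from sklearn.naive_bayes import MultinomialNB

X = np.random.RandomState(0).randint(5, size=(6, 100))
y = np.array([1, 2, 3, 4, 5, 6])

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # alpha below the 1e-10 lower bound with the default force_alpha="warn":
    # first the FutureWarning, then alpha is clipped to 1e-10 with a warning.
    MultinomialNB(alpha=1e-12).fit(X, y)
print([w.category.__name__ for w in caught])  # e.g. ['FutureWarning', 'UserWarning']

# With force_alpha=True the tiny alpha is kept as-is and nothing is emitted.
MultinomialNB(alpha=1e-12, force_alpha=True).fit(X, y)
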
@@ -812,7 +826,16 @@ class MultinomialNB(_BaseDiscreteNB):
     ----------
     alpha : float or array-like of shape (n_features,), default=1.0
         Additive (Laplace/Lidstone) smoothing parameter
-        (0 for no smoothing).
+        (set alpha=0 and force_alpha=True, for no smoothing).
+
+    force_alpha : bool, default=False
+        If False and alpha is less than 1e-10, it will set alpha to
+        1e-10. If True, alpha will remain unchanged. This may cause
+        numerical errors if alpha is too close to 0.
+
+        .. versionadded:: 1.2
+        .. deprecated:: 1.2
+           The default value of `force_alpha` will change to `True` in v1.4.
 
     fit_prior : bool, default=True
         Whether to learn class prior probabilities or not.
@@ -881,15 +904,22 @@ class MultinomialNB(_BaseDiscreteNB):
     >>> X = rng.randint(5, size=(6, 100))
     >>> y = np.array([1, 2, 3, 4, 5, 6])
     >>> from sklearn.naive_bayes import MultinomialNB
-    >>> clf = MultinomialNB()
+    >>> clf = MultinomialNB(force_alpha=True)
     >>> clf.fit(X, y)
-    MultinomialNB()
+    MultinomialNB(force_alpha=True)
     >>> print(clf.predict(X[2:3]))
     [3]
     """
 
-    def __init__(self, *, alpha=1.0, fit_prior=True, class_prior=None):
-        super().__init__(alpha=alpha, fit_prior=fit_prior, class_prior=class_prior)
+    def __init__(
+        self, *, alpha=1.0, force_alpha="warn", fit_prior=True, class_prior=None
+    ):
+        super().__init__(
+            alpha=alpha,
+            fit_prior=fit_prior,
+            class_prior=class_prior,
+            force_alpha=force_alpha,
+        )
 
     def _more_tags(self):
         return {"requires_positive_X": True}
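
As the updated docstring notes, a truly unsmoothed model now requires both settings together, since a bare `alpha=0` is still bumped to the 1e-10 floor. A hedged sketch of that use, not part of the patch (the strictly positive counts are an assumption, made so that zero class-feature counts never put log(0) into the fitted parameters):

import numpy as np
from sklearn.naive_bayes import MultinomialNB

rng = np.random.RandomState(1)
X = rng.randint(1, 5, size=(6, 10))  # strictly positive counts, no zeros
y = np.array([1, 1, 2, 2, 3, 3])

# alpha=0 alone would be silently raised to 1e-10; combined with
# force_alpha=True it is honored, giving pure maximum-likelihood estimates.
clf = MultinomialNB(alpha=0, force_alpha=True).fit(X, y)
print(clf.predict(X[:2]))
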
@@ -928,7 +958,17 @@ class ComplementNB(_BaseDiscreteNB):
     Parameters
     ----------
     alpha : float or array-like of shape (n_features,), default=1.0
-        Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).
+        Additive (Laplace/Lidstone) smoothing parameter
+        (set alpha=0 and force_alpha=True, for no smoothing).
+
+    force_alpha : bool, default=False
+        If False and alpha is less than 1e-10, it will set alpha to
+        1e-10. If True, alpha will remain unchanged. This may cause
+        numerical errors if alpha is too close to 0.
+
+        .. versionadded:: 1.2
+        .. deprecated:: 1.2
+           The default value of `force_alpha` will change to `True` in v1.4.
 
     fit_prior : bool, default=True
         Only used in edge case with a single class in the training set.
@@ -1005,9 +1045,9 @@ class ComplementNB(_BaseDiscreteNB):
     >>> X = rng.randint(5, size=(6, 100))
     >>> y = np.array([1, 2, 3, 4, 5, 6])
     >>> from sklearn.naive_bayes import ComplementNB
-    >>> clf = ComplementNB()
+    >>> clf = ComplementNB(force_alpha=True)
     >>> clf.fit(X, y)
-    ComplementNB()
+    ComplementNB(force_alpha=True)
     >>> print(clf.predict(X[2:3]))
     [3]
     """
@@ -1017,8 +1057,21 @@ class ComplementNB(_BaseDiscreteNB):
         "norm": ["boolean"],
     }
 
-    def __init__(self, *, alpha=1.0, fit_prior=True, class_prior=None, norm=False):
-        super().__init__(alpha=alpha, fit_prior=fit_prior, class_prior=class_prior)
+    def __init__(
+        self,
+        *,
+        alpha=1.0,
+        force_alpha="warn",
+        fit_prior=True,
+        class_prior=None,
+        norm=False,
+    ):
+        super().__init__(
+            alpha=alpha,
+            force_alpha=force_alpha,
+            fit_prior=fit_prior,
+            class_prior=class_prior,
+        )
         self.norm = norm
 
     def _more_tags(self):
@@ -1064,7 +1117,16 @@ class BernoulliNB(_BaseDiscreteNB):
     ----------
     alpha : float or array-like of shape (n_features,), default=1.0
         Additive (Laplace/Lidstone) smoothing parameter
-        (0 for no smoothing).
+        (set alpha=0 and force_alpha=True, for no smoothing).
+
+    force_alpha : bool, default=False
+        If False and alpha is less than 1e-10, it will set alpha to
+        1e-10. If True, alpha will remain unchanged. This may cause
+        numerical errors if alpha is too close to 0.
+
+        .. versionadded:: 1.2
+        .. deprecated:: 1.2
+           The default value of `force_alpha` will change to `True` in v1.4.
 
     binarize : float or None, default=0.0
         Threshold for binarizing (mapping to booleans) of sample features.
@@ -1144,9 +1206,9 @@ class BernoulliNB(_BaseDiscreteNB):
     >>> X = rng.randint(5, size=(6, 100))
    >>> Y = np.array([1, 2, 3, 4, 4, 5])
     >>> from sklearn.naive_bayes import BernoulliNB
-    >>> clf = BernoulliNB()
+    >>> clf = BernoulliNB(force_alpha=True)
     >>> clf.fit(X, Y)
-    BernoulliNB()
+    BernoulliNB(force_alpha=True)
     >>> print(clf.predict(X[2:3]))
     [3]
     """
@@ -1156,8 +1218,21 @@ class BernoulliNB(_BaseDiscreteNB):
         "binarize": [None, Interval(Real, 0, None, closed="left")],
     }
 
-    def __init__(self, *, alpha=1.0, binarize=0.0, fit_prior=True, class_prior=None):
-        super().__init__(alpha=alpha, fit_prior=fit_prior, class_prior=class_prior)
+    def __init__(
+        self,
+        *,
+        alpha=1.0,
+        force_alpha="warn",
+        binarize=0.0,
+        fit_prior=True,
+        class_prior=None,
+    ):
+        super().__init__(
+            alpha=alpha,
+            fit_prior=fit_prior,
+            class_prior=class_prior,
+            force_alpha=force_alpha,
+        )
         self.binarize = binarize
 
     def _check_X(self, X):
@@ -1219,7 +1294,16 @@ class CategoricalNB(_BaseDiscreteNB):
     ----------
     alpha : float, default=1.0
         Additive (Laplace/Lidstone) smoothing parameter
-        (0 for no smoothing).
+        (set alpha=0 and force_alpha=True, for no smoothing).
+
+    force_alpha : bool, default=False
+        If False and alpha is less than 1e-10, it will set alpha to
+        1e-10. If True, alpha will remain unchanged. This may cause
+        numerical errors if alpha is too close to 0.
+
+        .. versionadded:: 1.2
+        .. deprecated:: 1.2
+           The default value of `force_alpha` will change to `True` in v1.4.
 
     fit_prior : bool, default=True
         Whether to learn class prior probabilities or not.
@@ -1301,9 +1385,9 @@ class CategoricalNB(_BaseDiscreteNB):
     >>> X = rng.randint(5, size=(6, 100))
     >>> y = np.array([1, 2, 3, 4, 5, 6])
     >>> from sklearn.naive_bayes import CategoricalNB
-    >>> clf = CategoricalNB()
+    >>> clf = CategoricalNB(force_alpha=True)
     >>> clf.fit(X, y)
-    CategoricalNB()
+    CategoricalNB(force_alpha=True)
     >>> print(clf.predict(X[2:3]))
     [3]
     """
@@ -1319,9 +1403,20 @@ class CategoricalNB(_BaseDiscreteNB):
     }
 
     def __init__(
-        self, *, alpha=1.0, fit_prior=True, class_prior=None, min_categories=None
+        self,
+        *,
+        alpha=1.0,
+        force_alpha="warn",
+        fit_prior=True,
+        class_prior=None,
+        min_categories=None,
     ):
-        super().__init__(alpha=alpha, fit_prior=fit_prior, class_prior=class_prior)
+        super().__init__(
+            alpha=alpha,
+            force_alpha=force_alpha,
+            fit_prior=fit_prior,
+            class_prior=class_prior,
+        )
         self.min_categories = min_categories
 
     def fit(self, X, y, sample_weight=None):
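
For downstream code that has to run on releases both before and after this patch, feature-detecting the parameter avoids a `TypeError` on older versions and the deprecation warning on newer ones. A sketch using only the standard library (the tiny alpha is a deliberate choice to exercise the new code path):

import inspect
import numpy as np
from sklearn.naive_bayes import BernoulliNB

kwargs = {"alpha": 1e-12}  # small enough to fall below the 1e-10 floor
if "force_alpha" in inspect.signature(BernoulliNB.__init__).parameters:
    kwargs["force_alpha"] = True  # keep the tiny alpha; no warnings on >= 1.2

clf = BernoulliNB(**kwargs).fit(np.eye(2), np.array([0, 1]))
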