-
-
Notifications
You must be signed in to change notification settings - Fork 25.8k
[MRG + 2] fix selectFdr bug #7490
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
520e107
4db45c8
86c5a53
3cab18d
6cb1cc0
a69d584
ac92743
a5a9dcf
207d15f
b315ad9
c2910b4
5a53fa7
f61098f
9e76fa5
22de1f8
d2be567
d782968
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -371,6 +371,40 @@ def test_select_heuristics_regression(): | |
assert_less(np.sum(support[5:] == 1), 3) | ||
|
||
|
||
def test_boundary_case_ch2(): | ||
# Test boundary case, and always aim to select 1 feature. | ||
X = np.array([[10, 20], [20, 20], [20, 30]]) | ||
y = np.array([[1], [0], [0]]) | ||
scores, pvalues = chi2(X, y) | ||
assert_array_almost_equal(scores, np.array([4., 0.71428571])) | ||
assert_array_almost_equal(pvalues, np.array([0.04550026, 0.39802472])) | ||
|
||
filter_fdr = SelectFdr(chi2, alpha=0.1) | ||
filter_fdr.fit(X, y) | ||
support_fdr = filter_fdr.get_support() | ||
assert_array_equal(support_fdr, np.array([True, False])) | ||
|
||
filter_kbest = SelectKBest(chi2, k=1) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @amueller did you mean this sort of thing, testing the select 1 feature boundary case for each strategy? |
||
filter_kbest.fit(X, y) | ||
support_kbest = filter_kbest.get_support() | ||
assert_array_equal(support_kbest, np.array([True, False])) | ||
|
||
filter_percentile = SelectPercentile(chi2, percentile=50) | ||
filter_percentile.fit(X, y) | ||
support_percentile = filter_percentile.get_support() | ||
assert_array_equal(support_percentile, np.array([True, False])) | ||
|
||
filter_fpr = SelectFpr(chi2, alpha=0.1) | ||
filter_fpr.fit(X, y) | ||
support_fpr = filter_fpr.get_support() | ||
assert_array_equal(support_fpr, np.array([True, False])) | ||
|
||
filter_fwe = SelectFwe(chi2, alpha=0.1) | ||
filter_fwe.fit(X, y) | ||
support_fwe = filter_fwe.get_support() | ||
assert_array_equal(support_fwe, np.array([True, False])) | ||
|
||
|
||
def test_select_fdr_regression(): | ||
# Test that fdr heuristic actually has low FDR. | ||
def single_fdr(alpha, n_informative, random_state): | ||
|
@@ -404,7 +438,7 @@ def single_fdr(alpha, n_informative, random_state): | |
# FDR = E(FP / (TP + FP)) <= alpha | ||
false_discovery_rate = np.mean([single_fdr(alpha, n_informative, | ||
random_state) for | ||
random_state in range(30)]) | ||
random_state in range(100)]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is this change for? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. FDR = E(FP / (TP + FP)) <= alphaFDR is count by average many times (N) of "FP/(TP+FP)" There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Did it not pass without that change? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, range 30 will not pass test |
||
assert_greater_equal(alpha, false_discovery_rate) | ||
|
||
# Make sure that the empirical false discovery rate increases | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You need to add yourself at the bottom of the page if you want your name to be a link.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, thanks very much.