Skip to content

FIX handle outlier detector in _get_response_values #27565

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions sklearn/inspection/_plot/tests/test_boundary_decision_display.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
make_classification,
make_multilabel_classification,
)
from sklearn.ensemble import IsolationForest
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.inspection._plot.decision_boundary import _check_boundary_response_method
from sklearn.linear_model import LogisticRegression
Expand Down Expand Up @@ -240,6 +241,39 @@ def test_decision_boundary_display_classifier(
assert disp.figure_ == fig2


@pytest.mark.parametrize("response_method", ["auto", "predict", "decision_function"])
@pytest.mark.parametrize("plot_method", ["contourf", "contour"])
def test_decision_boundary_display_outlier_detector(
    pyplot, response_method, plot_method
):
    """Check that decision boundary is correct for outlier detector."""
    figure, axis = pyplot.subplots()
    margin = 2.0
    estimator = IsolationForest(random_state=0).fit(X, y)
    display = DecisionBoundaryDisplay.from_estimator(
        estimator,
        X,
        grid_resolution=5,
        response_method=response_method,
        plot_method=plot_method,
        eps=margin,
        ax=axis,
    )
    # The surface must be a contour set drawn on the axes/figure we supplied.
    assert isinstance(display.surface_, pyplot.matplotlib.contour.QuadContourSet)
    assert display.ax_ == axis
    assert display.figure_ == figure

    # The evaluation grid must span each feature's data range padded by `eps`
    # on both sides.
    for grid, feature in ((display.xx0, X[:, 0]), (display.xx1, X[:, 1])):
        assert grid.min() == pytest.approx(feature.min() - margin)
        assert grid.max() == pytest.approx(feature.max() + margin)


@pytest.mark.parametrize("response_method", ["auto", "predict"])
@pytest.mark.parametrize("plot_method", ["contourf", "contour"])
def test_decision_boundary_display_regressor(pyplot, response_method, plot_method):
Expand Down
13 changes: 9 additions & 4 deletions sklearn/utils/_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,13 +116,14 @@ def _get_response_values(
pos_label=None,
return_response_method_used=False,
):
"""Compute the response values of a classifier or a regressor.
"""Compute the response values of a classifier, an outlier detector, or a regressor.

The response values are predictions such that it follows the following shape:

- for binary classification, it is a 1d array of shape `(n_samples,)`;
- for multiclass classification, it is a 2d array of shape `(n_samples, n_classes)`;
- for multilabel classification, it is a 2d array of shape `(n_samples, n_outputs)`;
- for outlier detection, it is a 1d array of shape `(n_samples,)`;
- for regression, it is a 1d array of shape `(n_samples,)`.

If `estimator` is a binary classifier, also return the label for the
Expand All @@ -135,8 +136,9 @@ def _get_response_values(
Parameters
----------
estimator : estimator instance
Fitted classifier or regressor or a fitted :class:`~sklearn.pipeline.Pipeline`
in which the last estimator is a classifier or a regressor.
Fitted classifier, outlier detector, or regressor or a
fitted :class:`~sklearn.pipeline.Pipeline` in which the last estimator is a
classifier, an outlier detector, or a regressor.

X : {array-like, sparse matrix} of shape (n_samples, n_features)
Input values.
Expand Down Expand Up @@ -188,7 +190,7 @@ def _get_response_values(
If the response method can be applied to a classifier only and
`estimator` is a regressor.
"""
from sklearn.base import is_classifier # noqa
from sklearn.base import is_classifier, is_outlier_detector # noqa

if is_classifier(estimator):
prediction_method = _check_response_method(estimator, response_method)
Expand Down Expand Up @@ -220,6 +222,9 @@ def _get_response_values(
classes=classes,
pos_label=pos_label,
)
elif is_outlier_detector(estimator):
prediction_method = _check_response_method(estimator, response_method)
y_pred, pos_label = prediction_method(X), None
else: # estimator is a regressor
if response_method != "predict":
raise ValueError(
Expand Down
28 changes: 28 additions & 0 deletions sklearn/utils/tests/test_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
make_multilabel_classification,
make_regression,
)
from sklearn.ensemble import IsolationForest
from sklearn.linear_model import (
LinearRegression,
LogisticRegression,
Expand Down Expand Up @@ -52,6 +53,33 @@ def test_get_response_values_regressor(return_response_method_used):
assert results[2] == "predict"


@pytest.mark.parametrize(
    "response_method",
    ["predict", "decision_function", ["decision_function", "predict"]],
)
@pytest.mark.parametrize("return_response_method_used", [True, False])
def test_get_response_values_outlier_detection(
    response_method, return_response_method_used
):
    """Check the behaviour of `_get_response_values` with outlier detector."""
    X, y = make_classification(n_samples=50, random_state=0)
    detector = IsolationForest(random_state=0).fit(X, y)
    results = _get_response_values(
        detector,
        X,
        response_method=response_method,
        return_response_method_used=return_response_method_used,
    )
    # When a list of methods is given, the first one is expected to be used.
    if isinstance(response_method, list):
        expected_method = response_method[0]
    else:
        expected_method = response_method
    assert_array_equal(results[0], getattr(detector, expected_method)(X))
    # Outlier detectors have no positive class label: it must be None.
    assert results[1] is None
    if return_response_method_used:
        assert results[2] == expected_method


@pytest.mark.parametrize(
"response_method",
["predict_proba", "decision_function", "predict"],
Expand Down