diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst index 7a9318780b5b3..610a7cad65c39 100644 --- a/doc/whats_new/v1.5.rst +++ b/doc/whats_new/v1.5.rst @@ -220,6 +220,13 @@ Changelog by passing a function in place of a strategy name. :pr:`28053` by :user:`Mark Elliot `. +:mod:`sklearn.inspection` +......................... + +- |Fix| :meth:`inspection.DecisionBoundaryDisplay.from_estimator` no longer + warns about missing feature names when provided a `polars.DataFrame`. + :pr:`28718` by :user:`Patrick Wang `. + :mod:`sklearn.linear_model` ........................... diff --git a/sklearn/inspection/_plot/decision_boundary.py b/sklearn/inspection/_plot/decision_boundary.py index 4229aa333507c..92e1a2527400e 100644 --- a/sklearn/inspection/_plot/decision_boundary.py +++ b/sklearn/inspection/_plot/decision_boundary.py @@ -5,8 +5,11 @@ from ...utils import _safe_indexing from ...utils._optional_dependencies import check_matplotlib_support from ...utils._response import _get_response_values +from ...utils._set_output import _get_adapter_from_container from ...utils.validation import ( _is_arraylike_not_scalar, + _is_pandas_df, + _is_polars_df, _num_features, check_is_fitted, ) @@ -345,13 +348,15 @@ def from_estimator( np.linspace(x0_min, x0_max, grid_resolution), np.linspace(x1_min, x1_max, grid_resolution), ) - if hasattr(X, "iloc"): - # we need to preserve the feature names and therefore get an empty dataframe - X_grid = X.iloc[[], :].copy() - X_grid.iloc[:, 0] = xx0.ravel() - X_grid.iloc[:, 1] = xx1.ravel() - else: - X_grid = np.c_[xx0.ravel(), xx1.ravel()] + + X_grid = np.c_[xx0.ravel(), xx1.ravel()] + if _is_pandas_df(X) or _is_polars_df(X): + adapter = _get_adapter_from_container(X) + X_grid = adapter.create_container( + X_grid, + X_grid, + columns=X.columns, + ) prediction_method = _check_boundary_response_method( estimator, response_method, class_of_interest diff --git a/sklearn/inspection/_plot/tests/test_boundary_decision_display.py 
b/sklearn/inspection/_plot/tests/test_boundary_decision_display.py index 7bb38f55445a0..f2dae8a684369 100644 --- a/sklearn/inspection/_plot/tests/test_boundary_decision_display.py +++ b/sklearn/inspection/_plot/tests/test_boundary_decision_display.py @@ -17,6 +17,7 @@ from sklearn.preprocessing import scale from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.utils._testing import ( + _convert_container, assert_allclose, assert_array_equal, ) @@ -468,15 +469,18 @@ def test_string_target(pyplot): ) -def test_dataframe_support(pyplot): +@pytest.mark.parametrize("constructor_name", ["pandas", "polars"]) +def test_dataframe_support(pyplot, constructor_name): """Check that passing a dataframe at fit and to the Display does not raise warnings. Non-regression test for: - https://github.com/scikit-learn/scikit-learn/issues/23311 + * https://github.com/scikit-learn/scikit-learn/issues/23311 + * https://github.com/scikit-learn/scikit-learn/issues/28717 """ - pd = pytest.importorskip("pandas") - df = pd.DataFrame(X, columns=["col_x", "col_y"]) + df = _convert_container( + X, constructor_name=constructor_name, columns_name=["col_x", "col_y"] + ) estimator = LogisticRegression().fit(df, y) with warnings.catch_warnings(): diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py index 921bd6a01fb71..0442e75346fed 100644 --- a/sklearn/preprocessing/_function_transformer.py +++ b/sklearn/preprocessing/_function_transformer.py @@ -4,7 +4,10 @@ from ..base import BaseEstimator, TransformerMixin, _fit_context from ..utils._param_validation import StrOptions -from ..utils._set_output import ADAPTERS_MANAGER, _get_output_config +from ..utils._set_output import ( + _get_adapter_from_container, + _get_output_config, +) from ..utils.metaestimators import available_if from ..utils.validation import ( _allclose_dense_sparse, @@ -16,24 +19,6 @@ ) -def _get_adapter_from_container(container): - """Get the adapter 
that nows how to handle such container. - - See :class:`sklearn.utils._set_output.ContainerAdapterProtocol` for more - details. - """ - module_name = container.__class__.__module__.split(".")[0] - try: - return ADAPTERS_MANAGER.adapters[module_name] - except KeyError as exc: - available_adapters = list(ADAPTERS_MANAGER.adapters.keys()) - raise ValueError( - "The container does not have a registered adapter in scikit-learn. " - f"Available adapters are: {available_adapters} while the container " - f"provided is: {container!r}." - ) from exc - - def _identity(X): """The identity function.""" return X diff --git a/sklearn/preprocessing/tests/test_function_transformer.py b/sklearn/preprocessing/tests/test_function_transformer.py index e7b86e88d1547..81d9d0b8eb843 100644 --- a/sklearn/preprocessing/tests/test_function_transformer.py +++ b/sklearn/preprocessing/tests/test_function_transformer.py @@ -5,7 +5,6 @@ from sklearn.pipeline import make_pipeline from sklearn.preprocessing import FunctionTransformer, StandardScaler -from sklearn.preprocessing._function_transformer import _get_adapter_from_container from sklearn.utils._testing import ( _convert_container, assert_allclose_dense_sparse, @@ -14,17 +13,6 @@ from sklearn.utils.fixes import CSC_CONTAINERS, CSR_CONTAINERS -def test_get_adapter_from_container(): - """Check the behavior fo `_get_adapter_from_container`.""" - pd = pytest.importorskip("pandas") - X = pd.DataFrame({"a": [1, 2, 3], "b": [10, 20, 100]}) - adapter = _get_adapter_from_container(X) - assert adapter.container_lib == "pandas" - err_msg = "The container does not have a registered adapter in scikit-learn." 
- with pytest.raises(ValueError, match=err_msg): - _get_adapter_from_container(X.to_numpy()) - - def _make_func(args_store, kwargs_store, func=lambda X, *a, **k: X): def _func(X, *args, **kwargs): args_store.append(X) diff --git a/sklearn/utils/_set_output.py b/sklearn/utils/_set_output.py index cf7364e117320..d5c23a4c7c6f9 100644 --- a/sklearn/utils/_set_output.py +++ b/sklearn/utils/_set_output.py @@ -197,6 +197,24 @@ def register(self, adapter): ADAPTERS_MANAGER.register(PolarsAdapter()) +def _get_adapter_from_container(container): + """Get the adapter that knows how to handle such container. + + See :class:`sklearn.utils._set_output.ContainerAdapterProtocol` for more + details. + """ + module_name = container.__class__.__module__.split(".")[0] + try: + return ADAPTERS_MANAGER.adapters[module_name] + except KeyError as exc: + available_adapters = list(ADAPTERS_MANAGER.adapters.keys()) + raise ValueError( + "The container does not have a registered adapter in scikit-learn. " + f"Available adapters are: {available_adapters} while the container " + f"provided is: {container!r}." 
+ ) from exc + + def _get_container_adapter(method, estimator=None): """Get container adapter.""" dense_config = _get_output_config(method, estimator)["dense"] diff --git a/sklearn/utils/tests/test_set_output.py b/sklearn/utils/tests/test_set_output.py index 827627f441ddd..360b081a2a0fb 100644 --- a/sklearn/utils/tests/test_set_output.py +++ b/sklearn/utils/tests/test_set_output.py @@ -10,6 +10,7 @@ from sklearn.utils._set_output import ( ADAPTERS_MANAGER, ContainerAdapterProtocol, + _get_adapter_from_container, _get_output_config, _safe_set_output, _SetOutputMixin, @@ -450,3 +451,14 @@ def patched_import_module(name): msg = "Setting output container to 'pandas' requires" with pytest.raises(ImportError, match=msg): check_library_installed("pandas") + + +def test_get_adapter_from_container(): + """Check the behavior of `_get_adapter_from_container`.""" + pd = pytest.importorskip("pandas") + X = pd.DataFrame({"a": [1, 2, 3], "b": [10, 20, 100]}) + adapter = _get_adapter_from_container(X) + assert adapter.container_lib == "pandas" + err_msg = "The container does not have a registered adapter in scikit-learn." + with pytest.raises(ValueError, match=err_msg): + _get_adapter_from_container(X.to_numpy())