From ea88150336489b68b2fd1e157b98126f3248f331 Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Tue, 19 Mar 2024 14:58:28 +0100 Subject: [PATCH] move _to_object_array into validation --- sklearn/neighbors/_base.py | 3 +- sklearn/preprocessing/tests/test_label.py | 2 +- sklearn/utils/__init__.py | 36 ----------------------- sklearn/utils/tests/test_utils.py | 9 ------ sklearn/utils/tests/test_validation.py | 9 ++++++ sklearn/utils/validation.py | 36 +++++++++++++++++++++++ 6 files changed, 47 insertions(+), 48 deletions(-) diff --git a/sklearn/neighbors/_base.py b/sklearn/neighbors/_base.py index 6df0f2030877e..e1e8bdbb09d7c 100644 --- a/sklearn/neighbors/_base.py +++ b/sklearn/neighbors/_base.py @@ -26,7 +26,6 @@ ) from ..metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS from ..utils import ( - _to_object_array, check_array, gen_even_slices, ) @@ -34,7 +33,7 @@ from ..utils.fixes import parse_version, sp_base_version from ..utils.multiclass import check_classification_targets from ..utils.parallel import Parallel, delayed -from ..utils.validation import check_is_fitted, check_non_negative +from ..utils.validation import _to_object_array, check_is_fitted, check_non_negative from ._ball_tree import BallTree from ._kd_tree import KDTree diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py index cce0ddc5c267e..e438805df1254 100644 --- a/sklearn/preprocessing/tests/test_label.py +++ b/sklearn/preprocessing/tests/test_label.py @@ -11,7 +11,6 @@ _inverse_binarize_thresholding, label_binarize, ) -from sklearn.utils import _to_object_array from sklearn.utils._testing import assert_array_equal, ignore_warnings from sklearn.utils.fixes import ( COO_CONTAINERS, @@ -21,6 +20,7 @@ LIL_CONTAINERS, ) from sklearn.utils.multiclass import type_of_target +from sklearn.utils.validation import _to_object_array iris = datasets.load_iris() diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 
5131f7e7ed6e6..545741917599b 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -111,42 +111,6 @@ def tosequence(x): return list(x) -def _to_object_array(sequence): - """Convert sequence to a 1-D NumPy array of object dtype. - - numpy.array constructor has a similar use but it's output - is ambiguous. It can be 1-D NumPy array of object dtype if - the input is a ragged array, but if the input is a list of - equal length arrays, then the output is a 2D numpy.array. - _to_object_array solves this ambiguity by guarantying that - the output is a 1-D NumPy array of objects for any input. - - Parameters - ---------- - sequence : array-like of shape (n_elements,) - The sequence to be converted. - - Returns - ------- - out : ndarray of shape (n_elements,), dtype=object - The converted sequence into a 1-D NumPy array of object dtype. - - Examples - -------- - >>> import numpy as np - >>> from sklearn.utils import _to_object_array - >>> _to_object_array([np.array([0]), np.array([1])]) - array([array([0]), array([1])], dtype=object) - >>> _to_object_array([np.array([0]), np.array([1, 2])]) - array([array([0]), array([1, 2])], dtype=object) - >>> _to_object_array([np.array([0]), np.array([1, 2])]) - array([array([0]), array([1, 2])], dtype=object) - """ - out = np.empty(len(sequence), dtype=object) - out[:] = sequence - return out - - def _message_with_time(source, message, time): """Create one line message for logging purposes. 
diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py index ccc3738e8d733..59f8c605a90f3 100644 --- a/sklearn/utils/tests/test_utils.py +++ b/sklearn/utils/tests/test_utils.py @@ -8,7 +8,6 @@ from sklearn.utils import ( _message_with_time, _print_elapsed_time, - _to_object_array, check_random_state, column_or_1d, deprecated, @@ -212,14 +211,6 @@ def test_deprecation_joblib_api(tmpdir): del joblib.parallel.BACKENDS["failing"] -@pytest.mark.parametrize("sequence", [[np.array(1), np.array(2)], [[1, 2], [3, 4]]]) -def test_to_object_array(sequence): - out = _to_object_array(sequence) - assert isinstance(out, np.ndarray) - assert out.dtype.kind == "O" - assert out.ndim == 1 - - def test__is_polars_df(): """Check that _is_polars_df return False for non-dataframe objects.""" diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 715adfb3b003d..5e54443a84165 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -74,6 +74,7 @@ _is_polars_df, _num_features, _num_samples, + _to_object_array, assert_all_finite, check_consistent_length, check_is_fitted, @@ -2052,3 +2053,11 @@ def test_check_array_dia_to_int32_indexed_csr_csc_coo(sparse_container, output_f else: # output_format in ["csr", "csc"] assert X_checked.indices.dtype == np.int32 assert X_checked.indptr.dtype == np.int32 + + +@pytest.mark.parametrize("sequence", [[np.array(1), np.array(2)], [[1, 2], [3, 4]]]) +def test_to_object_array(sequence): + out = _to_object_array(sequence) + assert isinstance(out, np.ndarray) + assert out.dtype.kind == "O" + assert out.ndim == 1 diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 3bfd835c1329c..d0a2fb098931f 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -2472,3 +2472,39 @@ def _check_pos_label_consistency(pos_label, y_true): pos_label = 1 return pos_label + + +def _to_object_array(sequence): + """Convert 
sequence to a 1-D NumPy array of object dtype. + + numpy.array constructor has a similar use but its output + is ambiguous. It can be a 1-D NumPy array of object dtype if + the input is a ragged array, but if the input is a list of + equal length arrays, then the output is a 2D numpy.array. + _to_object_array solves this ambiguity by guaranteeing that + the output is a 1-D NumPy array of objects for any input. + + Parameters + ---------- + sequence : array-like of shape (n_elements,) + The sequence to be converted. + + Returns + ------- + out : ndarray of shape (n_elements,), dtype=object + The converted sequence into a 1-D NumPy array of object dtype. + + Examples + -------- + >>> import numpy as np + >>> from sklearn.utils.validation import _to_object_array + >>> _to_object_array([np.array([0]), np.array([1])]) + array([array([0]), array([1])], dtype=object) + >>> _to_object_array([np.array([0]), np.array([1, 2])]) + array([array([0]), array([1, 2])], dtype=object) + >>> _to_object_array([np.array([0]), np.array([1, 2])]) + array([array([0]), array([1, 2])], dtype=object) + """ + out = np.empty(len(sequence), dtype=object) + out[:] = sequence + return out