diff --git a/sklearn/neighbors/_base.py b/sklearn/neighbors/_base.py index 6df0f2030877e..e1e8bdbb09d7c 100644 --- a/sklearn/neighbors/_base.py +++ b/sklearn/neighbors/_base.py @@ -26,7 +26,6 @@ ) from ..metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS from ..utils import ( - _to_object_array, check_array, gen_even_slices, ) @@ -34,7 +33,7 @@ from ..utils.fixes import parse_version, sp_base_version from ..utils.multiclass import check_classification_targets from ..utils.parallel import Parallel, delayed -from ..utils.validation import check_is_fitted, check_non_negative +from ..utils.validation import _to_object_array, check_is_fitted, check_non_negative from ._ball_tree import BallTree from ._kd_tree import KDTree diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py index cce0ddc5c267e..e438805df1254 100644 --- a/sklearn/preprocessing/tests/test_label.py +++ b/sklearn/preprocessing/tests/test_label.py @@ -11,7 +11,6 @@ _inverse_binarize_thresholding, label_binarize, ) -from sklearn.utils import _to_object_array from sklearn.utils._testing import assert_array_equal, ignore_warnings from sklearn.utils.fixes import ( COO_CONTAINERS, @@ -21,6 +20,7 @@ LIL_CONTAINERS, ) from sklearn.utils.multiclass import type_of_target +from sklearn.utils.validation import _to_object_array iris = datasets.load_iris() diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index db5021570451d..2d4fe7210ec11 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -107,39 +107,3 @@ def tosequence(x): return x else: return list(x) - - -def _to_object_array(sequence): - """Convert sequence to a 1-D NumPy array of object dtype. - - numpy.array constructor has a similar use but it's output - is ambiguous. It can be 1-D NumPy array of object dtype if - the input is a ragged array, but if the input is a list of - equal length arrays, then the output is a 2D numpy.array. 
- _to_object_array solves this ambiguity by guarantying that - the output is a 1-D NumPy array of objects for any input. - - Parameters - ---------- - sequence : array-like of shape (n_elements,) - The sequence to be converted. - - Returns - ------- - out : ndarray of shape (n_elements,), dtype=object - The converted sequence into a 1-D NumPy array of object dtype. - - Examples - -------- - >>> import numpy as np - >>> from sklearn.utils import _to_object_array - >>> _to_object_array([np.array([0]), np.array([1])]) - array([array([0]), array([1])], dtype=object) - >>> _to_object_array([np.array([0]), np.array([1, 2])]) - array([array([0]), array([1, 2])], dtype=object) - >>> _to_object_array([np.array([0]), np.array([1, 2])]) - array([array([0]), array([1, 2])], dtype=object) - """ - out = np.empty(len(sequence), dtype=object) - out[:] = sequence - return out diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py index c2e2d01ee39a5..dc0c1198e80a0 100644 --- a/sklearn/utils/tests/test_utils.py +++ b/sklearn/utils/tests/test_utils.py @@ -4,7 +4,6 @@ import pytest from sklearn.utils import ( - _to_object_array, check_random_state, column_or_1d, deprecated, @@ -149,14 +148,6 @@ def test_deprecation_joblib_api(tmpdir): del joblib.parallel.BACKENDS["failing"] -@pytest.mark.parametrize("sequence", [[np.array(1), np.array(2)], [[1, 2], [3, 4]]]) -def test_to_object_array(sequence): - out = _to_object_array(sequence) - assert isinstance(out, np.ndarray) - assert out.dtype.kind == "O" - assert out.ndim == 1 - - def test__is_polars_df(): """Check that _is_polars_df return False for non-dataframe objects.""" diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 715adfb3b003d..5e54443a84165 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -74,6 +74,7 @@ _is_polars_df, _num_features, _num_samples, + _to_object_array, assert_all_finite, 
check_consistent_length, check_is_fitted, @@ -2052,3 +2053,11 @@ def test_check_array_dia_to_int32_indexed_csr_csc_coo(sparse_container, output_f else: # output_format in ["csr", "csc"] assert X_checked.indices.dtype == np.int32 assert X_checked.indptr.dtype == np.int32 + + +@pytest.mark.parametrize("sequence", [[np.array(1), np.array(2)], [[1, 2], [3, 4]]]) +def test_to_object_array(sequence): + out = _to_object_array(sequence) + assert isinstance(out, np.ndarray) + assert out.dtype.kind == "O" + assert out.ndim == 1 diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 3bfd835c1329c..d0a2fb098931f 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -2472,3 +2472,39 @@ def _check_pos_label_consistency(pos_label, y_true): pos_label = 1 return pos_label + + +def _to_object_array(sequence): + """Convert sequence to a 1-D NumPy array of object dtype. + + numpy.array constructor has a similar use but its output + is ambiguous. It can be a 1-D NumPy array of object dtype if + the input is a ragged array, but if the input is a list of + equal length arrays, then the output is a 2D numpy.array. + _to_object_array solves this ambiguity by guaranteeing that + the output is a 1-D NumPy array of objects for any input. + + Parameters + ---------- + sequence : array-like of shape (n_elements,) + The sequence to be converted. + + Returns + ------- + out : ndarray of shape (n_elements,), dtype=object + The sequence converted into a 1-D NumPy array of object dtype. 
+ + Examples + -------- + >>> import numpy as np + >>> from sklearn.utils.validation import _to_object_array + >>> _to_object_array([np.array([0]), np.array([1])]) + array([array([0]), array([1])], dtype=object) + >>> _to_object_array([np.array([0]), np.array([1, 2])]) + array([array([0]), array([1, 2])], dtype=object) + >>> _to_object_array([[1, 2], [3, 4]]) + array([list([1, 2]), list([3, 4])], dtype=object) + """ + out = np.empty(len(sequence), dtype=object) + out[:] = sequence + return out