From ea88150336489b68b2fd1e157b98126f3248f331 Mon Sep 17 00:00:00 2001
From: jeremie du boisberranger <jeremiedbb@yahoo.fr>
Date: Tue, 19 Mar 2024 14:58:28 +0100
Subject: [PATCH] move _to_object_array into validation

---
 sklearn/neighbors/_base.py                |  3 +-
 sklearn/preprocessing/tests/test_label.py |  2 +-
 sklearn/utils/__init__.py                 | 36 -----------------------
 sklearn/utils/tests/test_utils.py         |  9 ------
 sklearn/utils/tests/test_validation.py    |  9 ++++++
 sklearn/utils/validation.py               | 36 +++++++++++++++++++++++
 6 files changed, 47 insertions(+), 48 deletions(-)

diff --git a/sklearn/neighbors/_base.py b/sklearn/neighbors/_base.py
index 6df0f2030877e..e1e8bdbb09d7c 100644
--- a/sklearn/neighbors/_base.py
+++ b/sklearn/neighbors/_base.py
@@ -26,7 +26,6 @@
 )
 from ..metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS
 from ..utils import (
-    _to_object_array,
     check_array,
     gen_even_slices,
 )
@@ -34,7 +33,7 @@
 from ..utils.fixes import parse_version, sp_base_version
 from ..utils.multiclass import check_classification_targets
 from ..utils.parallel import Parallel, delayed
-from ..utils.validation import check_is_fitted, check_non_negative
+from ..utils.validation import _to_object_array, check_is_fitted, check_non_negative
 from ._ball_tree import BallTree
 from ._kd_tree import KDTree
 
diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py
index cce0ddc5c267e..e438805df1254 100644
--- a/sklearn/preprocessing/tests/test_label.py
+++ b/sklearn/preprocessing/tests/test_label.py
@@ -11,7 +11,6 @@
     _inverse_binarize_thresholding,
     label_binarize,
 )
-from sklearn.utils import _to_object_array
 from sklearn.utils._testing import assert_array_equal, ignore_warnings
 from sklearn.utils.fixes import (
     COO_CONTAINERS,
@@ -21,6 +20,7 @@
     LIL_CONTAINERS,
 )
 from sklearn.utils.multiclass import type_of_target
+from sklearn.utils.validation import _to_object_array
 
 iris = datasets.load_iris()
 
diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index 5131f7e7ed6e6..545741917599b 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -111,42 +111,6 @@ def tosequence(x):
         return list(x)
 
 
-def _to_object_array(sequence):
-    """Convert sequence to a 1-D NumPy array of object dtype.
-
-    numpy.array constructor has a similar use but it's output
-    is ambiguous. It can be 1-D NumPy array of object dtype if
-    the input is a ragged array, but if the input is a list of
-    equal length arrays, then the output is a 2D numpy.array.
-    _to_object_array solves this ambiguity by guarantying that
-    the output is a 1-D NumPy array of objects for any input.
-
-    Parameters
-    ----------
-    sequence : array-like of shape (n_elements,)
-        The sequence to be converted.
-
-    Returns
-    -------
-    out : ndarray of shape (n_elements,), dtype=object
-        The converted sequence into a 1-D NumPy array of object dtype.
-
-    Examples
-    --------
-    >>> import numpy as np
-    >>> from sklearn.utils import _to_object_array
-    >>> _to_object_array([np.array([0]), np.array([1])])
-    array([array([0]), array([1])], dtype=object)
-    >>> _to_object_array([np.array([0]), np.array([1, 2])])
-    array([array([0]), array([1, 2])], dtype=object)
-    >>> _to_object_array([np.array([0]), np.array([1, 2])])
-    array([array([0]), array([1, 2])], dtype=object)
-    """
-    out = np.empty(len(sequence), dtype=object)
-    out[:] = sequence
-    return out
-
-
 def _message_with_time(source, message, time):
     """Create one line message for logging purposes.
 
diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py
index ccc3738e8d733..59f8c605a90f3 100644
--- a/sklearn/utils/tests/test_utils.py
+++ b/sklearn/utils/tests/test_utils.py
@@ -8,7 +8,6 @@
 from sklearn.utils import (
     _message_with_time,
     _print_elapsed_time,
-    _to_object_array,
     check_random_state,
     column_or_1d,
     deprecated,
@@ -212,14 +211,6 @@ def test_deprecation_joblib_api(tmpdir):
     del joblib.parallel.BACKENDS["failing"]
 
 
-@pytest.mark.parametrize("sequence", [[np.array(1), np.array(2)], [[1, 2], [3, 4]]])
-def test_to_object_array(sequence):
-    out = _to_object_array(sequence)
-    assert isinstance(out, np.ndarray)
-    assert out.dtype.kind == "O"
-    assert out.ndim == 1
-
-
 def test__is_polars_df():
     """Check that _is_polars_df return False for non-dataframe objects."""
 
diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index 715adfb3b003d..5e54443a84165 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -74,6 +74,7 @@
     _is_polars_df,
     _num_features,
     _num_samples,
+    _to_object_array,
     assert_all_finite,
     check_consistent_length,
     check_is_fitted,
@@ -2052,3 +2053,11 @@ def test_check_array_dia_to_int32_indexed_csr_csc_coo(sparse_container, output_f
     else:  # output_format in ["csr", "csc"]
         assert X_checked.indices.dtype == np.int32
         assert X_checked.indptr.dtype == np.int32
+
+
+@pytest.mark.parametrize("sequence", [[np.array(1), np.array(2)], [[1, 2], [3, 4]]])
+def test_to_object_array(sequence):
+    out = _to_object_array(sequence)
+    assert isinstance(out, np.ndarray)
+    assert out.dtype.kind == "O"
+    assert out.ndim == 1
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 3bfd835c1329c..d0a2fb098931f 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -2472,3 +2472,39 @@ def _check_pos_label_consistency(pos_label, y_true):
         pos_label = 1
 
     return pos_label
+
+
+def _to_object_array(sequence):
+    """Convert sequence to a 1-D NumPy array of object dtype.
+
+    numpy.array constructor has a similar use but its output
+    is ambiguous. It can be 1-D NumPy array of object dtype if
+    the input is a ragged array, but if the input is a list of
+    equal length arrays, then the output is a 2D numpy.array.
+    _to_object_array solves this ambiguity by guaranteeing that
+    the output is a 1-D NumPy array of objects for any input.
+
+    Parameters
+    ----------
+    sequence : array-like of shape (n_elements,)
+        The sequence to be converted.
+
+    Returns
+    -------
+    out : ndarray of shape (n_elements,), dtype=object
+        The converted sequence into a 1-D NumPy array of object dtype.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from sklearn.utils.validation import _to_object_array
+    >>> _to_object_array([np.array([0]), np.array([1])])
+    array([array([0]), array([1])], dtype=object)
+    >>> _to_object_array([np.array([0]), np.array([1, 2])])
+    array([array([0]), array([1, 2])], dtype=object)
+    >>> _to_object_array([[1, 2], [3, 4]])
+    array([list([1, 2]), list([3, 4])], dtype=object)
+    """
+    out = np.empty(len(sequence), dtype=object)
+    out[:] = sequence
+    return out