diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
index 93898a5114191..3f88285117d6b 100644
--- a/sklearn/model_selection/_split.py
+++ b/sklearn/model_selection/_split.py
@@ -24,13 +24,13 @@
 from scipy.special import comb
 
 from ..utils import (
-    _approximate_mode,
     _safe_indexing,
     check_random_state,
     indexable,
     metadata_routing,
 )
 from ..utils._param_validation import Interval, RealNotInt, validate_params
+from ..utils.extmath import _approximate_mode
 from ..utils.metadata_routing import _MetadataRequester
 from ..utils.multiclass import type_of_target
 from ..utils.validation import _num_samples, check_array, column_or_1d
diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index 53bb376ea93d6..e359a50c63aab 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -24,6 +24,7 @@
 from .class_weight import compute_class_weight, compute_sample_weight
 from .deprecation import deprecated
 from .discovery import all_estimators
+from .extmath import _approximate_mode, safe_sqr
 from .fixes import parse_version, threadpool_info
 from .murmurhash import murmurhash3_32
 from .validation import (
@@ -76,6 +77,7 @@
     "estimator_html_repr",
     "Bunch",
     "metadata_routing",
+    "safe_sqr",
 ]
 
 IS_PYPY = platform.python_implementation() == "PyPy"
@@ -778,41 +780,6 @@ def shuffle(*arrays, random_state=None, n_samples=None):
     )
 
 
-def safe_sqr(X, *, copy=True):
-    """Element wise squaring of array-likes and sparse matrices.
-
-    Parameters
-    ----------
-    X : {array-like, ndarray, sparse matrix}
-
-    copy : bool, default=True
-        Whether to create a copy of X and operate on it or to perform
-        inplace computation (default behaviour).
-
-    Returns
-    -------
-    X ** 2 : element wise square
-         Return the element-wise square of the input.
-
-    Examples
-    --------
-    >>> from sklearn.utils import safe_sqr
-    >>> safe_sqr([1, 2, 3])
-    array([1, 4, 9])
-    """
-    X = check_array(X, accept_sparse=["csr", "csc", "coo"], ensure_2d=False)
-    if issparse(X):
-        if copy:
-            X = X.copy()
-        X.data **= 2
-    else:
-        if copy:
-            X = X**2
-        else:
-            X **= 2
-    return X
-
-
 def _chunk_generator(gen, chunksize):
     """Chunk generator, ``gen`` into lists of length ``chunksize``. The last
     chunk may have a length less than ``chunksize``."""
@@ -1183,72 +1150,3 @@ def is_scalar_nan(x):
         and isinstance(x, numbers.Real)
         and math.isnan(x)
     )
-
-
-def _approximate_mode(class_counts, n_draws, rng):
-    """Computes approximate mode of multivariate hypergeometric.
-
-    This is an approximation to the mode of the multivariate
-    hypergeometric given by class_counts and n_draws.
-    It shouldn't be off by more than one.
-
-    It is the mostly likely outcome of drawing n_draws many
-    samples from the population given by class_counts.
-
-    Parameters
-    ----------
-    class_counts : ndarray of int
-        Population per class.
-    n_draws : int
-        Number of draws (samples to draw) from the overall population.
-    rng : random state
-        Used to break ties.
-
-    Returns
-    -------
-    sampled_classes : ndarray of int
-        Number of samples drawn from each class.
-        np.sum(sampled_classes) == n_draws
-
-    Examples
-    --------
-    >>> import numpy as np
-    >>> from sklearn.utils import _approximate_mode
-    >>> _approximate_mode(class_counts=np.array([4, 2]), n_draws=3, rng=0)
-    array([2, 1])
-    >>> _approximate_mode(class_counts=np.array([5, 2]), n_draws=4, rng=0)
-    array([3, 1])
-    >>> _approximate_mode(class_counts=np.array([2, 2, 2, 1]),
-    ...                   n_draws=2, rng=0)
-    array([0, 1, 1, 0])
-    >>> _approximate_mode(class_counts=np.array([2, 2, 2, 1]),
-    ...                   n_draws=2, rng=42)
-    array([1, 1, 0, 0])
-    """
-    rng = check_random_state(rng)
-    # this computes a bad approximation to the mode of the
-    # multivariate hypergeometric given by class_counts and n_draws
-    continuous = class_counts / class_counts.sum() * n_draws
-    # floored means we don't overshoot n_samples, but probably undershoot
-    floored = np.floor(continuous)
-    # we add samples according to how much "left over" probability
-    # they had, until we arrive at n_samples
-    need_to_add = int(n_draws - floored.sum())
-    if need_to_add > 0:
-        remainder = continuous - floored
-        values = np.sort(np.unique(remainder))[::-1]
-        # add according to remainder, but break ties
-        # randomly to avoid biases
-        for value in values:
-            (inds,) = np.where(remainder == value)
-            # if we need_to_add less than what's in inds
-            # we draw randomly from them.
-            # if we need to add more, we add them all and
-            # go to the next value
-            add_now = min(len(inds), need_to_add)
-            inds = rng.choice(inds, size=add_now, replace=False)
-            floored[inds] += 1
-            need_to_add -= add_now
-            if need_to_add == 0:
-                break
-    return floored.astype(int)
diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py
index 975b3458692b9..372ebc4f6b8b5 100644
--- a/sklearn/utils/extmath.py
+++ b/sklearn/utils/extmath.py
@@ -21,10 +21,9 @@
 
 from ..utils import deprecated
 from ..utils._param_validation import Interval, StrOptions, validate_params
-from . import check_random_state
 from ._array_api import _is_numpy_namespace, device, get_namespace
 from .sparsefuncs_fast import csr_row_norms
-from .validation import check_array
+from .validation import check_array, check_random_state
 
 
 def squared_norm(x):
@@ -1282,3 +1281,107 @@ def _nanaverage(a, weights=None):
     except ZeroDivisionError:
         # this is when all weights are zero, then ignore them
         return np.average(a)
+
+
+def safe_sqr(X, *, copy=True):
+    """Element wise squaring of array-likes and sparse matrices.
+
+    Parameters
+    ----------
+    X : {array-like, ndarray, sparse matrix}
+
+    copy : bool, default=True
+        Whether to create a copy of X and operate on it or to perform
+        inplace computation (default behaviour).
+
+    Returns
+    -------
+    X ** 2 : element wise square
+         Return the element-wise square of the input.
+
+    Examples
+    --------
+    >>> from sklearn.utils import safe_sqr
+    >>> safe_sqr([1, 2, 3])
+    array([1, 4, 9])
+    """
+    X = check_array(X, accept_sparse=["csr", "csc", "coo"], ensure_2d=False)
+    if sparse.issparse(X):
+        if copy:
+            X = X.copy()
+        X.data **= 2
+    else:
+        if copy:
+            X = X**2
+        else:
+            X **= 2
+    return X
+
+
+def _approximate_mode(class_counts, n_draws, rng):
+    """Computes approximate mode of multivariate hypergeometric.
+
+    This is an approximation to the mode of the multivariate
+    hypergeometric given by class_counts and n_draws.
+    It shouldn't be off by more than one.
+
+    It is the mostly likely outcome of drawing n_draws many
+    samples from the population given by class_counts.
+
+    Parameters
+    ----------
+    class_counts : ndarray of int
+        Population per class.
+    n_draws : int
+        Number of draws (samples to draw) from the overall population.
+    rng : random state
+        Used to break ties.
+
+    Returns
+    -------
+    sampled_classes : ndarray of int
+        Number of samples drawn from each class.
+        np.sum(sampled_classes) == n_draws
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from sklearn.utils.extmath import _approximate_mode
+    >>> _approximate_mode(class_counts=np.array([4, 2]), n_draws=3, rng=0)
+    array([2, 1])
+    >>> _approximate_mode(class_counts=np.array([5, 2]), n_draws=4, rng=0)
+    array([3, 1])
+    >>> _approximate_mode(class_counts=np.array([2, 2, 2, 1]),
+    ...                   n_draws=2, rng=0)
+    array([0, 1, 1, 0])
+    >>> _approximate_mode(class_counts=np.array([2, 2, 2, 1]),
+    ...                   n_draws=2, rng=42)
+    array([1, 1, 0, 0])
+    """
+    rng = check_random_state(rng)
+    # this computes a bad approximation to the mode of the
+    # multivariate hypergeometric given by class_counts and n_draws
+    continuous = class_counts / class_counts.sum() * n_draws
+    # floored means we don't overshoot n_samples, but probably undershoot
+    floored = np.floor(continuous)
+    # we add samples according to how much "left over" probability
+    # they had, until we arrive at n_samples
+    need_to_add = int(n_draws - floored.sum())
+    if need_to_add > 0:
+        remainder = continuous - floored
+        values = np.sort(np.unique(remainder))[::-1]
+        # add according to remainder, but break ties
+        # randomly to avoid biases
+        for value in values:
+            (inds,) = np.where(remainder == value)
+            # if we need_to_add less than what's in inds
+            # we draw randomly from them.
+            # if we need to add more, we add them all and
+            # go to the next value
+            add_now = min(len(inds), need_to_add)
+            inds = rng.choice(inds, size=add_now, replace=False)
+            floored[inds] += 1
+            need_to_add -= add_now
+            if need_to_add == 0:
+                break
+    return floored.astype(int)
diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py
index fc2eab70f007b..c167a7e9d8f59 100644
--- a/sklearn/utils/tests/test_extmath.py
+++ b/sklearn/utils/tests/test_extmath.py
@@ -22,6 +22,7 @@
     skip_if_32bit,
 )
 from sklearn.utils.extmath import (
+    _approximate_mode,
     _deterministic_vector_sign_flip,
     _incremental_mean_and_var,
     _randomized_eigsh,
@@ -1062,3 +1063,20 @@ def test_safe_sparse_dot_dense_output(dense_output):
     if dense_output:
         expected = expected.toarray()
     assert_allclose_dense_sparse(actual, expected)
+
+
+def test_approximate_mode():
+    """Make sure sklearn.utils.extmath._approximate_mode returns valid
+    results for cases where "class_counts * n_draws" is enough
+    to overflow 32-bit signed integer.
+
+    Non-regression test for:
+    https://github.com/scikit-learn/scikit-learn/issues/20774
+    """
+    X = np.array([99000, 1000], dtype=np.int32)
+    ret = _approximate_mode(class_counts=X, n_draws=25000, rng=0)
+
+    # Draws 25% of the total population, so in this case a fair draw means:
+    # 25% * 99.000 = 24.750
+    # 25% *  1.000 =    250
+    assert_array_equal(ret, [24750, 250])
diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py
index 20b3fdc600a8c..f3d9f81b372ec 100644
--- a/sklearn/utils/tests/test_utils.py
+++ b/sklearn/utils/tests/test_utils.py
@@ -11,7 +11,6 @@
 from sklearn import config_context
 from sklearn.externals._packaging.version import parse as parse_version
 from sklearn.utils import (
-    _approximate_mode,
     _determine_key_type,
     _get_column_indices,
     _is_polars_df,
@@ -706,23 +705,6 @@ def test_is_scalar_nan(value, result):
     assert isinstance(is_scalar_nan(value), bool)
 
 
-def test_approximate_mode():
-    """Make sure sklearn.utils._approximate_mode returns valid
-    results for cases where "class_counts * n_draws" is enough
-    to overflow 32-bit signed integer.
-
-    Non-regression test for:
-    https://github.com/scikit-learn/scikit-learn/issues/20774
-    """
-    X = np.array([99000, 1000], dtype=np.int32)
-    ret = _approximate_mode(class_counts=X, n_draws=25000, rng=0)
-
-    # Draws 25% of the total population, so in this case a fair draw means:
-    # 25% * 99.000 = 24.750
-    # 25% *  1.000 =    250
-    assert_array_equal(ret, [24750, 250])
-
-
 def dummy_func():
     pass