diff --git a/doc/modules/array_api.rst b/doc/modules/array_api.rst
index 26359f963963d..cc7831f094f61 100644
--- a/doc/modules/array_api.rst
+++ b/doc/modules/array_api.rst
@@ -112,6 +112,7 @@ base estimator also does:
 Metrics
 -------
 
+- :func:`sklearn.metrics.cluster.entropy`
 - :func:`sklearn.metrics.accuracy_score`
 - :func:`sklearn.metrics.d2_tweedie_score`
 - :func:`sklearn.metrics.max_error`
diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst
index 7f96f169ad638..fd43347cf7ac8 100644
--- a/doc/whats_new/v1.6.rst
+++ b/doc/whats_new/v1.6.rst
@@ -32,6 +32,7 @@ See :ref:`array_api` for more details.
 
 **Functions:**
 
+- :func:`sklearn.metrics.cluster.entropy` :pr:`29141` by :user:`Yaroslav Korobko <Tialo>`;
 - :func:`sklearn.metrics.d2_tweedie_score` :pr:`29207` by :user:`Emily Chen <EmilyXinyi>`;
 - :func:`sklearn.metrics.max_error` :pr:`29212` by :user:`Edoardo Abati <EdAbati>`;
 - :func:`sklearn.metrics.mean_absolute_error` :pr:`27736` by :user:`Edoardo Abati <EdAbati>`;
diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py
index 8e178e21a55ec..208f2732d06a1 100644
--- a/sklearn/metrics/cluster/_supervised.py
+++ b/sklearn/metrics/cluster/_supervised.py
@@ -23,6 +23,7 @@
 import numpy as np
 from scipy import sparse as sp
 
+from ...utils._array_api import get_namespace
 from ...utils._param_validation import Interval, StrOptions, validate_params
 from ...utils.multiclass import type_of_target
 from ...utils.validation import check_array, check_consistent_length
@@ -1282,17 +1283,20 @@ def entropy(labels):
     -----
     The logarithm used is the natural logarithm (base-e).
     """
-    if len(labels) == 0:
+    xp, is_array_api_compliant = get_namespace(labels)
+    labels_len = labels.shape[0] if is_array_api_compliant else len(labels)
+    if labels_len == 0:
         return 1.0
-    label_idx = np.unique(labels, return_inverse=True)[1]
-    pi = np.bincount(label_idx).astype(np.float64)
-    pi = pi[pi > 0]
+
+    pi = xp.astype(xp.unique_counts(labels)[1], xp.float64)
 
     # single cluster => zero entropy
     if pi.size == 1:
         return 0.0
 
-    pi_sum = np.sum(pi)
+    pi_sum = xp.sum(pi)
     # log(a / b) should be calculated as log(a) - log(b) for
     # possible loss of precision
-    return -np.sum((pi / pi_sum) * (np.log(pi) - log(pi_sum)))
+    # Always convert the result as a Python scalar (on CPU) instead of a device
+    # specific scalar array.
+    return float(-xp.sum((pi / pi_sum) * (xp.log(pi) - log(pi_sum))))
diff --git a/sklearn/metrics/cluster/tests/test_supervised.py b/sklearn/metrics/cluster/tests/test_supervised.py
index dfaa58ff62c01..077dca0854a01 100644
--- a/sklearn/metrics/cluster/tests/test_supervised.py
+++ b/sklearn/metrics/cluster/tests/test_supervised.py
@@ -4,6 +4,7 @@
 import pytest
 from numpy.testing import assert_allclose, assert_array_almost_equal, assert_array_equal
 
+from sklearn.base import config_context
 from sklearn.metrics.cluster import (
     adjusted_mutual_info_score,
     adjusted_rand_score,
@@ -22,7 +23,8 @@
 )
 from sklearn.metrics.cluster._supervised import _generalized_average, check_clusterings
 from sklearn.utils import assert_all_finite
-from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._array_api import yield_namespace_device_dtype_combinations
+from sklearn.utils._testing import _array_api_for_tests, assert_almost_equal
 
 score_funcs = [
     adjusted_rand_score,
@@ -254,12 +256,25 @@ def test_int_overflow_mutual_info_fowlkes_mallows_score():
 
 
 def test_entropy():
-    ent = entropy([0, 0, 42.0])
-    assert_almost_equal(ent, 0.6365141, 5)
+    assert_almost_equal(entropy([0, 0, 42.0]), 0.6365141, 5)
     assert_almost_equal(entropy([]), 1)
     assert entropy([1, 1, 1, 1]) == 0
 
 
+@pytest.mark.parametrize(
+    "array_namespace, device, dtype_name", yield_namespace_device_dtype_combinations()
+)
+def test_entropy_array_api(array_namespace, device, dtype_name):
+    xp = _array_api_for_tests(array_namespace, device)
+    float_labels = xp.asarray(np.asarray([0, 0, 42.0], dtype=dtype_name), device=device)
+    empty_int32_labels = xp.asarray([], dtype=xp.int32, device=device)
+    int_labels = xp.asarray([1, 1, 1, 1], device=device)
+    with config_context(array_api_dispatch=True):
+        assert entropy(float_labels) == pytest.approx(0.6365141, abs=1e-5)
+        assert entropy(empty_int32_labels) == 1
+        assert entropy(int_labels) == 0
+
+
 def test_contingency_matrix():
     labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
     labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 7bf9183c80772..c222e26fcc82c 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -250,7 +250,7 @@ def supported_float_dtypes(xp):
 def ensure_common_namespace_device(reference, *arrays):
     """Ensure that all arrays use the same namespace and device as reference.
 
-    If neccessary the arrays are moved to the same namespace and device as
+    If necessary the arrays are moved to the same namespace and device as
     the reference array.
 
     Parameters