From 3034d2e570f956d234c1ceeda515f45394b5fc6b Mon Sep 17 00:00:00 2001
From: Tialo <tialox@yandex.ru>
Date: Sat, 25 May 2024 02:04:52 +0300
Subject: [PATCH 01/12] array-api for entropy

---
 sklearn/metrics/cluster/_supervised.py | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py
index 1f72eae3725f6..100bd15800e25 100644
--- a/sklearn/metrics/cluster/_supervised.py
+++ b/sklearn/metrics/cluster/_supervised.py
@@ -23,6 +23,7 @@
 import numpy as np
 from scipy import sparse as sp
 
+from ...utils._array_api import get_namespace
 from ...utils._param_validation import Interval, StrOptions, validate_params
 from ...utils.multiclass import type_of_target
 from ...utils.validation import check_array, check_consistent_length
@@ -1282,17 +1283,27 @@ def entropy(labels):
     -----
     The logarithm used is the natural logarithm (base-e).
     """
-    if len(labels) == 0:
-        return 1.0
-    label_idx = np.unique(labels, return_inverse=True)[1]
-    pi = np.bincount(label_idx).astype(np.float64)
-    pi = pi[pi > 0]
+    if isinstance(labels, list):
+        xp, is_array_api_compliant = np, False
+    else:
+        xp, is_array_api_compliant = get_namespace(labels)
+
+    shape_0 = labels.shape[0] if is_array_api_compliant else len(labels)
+    if shape_0 == 0:
+        return 0.0
+
+    if is_array_api_compliant:
+        unique_counts = xp.unique_counts(labels)
+    else:
+        unique_counts = np.unique(labels, return_counts=True)
+
+    pi = xp.asarray(unique_counts[1], dtype=xp.float64)
 
     # single cluster => zero entropy
     if pi.size == 1:
         return 0.0
 
-    pi_sum = np.sum(pi)
+    pi_sum = xp.sum(pi)
     # log(a / b) should be calculated as log(a) - log(b) for
     # possible loss of precision
-    return -np.sum((pi / pi_sum) * (np.log(pi) - log(pi_sum)))
+    return -xp.sum((pi / pi_sum) * (xp.log(pi) - log(pi_sum)))

From 149487c429a01813545245877f1b828218fcfef4 Mon Sep 17 00:00:00 2001
From: Tialo <tialox@yandex.ru>
Date: Thu, 30 May 2024 22:47:31 +0300
Subject: [PATCH 02/12] iter

---
 sklearn/metrics/cluster/_supervised.py        | 22 +++++--------------
 .../metrics/cluster/tests/test_supervised.py  | 21 +++++++++++++++---
 2 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py
index 100bd15800e25..f32e356fa6c14 100644
--- a/sklearn/metrics/cluster/_supervised.py
+++ b/sklearn/metrics/cluster/_supervised.py
@@ -23,7 +23,6 @@
 import numpy as np
 from scipy import sparse as sp
 
-from ...utils._array_api import get_namespace
 from ...utils._param_validation import Interval, StrOptions, validate_params
 from ...utils.multiclass import type_of_target
 from ...utils.validation import check_array, check_consistent_length
@@ -1283,27 +1282,18 @@ def entropy(labels):
     -----
     The logarithm used is the natural logarithm (base-e).
     """
-    if isinstance(labels, list):
-        xp, is_array_api_compliant = np, False
-    else:
-        xp, is_array_api_compliant = get_namespace(labels)
-
-    shape_0 = labels.shape[0] if is_array_api_compliant else len(labels)
-    if shape_0 == 0:
+    labels = np.asarray(labels)
+    if labels.shape[0] == 0:
         return 0.0
 
-    if is_array_api_compliant:
-        unique_counts = xp.unique_counts(labels)
-    else:
-        unique_counts = np.unique(labels, return_counts=True)
-
-    pi = xp.asarray(unique_counts[1], dtype=xp.float64)
+    pi = np.unique(labels, return_counts=True)[1]
+    pi = pi.astype(np.float64)
 
     # single cluster => zero entropy
     if pi.size == 1:
         return 0.0
 
-    pi_sum = xp.sum(pi)
+    pi_sum = np.sum(pi)
     # log(a / b) should be calculated as log(a) - log(b) for
     # possible loss of precision
-    return -xp.sum((pi / pi_sum) * (xp.log(pi) - log(pi_sum)))
+    return -np.sum((pi / pi_sum) * (np.log(pi) - log(pi_sum)))
diff --git a/sklearn/metrics/cluster/tests/test_supervised.py b/sklearn/metrics/cluster/tests/test_supervised.py
index dfaa58ff62c01..23b6f9f20732b 100644
--- a/sklearn/metrics/cluster/tests/test_supervised.py
+++ b/sklearn/metrics/cluster/tests/test_supervised.py
@@ -4,6 +4,7 @@
 import pytest
 from numpy.testing import assert_allclose, assert_array_almost_equal, assert_array_equal
 
+from sklearn.base import config_context
 from sklearn.metrics.cluster import (
     adjusted_mutual_info_score,
     adjusted_rand_score,
@@ -22,7 +23,8 @@
 )
 from sklearn.metrics.cluster._supervised import _generalized_average, check_clusterings
 from sklearn.utils import assert_all_finite
-from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._array_api import yield_namespace_device_dtype_combinations
+from sklearn.utils._testing import _array_api_for_tests, assert_almost_equal
 
 score_funcs = [
     adjusted_rand_score,
@@ -254,12 +256,25 @@ def test_int_overflow_mutual_info_fowlkes_mallows_score():
 
 
 def test_entropy():
-    ent = entropy([0, 0, 42.0])
-    assert_almost_equal(ent, 0.6365141, 5)
+    assert_almost_equal(entropy([0, 0, 42.0]), 0.6365141, 5)
     assert_almost_equal(entropy([]), 1)
     assert entropy([1, 1, 1, 1]) == 0
 
 
+@pytest.mark.parametrize(
+    "array_namespace, device, dtype_name", yield_namespace_device_dtype_combinations()
+)
+def test_entropy_array_api(array_namespace, device, dtype_name):
+    xp = _array_api_for_tests(array_namespace, device)
+    labels1 = xp.asarray([0, 0, 42.0])
+    labels2 = xp.asarray([])
+    labels3 = xp.asarray([1, 1, 1, 1])
+    with config_context(array_api_dispatch=True):
+        assert_almost_equal(entropy(labels1), 0.6365141, 5)
+        assert entropy(labels2) == 1
+        assert entropy(labels3) == 0
+
+
 def test_contingency_matrix():
     labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
     labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])

From 76f574fa64d78942654e26c52d013ab29c098b18 Mon Sep 17 00:00:00 2001
From: Tialo <tialox@yandex.ru>
Date: Thu, 30 May 2024 22:53:46 +0300
Subject: [PATCH 03/12] whatsnew

---
 doc/whats_new/v1.6.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst
index bc27f894ff9a1..d09ec62bedfe3 100644
--- a/doc/whats_new/v1.6.rst
+++ b/doc/whats_new/v1.6.rst
@@ -36,7 +36,8 @@ See :ref:`array_api` for more details.
   inputs.
   :pr:`28106` by :user:`Thomas Li <lithomas1>`;
 - :func:`sklearn.metrics.mean_absolute_error` :pr:`27736` by :user:`Edoardo Abati <EdAbati>`;
-- :func:`sklearn.metrics.pairwise.cosine_similarity` :pr:`29014` by :user:`Edoardo Abati <EdAbati>`.
+- :func:`sklearn.metrics.pairwise.cosine_similarity` :pr:`29014` by :user:`Edoardo Abati <EdAbati>`;
+- :func:`sklearn.metrics.cluster.entropy` :pr:`29141` by :user:`Yaroslav Korobko <Tialo>`.
 
 
 **Classes:**

From ed3ca4ff8703b84839883667f7bda311fadbb644 Mon Sep 17 00:00:00 2001
From: Tialo <tialox@yandex.ru>
Date: Thu, 30 May 2024 23:00:41 +0300
Subject: [PATCH 04/12] dont convert to numpy

---
 sklearn/metrics/cluster/_supervised.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py
index f32e356fa6c14..8a7a574cb631e 100644
--- a/sklearn/metrics/cluster/_supervised.py
+++ b/sklearn/metrics/cluster/_supervised.py
@@ -23,6 +23,7 @@
 import numpy as np
 from scipy import sparse as sp
 
+from ...utils._array_api import get_namespace
 from ...utils._param_validation import Interval, StrOptions, validate_params
 from ...utils.multiclass import type_of_target
 from ...utils.validation import check_array, check_consistent_length
@@ -1282,9 +1283,10 @@ def entropy(labels):
     -----
     The logarithm used is the natural logarithm (base-e).
     """
-    labels = np.asarray(labels)
-    if labels.shape[0] == 0:
-        return 0.0
+    xp, is_array_api_compliant = get_namespace(labels, remove_types=(list,))
+    labels_len = labels.shape[0] if is_array_api_compliant else len(labels)
+    if labels_len == 0:
+        return 1.0
 
     pi = np.unique(labels, return_counts=True)[1]
     pi = pi.astype(np.float64)

From 0eb7015085f45c4bd6f707d3a00fcdaf6d656fa2 Mon Sep 17 00:00:00 2001
From: Tialo <tialox@yandex.ru>
Date: Fri, 31 May 2024 02:17:26 +0300
Subject: [PATCH 05/12] remove remove_types

---
 sklearn/metrics/cluster/_supervised.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py
index 8a7a574cb631e..cc7f105288cc5 100644
--- a/sklearn/metrics/cluster/_supervised.py
+++ b/sklearn/metrics/cluster/_supervised.py
@@ -1283,7 +1283,7 @@ def entropy(labels):
     -----
     The logarithm used is the natural logarithm (base-e).
     """
-    xp, is_array_api_compliant = get_namespace(labels, remove_types=(list,))
+    xp, is_array_api_compliant = get_namespace(labels)
     labels_len = labels.shape[0] if is_array_api_compliant else len(labels)
     if labels_len == 0:
         return 1.0

From 616426c66a3bcab88aa0334f351d4f8a2e83170c Mon Sep 17 00:00:00 2001
From: Tialo <tialox@yandex.ru>
Date: Fri, 31 May 2024 02:20:37 +0300
Subject: [PATCH 06/12] underscore unused variable

---
 sklearn/metrics/cluster/_supervised.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py
index cc7f105288cc5..9282644d6e676 100644
--- a/sklearn/metrics/cluster/_supervised.py
+++ b/sklearn/metrics/cluster/_supervised.py
@@ -1283,7 +1283,7 @@ def entropy(labels):
     -----
     The logarithm used is the natural logarithm (base-e).
     """
-    xp, is_array_api_compliant = get_namespace(labels)
+    _, is_array_api_compliant = get_namespace(labels)
     labels_len = labels.shape[0] if is_array_api_compliant else len(labels)
     if labels_len == 0:
         return 1.0

From 8e07021318fc769302884df6684bf0f49668f5d0 Mon Sep 17 00:00:00 2001
From: Tialo <tialox@yandex.ru>
Date: Thu, 6 Jun 2024 14:53:56 +0300
Subject: [PATCH 07/12] sorted

---
 doc/whats_new/v1.6.rst | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst
index 884eb61e991b6..8f80ec556ad64 100644
--- a/doc/whats_new/v1.6.rst
+++ b/doc/whats_new/v1.6.rst
@@ -32,12 +32,10 @@ See :ref:`array_api` for more details.
 
 **Functions:**
 
-- :func:`sklearn.metrics.mean_tweedie_deviance` now supports Array API compatible
-  inputs.
-  :pr:`28106` by :user:`Thomas Li <lithomas1>`;
+- :func:`sklearn.metrics.cluster.entropy` :pr:`29141` by :user:`Yaroslav Korobko <Tialo>`;
 - :func:`sklearn.metrics.mean_absolute_error` :pr:`27736` by :user:`Edoardo Abati <EdAbati>`;
+- :func:`sklearn.metrics.mean_tweedie_deviance` :pr:`28106` by :user:`Thomas Li <lithomas1>`;
 - :func:`sklearn.metrics.pairwise.cosine_similarity` :pr:`29014` by :user:`Edoardo Abati <EdAbati>`;
-- :func:`sklearn.metrics.cluster.entropy` :pr:`29141` by :user:`Yaroslav Korobko <Tialo>`.
 
 
 **Classes:**

From eaa644b41b1297915e8dc07b9217bf213e63e5ce Mon Sep 17 00:00:00 2001
From: Tialo <tialox@yandex.ru>
Date: Thu, 6 Jun 2024 18:01:15 +0300
Subject: [PATCH 08/12] do not convert to numpy

---
 sklearn/metrics/cluster/_supervised.py           |  9 ++++-----
 sklearn/metrics/cluster/tests/test_supervised.py | 12 ++++++------
 sklearn/utils/_array_api.py                      |  2 +-
 3 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py
index 9282644d6e676..727bd3c2af13d 100644
--- a/sklearn/metrics/cluster/_supervised.py
+++ b/sklearn/metrics/cluster/_supervised.py
@@ -1283,19 +1283,18 @@ def entropy(labels):
     -----
     The logarithm used is the natural logarithm (base-e).
     """
-    _, is_array_api_compliant = get_namespace(labels)
+    xp, is_array_api_compliant = get_namespace(labels)
     labels_len = labels.shape[0] if is_array_api_compliant else len(labels)
     if labels_len == 0:
         return 1.0
 
-    pi = np.unique(labels, return_counts=True)[1]
-    pi = pi.astype(np.float64)
+    pi = xp.astype(xp.unique_counts(labels)[1], xp.float64)
 
     # single cluster => zero entropy
     if pi.size == 1:
         return 0.0
 
-    pi_sum = np.sum(pi)
+    pi_sum = xp.sum(pi)
     # log(a / b) should be calculated as log(a) - log(b) for
     # possible loss of precision
-    return -np.sum((pi / pi_sum) * (np.log(pi) - log(pi_sum)))
+    return -xp.sum((pi / pi_sum) * (xp.log(pi) - log(pi_sum)))
diff --git a/sklearn/metrics/cluster/tests/test_supervised.py b/sklearn/metrics/cluster/tests/test_supervised.py
index 23b6f9f20732b..3116bf58b5513 100644
--- a/sklearn/metrics/cluster/tests/test_supervised.py
+++ b/sklearn/metrics/cluster/tests/test_supervised.py
@@ -266,13 +266,13 @@ def test_entropy():
 )
 def test_entropy_array_api(array_namespace, device, dtype_name):
     xp = _array_api_for_tests(array_namespace, device)
-    labels1 = xp.asarray([0, 0, 42.0])
-    labels2 = xp.asarray([])
-    labels3 = xp.asarray([1, 1, 1, 1])
+    float_labels = xp.asarray([0, 0, 42.0], device=device)
+    empty_int32_labels = xp.asarray([], dtype=xp.int32, device=device)
+    int_labels = xp.asarray([1, 1, 1, 1], device=device)
     with config_context(array_api_dispatch=True):
-        assert_almost_equal(entropy(labels1), 0.6365141, 5)
-        assert entropy(labels2) == 1
-        assert entropy(labels3) == 0
+        assert_almost_equal(entropy(float_labels), 0.6365141, 5)
+        assert entropy(empty_int32_labels) == 1
+        assert entropy(int_labels) == 0
 
 
 def test_contingency_matrix():
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 7bf9183c80772..c222e26fcc82c 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -250,7 +250,7 @@ def supported_float_dtypes(xp):
 def ensure_common_namespace_device(reference, *arrays):
     """Ensure that all arrays use the same namespace and device as reference.
 
-    If neccessary the arrays are moved to the same namespace and device as
+    If necessary the arrays are moved to the same namespace and device as
     the reference array.
 
     Parameters

From f02e736b1144fb54389b33d9f64adf624dd449ad Mon Sep 17 00:00:00 2001
From: Tialo <tialox@yandex.ru>
Date: Thu, 6 Jun 2024 19:26:36 +0300
Subject: [PATCH 09/12] use dtype_name

---
 sklearn/metrics/cluster/tests/test_supervised.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/cluster/tests/test_supervised.py b/sklearn/metrics/cluster/tests/test_supervised.py
index 3116bf58b5513..c45dd9a20858e 100644
--- a/sklearn/metrics/cluster/tests/test_supervised.py
+++ b/sklearn/metrics/cluster/tests/test_supervised.py
@@ -266,7 +266,7 @@ def test_entropy():
 )
 def test_entropy_array_api(array_namespace, device, dtype_name):
     xp = _array_api_for_tests(array_namespace, device)
-    float_labels = xp.asarray([0, 0, 42.0], device=device)
+    float_labels = xp.asarray(np.asarray([0, 0, 42.0], dtype=dtype_name), device=device)
     empty_int32_labels = xp.asarray([], dtype=xp.int32, device=device)
     int_labels = xp.asarray([1, 1, 1, 1], device=device)
     with config_context(array_api_dispatch=True):

From a6fe06cd1d5aef31864d4a40fefefb5e90281f57 Mon Sep 17 00:00:00 2001
From: Tialo <tialox@yandex.ru>
Date: Thu, 6 Jun 2024 19:30:33 +0300
Subject: [PATCH 10/12] array_api.rst

---
 doc/modules/array_api.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/modules/array_api.rst b/doc/modules/array_api.rst
index 9b58cde56b09a..e665b4c36d9db 100644
--- a/doc/modules/array_api.rst
+++ b/doc/modules/array_api.rst
@@ -101,6 +101,7 @@ Estimators
 Metrics
 -------
 
+- :func:`sklearn.metrics.cluster.entropy`
 - :func:`sklearn.metrics.accuracy_score`
 - :func:`sklearn.metrics.mean_absolute_error`
 - :func:`sklearn.metrics.mean_tweedie_deviance`

From cacf3b4b71bdcfe0e91b5d0c92feeb172ab9b9f2 Mon Sep 17 00:00:00 2001
From: Tialo <65392801+Tialo@users.noreply.github.com>
Date: Fri, 7 Jun 2024 11:54:46 +0300
Subject: [PATCH 11/12] Update sklearn/metrics/cluster/_supervised.py

Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
---
 sklearn/metrics/cluster/_supervised.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py
index 727bd3c2af13d..9084ee3cd3df1 100644
--- a/sklearn/metrics/cluster/_supervised.py
+++ b/sklearn/metrics/cluster/_supervised.py
@@ -1297,4 +1297,6 @@ def entropy(labels):
     pi_sum = xp.sum(pi)
     # log(a / b) should be calculated as log(a) - log(b) for
     # possible loss of precision
-    return -xp.sum((pi / pi_sum) * (xp.log(pi) - log(pi_sum)))
+    # Always convert the result to a Python scalar (on CPU) instead of a
+    # device-specific scalar array.
+    return float(-xp.sum((pi / pi_sum) * (xp.log(pi) - log(pi_sum))))

From bf5c517a3503262c74025ea3faf7fb28d9ec8a9e Mon Sep 17 00:00:00 2001
From: Tialo <tialox@yandex.ru>
Date: Wed, 12 Jun 2024 14:59:22 +0300
Subject: [PATCH 12/12] tests

---
 sklearn/metrics/cluster/tests/test_supervised.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/cluster/tests/test_supervised.py b/sklearn/metrics/cluster/tests/test_supervised.py
index c45dd9a20858e..077dca0854a01 100644
--- a/sklearn/metrics/cluster/tests/test_supervised.py
+++ b/sklearn/metrics/cluster/tests/test_supervised.py
@@ -270,7 +270,7 @@ def test_entropy_array_api(array_namespace, device, dtype_name):
     empty_int32_labels = xp.asarray([], dtype=xp.int32, device=device)
     int_labels = xp.asarray([1, 1, 1, 1], device=device)
     with config_context(array_api_dispatch=True):
-        assert_almost_equal(entropy(float_labels), 0.6365141, 5)
+        assert entropy(float_labels) == pytest.approx(0.6365141, abs=1e-5)
         assert entropy(empty_int32_labels) == 1
         assert entropy(int_labels) == 0