From 4fec4d5c9b610a7aedfa056e183779c9b6c51747 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Thu, 10 Nov 2022 17:04:50 +0100
Subject: [PATCH 1/4] MAINT refactor affinity_propagation to make a private
 function without validation

---
 sklearn/cluster/_affinity_propagation.py    | 92 ++++++++++++++++++---
 sklearn/linear_model/_coordinate_descent.py |  2 +
 2 files changed, 82 insertions(+), 12 deletions(-)

diff --git a/sklearn/cluster/_affinity_propagation.py b/sklearn/cluster/_affinity_propagation.py
index 07443d65f0ec4..6d806ff71e0f9 100644
--- a/sklearn/cluster/_affinity_propagation.py
+++ b/sklearn/cluster/_affinity_propagation.py
@@ -52,7 +52,6 @@ def affinity_propagation(
 
     Parameters
     ----------
-
     S : array-like of shape (n_samples, n_samples)
         Matrix of similarities between points.
 
@@ -95,7 +94,6 @@ def affinity_propagation(
 
     Returns
     -------
-
     cluster_centers_indices : ndarray of shape (n_clusters,)
         Index of clusters centers.
 
@@ -128,16 +126,81 @@ def affinity_propagation(
     Between Data Points", Science Feb. 2007
     """
     S = as_float_array(S, copy=copy)
-    n_samples = S.shape[0]
 
     if S.shape[0] != S.shape[1]:
-        raise ValueError("S must be a square array (shape=%s)" % repr(S.shape))
+        raise ValueError(f"S must be a square array (shape={S.shape})")
 
     if preference is None:
         preference = np.median(S)
+    preference = np.array(preference, copy=False)
+
+    random_state = check_random_state(random_state)
+
+    return _affinity_propagation(
+        S,
+        preference=preference,
+        convergence_iter=convergence_iter,
+        max_iter=max_iter,
+        damping=damping,
+        verbose=verbose,
+        return_n_iter=return_n_iter,
+        random_state=random_state,
+    )
 
-    preference = np.array(preference)
 
+def _affinity_propagation(
+    S,
+    *,
+    preference,
+    convergence_iter,
+    max_iter,
+    damping,
+    verbose,
+    return_n_iter,
+    random_state,
+):
+    """Same function than `affinity_propagation` but without input validation.
+
+    Parameters
+    ----------
+    S : array-like of shape (n_samples, n_samples), dtype={np.float32, np.float64}
+        Matrix of similarities between points.
+
+    preference : ndarray of shape (n_samples,) or (1,)
+        Preferences for each point.
+
+    convergence_iter : int
+        Number of iterations with no change in the number of estimated clusters
+        that stops the convergence.
+
+    max_iter : int
+        Maximum number of iterations.
+
+    damping : float
+        Damping factor between 0.5 and 1.
+
+    verbose : bool, default=False
+        The verbosity level.
+
+    return_n_iter : bool
+        Whether or not to return the number of iterations.
+
+    random_state : RandomState instance
+        Pseudo-random number generator to control the starting state.
+
+    Returns
+    -------
+    cluster_centers_indices : ndarray of shape (n_clusters,)
+        Index of clusters centers.
+
+    labels : ndarray of shape (n_samples,)
+        Cluster labels for each point.
+
+    n_iter : int
+        Number of iterations run. Returned only if `return_n_iter` is
+        set to True.
+    """
+    n_samples = S.shape[0]
     if n_samples == 1 or _equal_similarities_and_preferences(S, preference):
         # It makes no sense to run the algorithm in this case, so return 1 or
         # n_samples clusters, depending on preferences
@@ -158,8 +221,6 @@ def affinity_propagation(
                 else (np.array([0]), np.array([0] * n_samples))
             )
 
-    random_state = check_random_state(random_state)
-
     # Place preference on the diagonal of S
     S.flat[:: (n_samples + 1)] = preference
 
@@ -472,24 +533,31 @@ def fit(self, X, y=None):
             accept_sparse = "csr"
         X = self._validate_data(X, accept_sparse=accept_sparse)
         if self.affinity == "precomputed":
-            self.affinity_matrix_ = X
+            self.affinity_matrix_ = X.copy() if self.copy else X
         else:  # self.affinity == "euclidean"
             self.affinity_matrix_ = -euclidean_distances(X, squared=True)
 
+        if self.preference is None:
+            preference = np.median(self.affinity_matrix_)
+        else:
+            preference = self.preference
+        preference = np.array(preference, copy=False)
+
+        random_state = check_random_state(self.random_state)
+
         (
             self.cluster_centers_indices_,
             self.labels_,
             self.n_iter_,
-        ) = affinity_propagation(
+        ) = _affinity_propagation(
             self.affinity_matrix_,
-            preference=self.preference,
+            preference=preference,
             max_iter=self.max_iter,
             convergence_iter=self.convergence_iter,
             damping=self.damping,
-            copy=self.copy,
             verbose=self.verbose,
             return_n_iter=True,
-            random_state=self.random_state,
+            random_state=random_state,
         )
 
         if self.affinity != "precomputed":
diff --git a/sklearn/linear_model/_coordinate_descent.py b/sklearn/linear_model/_coordinate_descent.py
index bb9a4e4c0c326..0b8e9eba8e585 100644
--- a/sklearn/linear_model/_coordinate_descent.py
+++ b/sklearn/linear_model/_coordinate_descent.py
@@ -631,6 +631,8 @@ def enet_path(
                 positive,
             )
         elif precompute is False:
+            if l1_reg < 0.1:
+                print(coef_[:5])
             model = cd_fast.enet_coordinate_descent(
                 coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
             )

From fd3f9e6e63866f7e47541042c9eb8432169b3833 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Thu, 10 Nov 2022 17:23:04 +0100
Subject: [PATCH 2/4] MAINT factor common validation into a Bunch-based helper

---
 sklearn/cluster/_affinity_propagation.py      | 263 +++++++++---------
 .../tests/test_affinity_propagation.py        |   2 +-
 2 files changed, 139 insertions(+), 126 deletions(-)

diff --git a/sklearn/cluster/_affinity_propagation.py b/sklearn/cluster/_affinity_propagation.py
index 6d806ff71e0f9..188d9be1e684c 100644
--- a/sklearn/cluster/_affinity_propagation.py
+++ b/sklearn/cluster/_affinity_propagation.py
@@ -12,7 +12,7 @@
 
 from ..exceptions import ConvergenceWarning
 from ..base import BaseEstimator, ClusterMixin
-from ..utils import as_float_array, check_random_state
+from ..utils import Bunch, as_float_array, check_random_state
 from ..utils._param_validation import Interval, StrOptions
 from ..utils.validation import check_is_fitted
 from ..metrics import euclidean_distances
@@ -34,120 +34,6 @@ def all_equal_similarities():
     return all_equal_preferences() and all_equal_similarities()
 
 
-def affinity_propagation(
-    S,
-    *,
-    preference=None,
-    convergence_iter=15,
-    max_iter=200,
-    damping=0.5,
-    copy=True,
-    verbose=False,
-    return_n_iter=False,
-    random_state=None,
-):
-    """Perform Affinity Propagation Clustering of data.
-
-    Read more in the :ref:`User Guide <affinity_propagation>`.
-
-    Parameters
-    ----------
-    S : array-like of shape (n_samples, n_samples)
-        Matrix of similarities between points.
-
-    preference : array-like of shape (n_samples,) or float, default=None
-        Preferences for each point - points with larger values of
-        preferences are more likely to be chosen as exemplars. The number of
-        exemplars, i.e. of clusters, is influenced by the input preferences
-        value. If the preferences are not passed as arguments, they will be
-        set to the median of the input similarities (resulting in a moderate
-        number of clusters). For a smaller amount of clusters, this can be set
-        to the minimum value of the similarities.
-
-    convergence_iter : int, default=15
-        Number of iterations with no change in the number
-        of estimated clusters that stops the convergence.
-
-    max_iter : int, default=200
-        Maximum number of iterations.
-
-    damping : float, default=0.5
-        Damping factor between 0.5 and 1.
-
-    copy : bool, default=True
-        If copy is False, the affinity matrix is modified inplace by the
-        algorithm, for memory efficiency.
-
-    verbose : bool, default=False
-        The verbosity level.
-
-    return_n_iter : bool, default=False
-        Whether or not to return the number of iterations.
-
-    random_state : int, RandomState instance or None, default=None
-        Pseudo-random number generator to control the starting state.
-        Use an int for reproducible results across function calls.
-        See the :term:`Glossary <random_state>`.
-
-        .. versionadded:: 0.23
-            this parameter was previously hardcoded as 0.
-
-    Returns
-    -------
-    cluster_centers_indices : ndarray of shape (n_clusters,)
-        Index of clusters centers.
-
-    labels : ndarray of shape (n_samples,)
-        Cluster labels for each point.
-
-    n_iter : int
-        Number of iterations run. Returned only if `return_n_iter` is
-        set to True.
-
-    Notes
-    -----
-    For an example, see :ref:`examples/cluster/plot_affinity_propagation.py
-    <sphx_glr_auto_examples_cluster_plot_affinity_propagation.py>`.
-
-    When the algorithm does not converge, it will still return a arrays of
-    ``cluster_center_indices`` and labels if there are any exemplars/clusters,
-    however they may be degenerate and should be used with caution.
-
-    When all training samples have equal similarities and equal preferences,
-    the assignment of cluster centers and labels depends on the preference.
-    If the preference is smaller than the similarities, a single cluster center
-    and label ``0`` for every sample will be returned. Otherwise, every
-    training sample becomes its own cluster center and is assigned a unique
-    label.
-
-    References
-    ----------
-    Brendan J. Frey and Delbert Dueck, "Clustering by Passing Messages
-    Between Data Points", Science Feb. 2007
-    """
-    S = as_float_array(S, copy=copy)
-
-    if S.shape[0] != S.shape[1]:
-        raise ValueError(f"S must be a square array (shape={S.shape})")
-
-    if preference is None:
-        preference = np.median(S)
-    preference = np.array(preference, copy=False)
-
-    random_state = check_random_state(random_state)
-
-    return _affinity_propagation(
-        S,
-        preference=preference,
-        convergence_iter=convergence_iter,
-        max_iter=max_iter,
-        damping=damping,
-        verbose=verbose,
-        return_n_iter=return_n_iter,
-        random_state=random_state,
-    )
-
-
 def _affinity_propagation(
     S,
     *,
@@ -328,7 +214,133 @@ def _affinity_propagation(
         return cluster_centers_indices, labels
 
 
-###############################################################################
+def _validate_init_common_params(params):
+    """Validate common parameters for init methods and public function."""
+    if params.S.shape[0] != params.S.shape[1]:
+        raise ValueError(
+            f"The matrix of similarities must be a square array. Got {params.S.shape} "
+            "instead."
+        )
+
+    if params.preference is None:
+        preference = np.median(params.S)
+    else:
+        preference = params.preference
+    preference = np.array(preference, copy=False)
+
+    random_state = check_random_state(params.random_state)
+    return {
+        "preference": preference,
+        "random_state": random_state,
+    }
+
+
+def affinity_propagation(
+    S,
+    *,
+    preference=None,
+    convergence_iter=15,
+    max_iter=200,
+    damping=0.5,
+    copy=True,
+    verbose=False,
+    return_n_iter=False,
+    random_state=None,
+):
+    """Perform Affinity Propagation Clustering of data.
+
+    Read more in the :ref:`User Guide <affinity_propagation>`.
+
+    Parameters
+    ----------
+    S : array-like of shape (n_samples, n_samples)
+        Matrix of similarities between points.
+
+    preference : array-like of shape (n_samples,) or float, default=None
+        Preferences for each point - points with larger values of
+        preferences are more likely to be chosen as exemplars. The number of
+        exemplars, i.e. of clusters, is influenced by the input preferences
+        value. If the preferences are not passed as arguments, they will be
+        set to the median of the input similarities (resulting in a moderate
+        number of clusters). For a smaller amount of clusters, this can be set
+        to the minimum value of the similarities.
+
+    convergence_iter : int, default=15
+        Number of iterations with no change in the number
+        of estimated clusters that stops the convergence.
+
+    max_iter : int, default=200
+        Maximum number of iterations.
+
+    damping : float, default=0.5
+        Damping factor between 0.5 and 1.
+
+    copy : bool, default=True
+        If copy is False, the affinity matrix is modified inplace by the
+        algorithm, for memory efficiency.
+
+    verbose : bool, default=False
+        The verbosity level.
+
+    return_n_iter : bool, default=False
+        Whether or not to return the number of iterations.
+
+    random_state : int, RandomState instance or None, default=None
+        Pseudo-random number generator to control the starting state.
+        Use an int for reproducible results across function calls.
+        See the :term:`Glossary <random_state>`.
+
+        .. versionadded:: 0.23
+            this parameter was previously hardcoded as 0.
+
+    Returns
+    -------
+    cluster_centers_indices : ndarray of shape (n_clusters,)
+        Index of clusters centers.
+
+    labels : ndarray of shape (n_samples,)
+        Cluster labels for each point.
+
+    n_iter : int
+        Number of iterations run. Returned only if `return_n_iter` is
+        set to True.
+
+    Notes
+    -----
+    For an example, see :ref:`examples/cluster/plot_affinity_propagation.py
+    <sphx_glr_auto_examples_cluster_plot_affinity_propagation.py>`.
+
+    When the algorithm does not converge, it will still return arrays of
+    ``cluster_center_indices`` and labels if there are any exemplars/clusters,
+    however they may be degenerate and should be used with caution.
+
+    When all training samples have equal similarities and equal preferences,
+    the assignment of cluster centers and labels depends on the preference.
+    If the preference is smaller than the similarities, a single cluster center
+    and label ``0`` for every sample will be returned. Otherwise, every
+    training sample becomes its own cluster center and is assigned a unique
+    label.
+
+    References
+    ----------
+    Brendan J. Frey and Delbert Dueck, "Clustering by Passing Messages
+    Between Data Points", Science Feb. 2007
+    """
+    S = as_float_array(S, copy=copy)
+
+    params = _validate_init_common_params(
+        Bunch(**{"S": S, "preference": preference, "random_state": random_state})
+    )
+
+    return _affinity_propagation(
+        S,
+        convergence_iter=convergence_iter,
+        max_iter=max_iter,
+        damping=damping,
+        verbose=verbose,
+        return_n_iter=return_n_iter,
+        **params,
+    )
 
 
 class AffinityPropagation(ClusterMixin, BaseEstimator):
@@ -537,13 +549,15 @@ def fit(self, X, y=None):
         else:  # self.affinity == "euclidean"
             self.affinity_matrix_ = -euclidean_distances(X, squared=True)
 
-        if self.preference is None:
-            preference = np.median(self.affinity_matrix_)
-        else:
-            preference = self.preference
-        preference = np.array(preference, copy=False)
-
-        random_state = check_random_state(self.random_state)
+        params = _validate_init_common_params(
+            Bunch(
+                **{
+                    "S": self.affinity_matrix_,
+                    "preference": self.preference,
+                    "random_state": self.random_state,
+                }
+            )
+        )
 
         (
             self.cluster_centers_indices_,
@@ -551,13 +565,12 @@ def fit(self, X, y=None):
             self.n_iter_,
         ) = _affinity_propagation(
             self.affinity_matrix_,
-            preference=preference,
             max_iter=self.max_iter,
             convergence_iter=self.convergence_iter,
             damping=self.damping,
             verbose=self.verbose,
             return_n_iter=True,
-            random_state=random_state,
+            **params,
         )
 
         if self.affinity != "precomputed":
diff --git a/sklearn/cluster/tests/test_affinity_propagation.py b/sklearn/cluster/tests/test_affinity_propagation.py
index cc696620a0e4d..52007c375f667 100644
--- a/sklearn/cluster/tests/test_affinity_propagation.py
+++ b/sklearn/cluster/tests/test_affinity_propagation.py
@@ -101,7 +101,7 @@ def test_affinity_propagation_no_copy():
 def test_affinity_propagation_affinity_shape():
     """Check the shape of the affinity matrix when using `affinity_propagation."""
     S = -euclidean_distances(X, squared=True)
-    err_msg = "S must be a square array"
+    err_msg = "The matrix of similarities must be a square array"
     with pytest.raises(ValueError, match=err_msg):
         affinity_propagation(S[:, :-1])
 

From 0cbf2b4a6222ee2974041d10e08ef56e845fa398 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Thu, 10 Nov 2022 17:26:46 +0100
Subject: [PATCH 3/4] MAINT remove debug print from enet_path

---
 sklearn/linear_model/_coordinate_descent.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/sklearn/linear_model/_coordinate_descent.py b/sklearn/linear_model/_coordinate_descent.py
index 0b8e9eba8e585..bb9a4e4c0c326 100644
--- a/sklearn/linear_model/_coordinate_descent.py
+++ b/sklearn/linear_model/_coordinate_descent.py
@@ -631,8 +631,6 @@ def enet_path(
                 positive,
             )
         elif precompute is False:
-            if l1_reg < 0.1:
-                print(coef_[:5])
             model = cd_fast.enet_coordinate_descent(
                 coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
             )

From f12ba4a95f73b82026929b60936aa89a14d39a09 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Thu, 10 Nov 2022 18:02:47 +0100
Subject: [PATCH 4/4] make the function call the class

---
 sklearn/cluster/_affinity_propagation.py | 111 ++++++-----------------
 1 file changed, 30 insertions(+), 81 deletions(-)

diff --git a/sklearn/cluster/_affinity_propagation.py b/sklearn/cluster/_affinity_propagation.py
index 188d9be1e684c..180e37996aa07 100644
--- a/sklearn/cluster/_affinity_propagation.py
+++ b/sklearn/cluster/_affinity_propagation.py
@@ -12,7 +12,7 @@
 
 from ..exceptions import ConvergenceWarning
 from ..base import BaseEstimator, ClusterMixin
-from ..utils import Bunch, as_float_array, check_random_state
+from ..utils import as_float_array, check_random_state
 from ..utils._param_validation import Interval, StrOptions
 from ..utils.validation import check_is_fitted
 from ..metrics import euclidean_distances
@@ -45,47 +45,7 @@ def _affinity_propagation(
     return_n_iter,
     random_state,
 ):
-    """Same function than `affinity_propagation` but without input validation.
-
-    Parameters
-    ----------
-    S : array-like of shape (n_samples, n_samples), dtype={np.float32, np.float64}
-        Matrix of similarities between points.
-
-    preference : ndarray of shape (n_samples,) or (1,)
-        Preferences for each point.
-
-    convergence_iter : int
-        Number of iterations with no change in the number of estimated clusters
-        that stops the convergence.
-
-    max_iter : int
-        Maximum number of iterations.
-
-    damping : float
-        Damping factor between 0.5 and 1.
-
-    verbose : bool, default=False
-        The verbosity level.
-
-    return_n_iter : bool
-        Whether or not to return the number of iterations.
-
-    random_state : RandomState instance
-        Pseudo-random number generator to control the starting state.
-
-    Returns
-    -------
-    cluster_centers_indices : ndarray of shape (n_clusters,)
-        Index of clusters centers.
-
-    labels : ndarray of shape (n_samples,)
-        Cluster labels for each point.
-
-    n_iter : int
-        Number of iterations run. Returned only if `return_n_iter` is
-        set to True.
-    """
+    """Main affinity propagation algorithm."""
     n_samples = S.shape[0]
     if n_samples == 1 or _equal_similarities_and_preferences(S, preference):
         # It makes no sense to run the algorithm in this case, so return 1 or
@@ -214,25 +174,8 @@ def _affinity_propagation(
         return cluster_centers_indices, labels
 
 
-def _validate_init_common_params(params):
-    """Validate common parameters for init methods and public function."""
-    if params.S.shape[0] != params.S.shape[1]:
-        raise ValueError(
-            f"The matrix of similarities must be a square array. Got {params.S.shape} "
-            "instead."
-        )
-
-    if params.preference is None:
-        preference = np.median(params.S)
-    else:
-        preference = params.preference
-    preference = np.array(preference, copy=False)
-
-    random_state = check_random_state(params.random_state)
-    return {
-        "preference": preference,
-        "random_state": random_state,
-    }
+###############################################################################
+# Public API
 
 
 def affinity_propagation(
@@ -328,19 +271,20 @@ def affinity_propagation(
     """
     S = as_float_array(S, copy=copy)
 
-    params = _validate_init_common_params(
-        Bunch(**{"S": S, "preference": preference, "random_state": random_state})
-    )
-
-    return _affinity_propagation(
-        S,
-        convergence_iter=convergence_iter,
-        max_iter=max_iter,
+    estimator = AffinityPropagation(
         damping=damping,
+        max_iter=max_iter,
+        convergence_iter=convergence_iter,
+        copy=False,
+        preference=preference,
+        affinity="precomputed",
         verbose=verbose,
-        return_n_iter=return_n_iter,
-        **params,
-    )
+        random_state=random_state,
+    ).fit(S)
+
+    if return_n_iter:
+        return estimator.cluster_centers_indices_, estimator.labels_, estimator.n_iter_
+    return estimator.cluster_centers_indices_, estimator.labels_
 
 
 class AffinityPropagation(ClusterMixin, BaseEstimator):
@@ -549,15 +493,19 @@ def fit(self, X, y=None):
         else:  # self.affinity == "euclidean"
             self.affinity_matrix_ = -euclidean_distances(X, squared=True)
 
-        params = _validate_init_common_params(
-            Bunch(
-                **{
-                    "S": self.affinity_matrix_,
-                    "preference": self.preference,
-                    "random_state": self.random_state,
-                }
+        if self.affinity_matrix_.shape[0] != self.affinity_matrix_.shape[1]:
+            raise ValueError(
+                "The matrix of similarities must be a square array. "
+                f"Got {self.affinity_matrix_.shape} instead."
             )
-        )
+
+        if self.preference is None:
+            preference = np.median(self.affinity_matrix_)
+        else:
+            preference = self.preference
+        preference = np.array(preference, copy=False)
+
+        random_state = check_random_state(self.random_state)
 
         (
             self.cluster_centers_indices_,
@@ -567,10 +515,11 @@ def fit(self, X, y=None):
             self.affinity_matrix_,
             max_iter=self.max_iter,
             convergence_iter=self.convergence_iter,
+            preference=preference,
             damping=self.damping,
             verbose=self.verbose,
             return_n_iter=True,
-            **params,
+            random_state=random_state,
         )
 
         if self.affinity != "precomputed":