From 6468cf4a0adbb7d02c46e080ed08c52e9bf47903 Mon Sep 17 00:00:00 2001
From: TMat <timothee.mathieu@inria.fr>
Date: Wed, 13 Oct 2021 10:59:10 +0200
Subject: [PATCH 1/7] add IRLS algo

---
 .../robust/robust_weighted_estimator.py       | 112 ++++++++++++------
 .../tests/test_robust_weighted_estimator.py   |  15 ++-
 2 files changed, 82 insertions(+), 45 deletions(-)

diff --git a/sklearn_extra/robust/robust_weighted_estimator.py b/sklearn_extra/robust/robust_weighted_estimator.py
index a2d7c063..63887b5d 100644
--- a/sklearn_extra/robust/robust_weighted_estimator.py
+++ b/sklearn_extra/robust/robust_weighted_estimator.py
@@ -21,7 +21,7 @@
     check_consistent_length,
 )
 from sklearn.utils.validation import check_is_fitted
-from sklearn.linear_model import SGDRegressor, SGDClassifier
+from sklearn.linear_model import SGDRegressor, SGDClassifier, LinearRegression
 from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier
 from sklearn.cluster import MiniBatchKMeans
 from sklearn.metrics.pairwise import euclidean_distances
@@ -111,6 +111,9 @@ class _RobustWeightedEstimator(BaseEstimator):
         If callable, the function is used as loss function ro construct
         the weights.
 
+    solver : {"IRLS", "SGD"}, default="SGD"
+        Algorithm used for the optimization. For now only for regression.
+
     weighting : string, default="huber"
         Weighting scheme used to make the estimator robust.
         Can be 'huber' for huber-type weights or  'mom' for median-of-means
@@ -144,8 +147,7 @@ class _RobustWeightedEstimator(BaseEstimator):
         Can have a big effect on efficiency.
         If None, k is estimated using the number of points distant from the
         median of means of more than 2 times a robust estimate of the scale
-        (using the inter-quartile range), this tends to be conservative
-        (robust).
+        (using the inter-quartile range), this can be unstable.
 
     tol : float or None, (default = 1e-3)
         The stopping criterion. If it is not None, training will stop when
@@ -210,12 +212,13 @@ def __init__(
         self,
         base_estimator,
         loss,
+        solver="SGD",
         weighting="huber",
         max_iter=100,
         burn_in=10,
         eta0=0.1,
         c=None,
-        k=0,
+        k=1,
         tol=1e-5,
         n_iter_no_change=10,
         verbose=0,
@@ -223,11 +226,13 @@ def __init__(
     ):
         self.base_estimator = base_estimator
         self.weighting = weighting
+        self.solver=solver
         self.eta0 = eta0
         self.burn_in = burn_in
         self.c = c
         self.k = k
         self.loss = loss
+        self.solver = solver
         self.max_iter = max_iter
         self.tol = tol
         self.n_iter_no_change = n_iter_no_change
@@ -278,9 +283,9 @@ def fit(self, X, y=None):
 
         if "n_iter_no_change" in parameters:
             base_estimator.set_params(n_iter_no_change=self.n_iter_no_change)
-
-        base_estimator.set_params(random_state=random_state)
-        if self.burn_in > 0:
+        if "random_state" in parameters:
+            base_estimator.set_params(random_state=random_state)
+        if (self.burn_in > 0) and self.solver != 'IRLS':
             learning_rate = base_estimator.learning_rate
             base_estimator.set_params(learning_rate="constant", eta0=self.eta0)
 
@@ -302,8 +307,11 @@ def fit(self, X, y=None):
             # Initialization of the estimator
             # Partial fit for the estimator to be set to "fitted" to be able
             # to predict.
-            base_estimator.partial_fit(X, y)
-            # As the partial fit is here non-robust, override the
+            if self.solver == "SGD":
+                base_estimator.partial_fit(X, y)
+            else:
+                base_estimator.fit(X,y)
+            # As the fit is here non-robust, override the
             # learned coefs.
             base_estimator.coef_ = np.zeros([len(X[0])])
             base_estimator.intercept_ = np.array([0])
@@ -320,7 +328,7 @@ def fit(self, X, y=None):
         # Optimization algorithm
         for epoch in range(self.max_iter):
 
-            if epoch > self.burn_in and self.burn_in > 0:
+            if (epoch > self.burn_in) and (self.burn_in > 0) and (self.solver == "SGD"):
                 # If not in the burn_in phase anymore, change the learning_rate
                 # calibration to the one edicted by self.base_estimator.
                 base_estimator.set_params(learning_rate=learning_rate)
@@ -352,8 +360,6 @@ def fit(self, X, y=None):
             # epoch using the previously computed weights. Also shuffle the data.
             perm = random_state.permutation(len(X))
 
-            base_estimator.partial_fit(X, y, sample_weight=weights)
-
             if (self.tol is not None) and (
                 current_loss > best_loss - self.tol
             ):
@@ -374,9 +380,11 @@ def fit(self, X, y=None):
                     X[perm], y, sample_weight=weights[perm]
                 )
             else:
-                base_estimator.partial_fit(
-                    X[perm], y[perm], sample_weight=weights[perm]
-                )
+                if self.solver == "SGD":
+                    base_estimator.partial_fit(X[perm], y[perm], sample_weight=weights[perm])
+                else:
+                    base_estimator.fit(X[perm], y[perm], sample_weight=weights[perm])
+
             if (self.tol is not None) and (
                 current_loss > best_loss - self.tol
             ):
@@ -483,10 +491,12 @@ def psisx(x):
         elif self.weighting == "mom":
             if self.k is None:
                 med = np.median(loss_values)
-                # scale estimator using iqr, rescaled by what would be if the
-                # loss was Gaussian.
-                scale = iqr(np.abs(loss_values - med)) / 1.37
+                # scale estimator using iqr
+                scale = iqr(np.abs(loss_values - med))
                 k = np.sum(np.abs(loss_values - med) > 2 * scale)
+                if k < 2:
+                    # For safety
+                    k = 2
             else:
                 k = self.k
             # Choose (randomly) 2k+1 (almost-)equal blocks of data.
@@ -636,8 +646,7 @@ class RobustWeightedClassifier(BaseEstimator, ClassifierMixin):
         Can have a big effect on efficiency.
         If None, k is estimated using the number of points distant from the
         median of means of more than 2 times a robust estimate of the scale
-        (using the inter-quartile range), this tends to be conservative
-        (robust).
+        (using the inter-quartile range), this can be unstable.
 
     loss : string, None or callable, default="log"
         Classification losses supported : 'log', 'hinge', 'modified_huber'.
@@ -742,7 +751,7 @@ def __init__(
         burn_in=10,
         eta0=0.01,
         c=None,
-        k=0,
+        k=1,
         loss="log",
         sgd_args=None,
         multi_class="ovr",
@@ -940,6 +949,11 @@ class RobustWeightedRegressor(BaseEstimator, RegressorMixin):
         Can be 'huber' for huber-type weights or  'mom' for median-of-means
         type weights.
 
+    solver : {"SGD", "IRLS"}
+        Algorithm used for optimization. If "SGD" then, use SGDRegressor as
+        base estimator and reweight at each optimization step. If "IRLS" then
+        use multiple fit of reweighted LinearRegression with robust weights.
+
     max_iter : int, default=100
         Maximum number of iterations.
         For more information, see the optimization scheme of base_estimator
@@ -1052,6 +1066,7 @@ class RobustWeightedRegressor(BaseEstimator, RegressorMixin):
     def __init__(
         self,
         weighting="huber",
+        solver="SGD",
         max_iter=100,
         burn_in=10,
         eta0=0.01,
@@ -1066,6 +1081,7 @@ def __init__(
     ):
 
         self.weighting = weighting
+        self.solver = solver
         self.max_iter = max_iter
         self.burn_in = burn_in
         self.eta0 = eta0
@@ -1102,21 +1118,40 @@ def fit(self, X, y):
         # Define the base estimator
 
         X, y = self._validate_data(X, y, y_numeric=True)
-
-        self.base_estimator_ = _RobustWeightedEstimator(
-            SGDRegressor(**sgd_args, eta0=self.eta0),
-            weighting=self.weighting,
-            loss=self.loss,
-            burn_in=self.burn_in,
-            c=self.c,
-            k=self.k,
-            eta0=self.eta0,
-            max_iter=self.max_iter,
-            tol=self.tol,
-            n_iter_no_change=self.n_iter_no_change,
-            verbose=self.verbose,
-            random_state=self.random_state,
-        )
+        if self.solver == "SGD":
+            self.base_estimator_ = _RobustWeightedEstimator(
+                SGDRegressor(**sgd_args, eta0=self.eta0),
+                weighting=self.weighting,
+                solver="SGD",
+                loss=self.loss,
+                burn_in=self.burn_in,
+                c=self.c,
+                k=self.k,
+                eta0=self.eta0,
+                max_iter=self.max_iter,
+                tol=self.tol,
+                n_iter_no_change=self.n_iter_no_change,
+                verbose=self.verbose,
+                random_state=self.random_state,
+            )
+        elif self.solver == "IRLS":
+            self.base_estimator_ = _RobustWeightedEstimator(
+                LinearRegression(),
+                weighting=self.weighting,
+                solver="IRLS",
+                loss=self.loss,
+                burn_in=self.burn_in,
+                c=self.c,
+                k=self.k,
+                eta0=self.eta0,
+                max_iter=self.max_iter,
+                tol=self.tol,
+                n_iter_no_change=self.n_iter_no_change,
+                verbose=self.verbose,
+                random_state=self.random_state,
+            )
+        else:
+            raise ValueError('No such solver.')
         self.base_estimator_.fit(X, y)
 
         self.weights_ = self.base_estimator_.weights_
@@ -1215,8 +1250,7 @@ class RobustWeightedKMeans(BaseEstimator, ClusterMixin):
         Can have a big effect on efficiency.
         If None, k is estimated using the number of points distant from the
         median of means of more than 2 times a robust estimate of the scale
-        (using the inter-quartile range), this tends to be conservative
-        (robust).
+        (using the inter-quartile range), this can be unstable.
 
     kmeans_args : dict, default={}
         arguments of the MiniBatchKMeans base estimator. Must not contain
@@ -1307,7 +1341,7 @@ def __init__(
         max_iter=100,
         eta0=0.01,
         c=None,
-        k=0,
+        k=1,
         kmeans_args=None,
         tol=1e-3,
         n_iter_no_change=10,
diff --git a/sklearn_extra/robust/tests/test_robust_weighted_estimator.py b/sklearn_extra/robust/tests/test_robust_weighted_estimator.py
index 3482bbe8..67231734 100644
--- a/sklearn_extra/robust/tests/test_robust_weighted_estimator.py
+++ b/sklearn_extra/robust/tests/test_robust_weighted_estimator.py
@@ -34,6 +34,7 @@
 classif_losses = ["log", "hinge"]
 weightings = ["huber", "mom"]
 multi_class = ["ovr", "ovo"]
+solvers = ['SGD', 'IRLS']
 
 
 def test_robust_estimator_max_iter():
@@ -233,8 +234,8 @@ def test_robust_no_proba():
 
 
 # Regression test with outliers
-X_rc = rng.uniform(-1, 1, size=[200])
-y_rc = X_rc + 0.1 * rng.normal(size=200)
+X_rc = rng.uniform(-1, 1, size=[300])
+y_rc = X_rc + 0.1 * rng.normal(size=300)
 X_rc[0] = 10
 X_rc = X_rc.reshape(-1, 1)
 y_rc[0] = -1
@@ -246,10 +247,12 @@ def test_robust_no_proba():
 @pytest.mark.parametrize("weighting", weightings)
 @pytest.mark.parametrize("k", k_values)
 @pytest.mark.parametrize("c", c_values)
-def test_corrupted_regression(loss, weighting, k, c):
+@pytest.mark.parametrize("solver", solvers)
+def test_corrupted_regression(loss, weighting, k, c, solver):
     reg = RobustWeightedRegressor(
         loss=loss,
-        max_iter=50,
+        max_iter=100,
+        solver=solver,
         weighting=weighting,
         k=k,
         c=c,
@@ -257,8 +260,8 @@ def test_corrupted_regression(loss, weighting, k, c):
         n_iter_no_change=20,
     )
     reg.fit(X_rc, y_rc)
-    assert np.abs(reg.coef_[0] - 1) < 0.1
-    assert np.abs(reg.intercept_[0]) < 0.1
+    assert np.abs(reg.coef_[0] - 1) < 0.2
+    assert np.abs(reg.intercept_) < 0.2
 
 
 # Check that weights_ parameter can be used as outlier score.

From 817d869640122cfdb5f42c5ed0399dd7464bdd62 Mon Sep 17 00:00:00 2001
From: TMat <timothee.mathieu@inria.fr>
Date: Wed, 13 Oct 2021 11:03:33 +0200
Subject: [PATCH 2/7] black

---
 .../robust/robust_weighted_estimator.py       | 22 +++++++++++++------
 .../tests/test_robust_weighted_estimator.py   |  2 +-
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/sklearn_extra/robust/robust_weighted_estimator.py b/sklearn_extra/robust/robust_weighted_estimator.py
index 63887b5d..cce0b078 100644
--- a/sklearn_extra/robust/robust_weighted_estimator.py
+++ b/sklearn_extra/robust/robust_weighted_estimator.py
@@ -226,7 +226,7 @@ def __init__(
     ):
         self.base_estimator = base_estimator
         self.weighting = weighting
-        self.solver=solver
+        self.solver = solver
         self.eta0 = eta0
         self.burn_in = burn_in
         self.c = c
@@ -285,7 +285,7 @@ def fit(self, X, y=None):
             base_estimator.set_params(n_iter_no_change=self.n_iter_no_change)
         if "random_state" in parameters:
             base_estimator.set_params(random_state=random_state)
-        if (self.burn_in > 0) and self.solver != 'IRLS':
+        if (self.burn_in > 0) and self.solver != "IRLS":
             learning_rate = base_estimator.learning_rate
             base_estimator.set_params(learning_rate="constant", eta0=self.eta0)
 
@@ -310,7 +310,7 @@ def fit(self, X, y=None):
             if self.solver == "SGD":
                 base_estimator.partial_fit(X, y)
             else:
-                base_estimator.fit(X,y)
+                base_estimator.fit(X, y)
             # As the fit is here non-robust, override the
             # learned coefs.
             base_estimator.coef_ = np.zeros([len(X[0])])
@@ -328,7 +328,11 @@ def fit(self, X, y=None):
         # Optimization algorithm
         for epoch in range(self.max_iter):
 
-            if (epoch > self.burn_in) and (self.burn_in > 0) and (self.solver == "SGD"):
+            if (
+                (epoch > self.burn_in)
+                and (self.burn_in > 0)
+                and (self.solver == "SGD")
+            ):
                 # If not in the burn_in phase anymore, change the learning_rate
                 # calibration to the one edicted by self.base_estimator.
                 base_estimator.set_params(learning_rate=learning_rate)
@@ -381,9 +385,13 @@ def fit(self, X, y=None):
                 )
             else:
                 if self.solver == "SGD":
-                    base_estimator.partial_fit(X[perm], y[perm], sample_weight=weights[perm])
+                    base_estimator.partial_fit(
+                        X[perm], y[perm], sample_weight=weights[perm]
+                    )
                 else:
-                    base_estimator.fit(X[perm], y[perm], sample_weight=weights[perm])
+                    base_estimator.fit(
+                        X[perm], y[perm], sample_weight=weights[perm]
+                    )
 
             if (self.tol is not None) and (
                 current_loss > best_loss - self.tol
@@ -1151,7 +1159,7 @@ def fit(self, X, y):
                 random_state=self.random_state,
             )
         else:
-            raise ValueError('No such solver.')
+            raise ValueError("No such solver.")
         self.base_estimator_.fit(X, y)
 
         self.weights_ = self.base_estimator_.weights_
diff --git a/sklearn_extra/robust/tests/test_robust_weighted_estimator.py b/sklearn_extra/robust/tests/test_robust_weighted_estimator.py
index 67231734..13cd82ef 100644
--- a/sklearn_extra/robust/tests/test_robust_weighted_estimator.py
+++ b/sklearn_extra/robust/tests/test_robust_weighted_estimator.py
@@ -34,7 +34,7 @@
 classif_losses = ["log", "hinge"]
 weightings = ["huber", "mom"]
 multi_class = ["ovr", "ovo"]
-solvers = ['SGD', 'IRLS']
+solvers = ["SGD", "IRLS"]
 
 
 def test_robust_estimator_max_iter():

From 08bcc3b8a13acf84fa3217eeb8ac826414b3a26d Mon Sep 17 00:00:00 2001
From: TMat <timothee.mathieu@inria.fr>
Date: Wed, 13 Oct 2021 11:11:45 +0200
Subject: [PATCH 3/7] fix default parameter

---
 .../robust/robust_weighted_estimator.py          | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/sklearn_extra/robust/robust_weighted_estimator.py b/sklearn_extra/robust/robust_weighted_estimator.py
index cce0b078..e060fdbf 100644
--- a/sklearn_extra/robust/robust_weighted_estimator.py
+++ b/sklearn_extra/robust/robust_weighted_estimator.py
@@ -140,7 +140,7 @@ class _RobustWeightedEstimator(BaseEstimator):
         If None, c is estimated at each step using half the Inter-quartile
         range, this tends to be conservative (robust).
 
-    k : int < sample_size/2, default=1
+    k : int < sample_size/2, default=None
         Parameter used for mom weighting procedure, used only if weightings
         is 'mom'. 2k+1 is the number of blocks used for median-of-means
         estimation, higher value of k means a more robust estimator.
@@ -218,7 +218,7 @@ def __init__(
         burn_in=10,
         eta0=0.1,
         c=None,
-        k=1,
+        k=None,
         tol=1e-5,
         n_iter_no_change=10,
         verbose=0,
@@ -647,7 +647,7 @@ class RobustWeightedClassifier(BaseEstimator, ClassifierMixin):
         If None, c is estimated at each step using half the Inter-quartile
         range, this tends to be conservative (robust).
 
-    k : int < sample_size/2, default=1
+    k : int < sample_size/2, default=None
         Parameter used for mom weighting procedure, used only if weightings
         is 'mom'. 2k+1 is the number of blocks used for median-of-means
         estimation, higher value of k means a more robust estimator.
@@ -759,7 +759,7 @@ def __init__(
         burn_in=10,
         eta0=0.01,
         c=None,
-        k=1,
+        k=None,
         loss="log",
         sgd_args=None,
         multi_class="ovr",
@@ -983,7 +983,7 @@ class RobustWeightedRegressor(BaseEstimator, RegressorMixin):
         If None, c is estimated at each step using half the Inter-quartile
         range, this tends to be conservative (robust).
 
-    k : int < sample_size/2, default=1
+    k : int < sample_size/2 or None, default=None
         Parameter used for mom weighting procedure, used only if weightings
         is 'mom'. 2k+1 is the number of blocks used for median-of-means
         estimation, higher value of k means a more robust estimator.
@@ -1079,7 +1079,7 @@ def __init__(
         burn_in=10,
         eta0=0.01,
         c=None,
-        k=0,
+        k=None,
         loss="squared_loss",
         sgd_args=None,
         tol=1e-3,
@@ -1251,7 +1251,7 @@ class RobustWeightedKMeans(BaseEstimator, ClusterMixin):
         If None, c is estimated at each step using half the Inter-quartile
         range, this tends to be conservative (robust).
 
-    k : int < sample_size/2, default=1
+    k : int < sample_size/2 or None, default=None
         Parameter used for mom weighting procedure, used only if weightings
         is 'mom'. 2k+1 is the number of blocks used for median-of-means
         estimation, higher value of k means a more robust estimator.
@@ -1349,7 +1349,7 @@ def __init__(
         max_iter=100,
         eta0=0.01,
         c=None,
-        k=1,
+        k=None,
         kmeans_args=None,
         tol=1e-3,
         n_iter_no_change=10,

From 28c7c57a09986b59122c6bae13bbab6aaf7bcd78 Mon Sep 17 00:00:00 2001
From: TimotheeMathieu <30346931+TimotheeMathieu@users.noreply.github.com>
Date: Tue, 19 Oct 2021 10:36:07 +0200
Subject: [PATCH 4/7] Apply suggestions from code review

Co-authored-by: Roman Yurchak <rth.yurchak@gmail.com>
---
 sklearn_extra/robust/robust_weighted_estimator.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/sklearn_extra/robust/robust_weighted_estimator.py b/sklearn_extra/robust/robust_weighted_estimator.py
index e060fdbf..2cfa17b2 100644
--- a/sklearn_extra/robust/robust_weighted_estimator.py
+++ b/sklearn_extra/robust/robust_weighted_estimator.py
@@ -502,9 +502,8 @@ def psisx(x):
                 # scale estimator using iqr
                 scale = iqr(np.abs(loss_values - med))
                 k = np.sum(np.abs(loss_values - med) > 2 * scale)
-                if k < 2:
-                    # For safety
-                    k = 2
+                # For safety
+                k = min(k, 2)
             else:
                 k = self.k
             # Choose (randomly) 2k+1 (almost-)equal blocks of data.

From 391efd06e3c999584e36560f21e9c6b2146bfc82 Mon Sep 17 00:00:00 2001
From: TMat <timothee.mathieu@inria.fr>
Date: Tue, 19 Oct 2021 11:01:31 +0200
Subject: [PATCH 5/7] fix tests and apply review comments

---
 .../robust/robust_weighted_estimator.py       | 47 ++++++++-----------
 .../tests/test_robust_weighted_estimator.py   | 15 ++++++
 2 files changed, 35 insertions(+), 27 deletions(-)

diff --git a/sklearn_extra/robust/robust_weighted_estimator.py b/sklearn_extra/robust/robust_weighted_estimator.py
index 2cfa17b2..fff43f3a 100644
--- a/sklearn_extra/robust/robust_weighted_estimator.py
+++ b/sklearn_extra/robust/robust_weighted_estimator.py
@@ -389,9 +389,8 @@ def fit(self, X, y=None):
                         X[perm], y[perm], sample_weight=weights[perm]
                     )
                 else:
-                    base_estimator.fit(
-                        X[perm], y[perm], sample_weight=weights[perm]
-                    )
+                    # Do one IRLS step.
+                    base_estimator.fit(X, y, sample_weight=weights)
 
             if (self.tol is not None) and (
                 current_loss > best_loss - self.tol
@@ -503,7 +502,7 @@ def psisx(x):
                 scale = iqr(np.abs(loss_values - med))
                 k = np.sum(np.abs(loss_values - med) > 2 * scale)
                 # For safety
-                k = min(k, 2)
+                k = min(k, 3)
             else:
                 k = self.k
             # Choose (randomly) 2k+1 (almost-)equal blocks of data.
@@ -811,6 +810,7 @@ def fit(self, X, y):
         base_robust_estimator_ = _RobustWeightedEstimator(
             SGDClassifier(**sgd_args, eta0=self.eta0),
             weighting=self.weighting,
+            solver="SGD",
             loss=self.loss,
             burn_in=self.burn_in,
             c=self.c,
@@ -1125,37 +1125,30 @@ def fit(self, X, y):
         # Define the base estimator
 
         X, y = self._validate_data(X, y, y_numeric=True)
+        kwargs = {
+            "weighting": self.weighting,
+            "loss": self.loss,
+            "burn_in": self.burn_in,
+            "c": self.c,
+            "k": self.k,
+            "eta0": self.eta0,
+            "max_iter": self.max_iter,
+            "tol": self.tol,
+            "n_iter_no_change": self.n_iter_no_change,
+            "verbose": self.verbose,
+            "random_state": self.random_state,
+        }
         if self.solver == "SGD":
             self.base_estimator_ = _RobustWeightedEstimator(
                 SGDRegressor(**sgd_args, eta0=self.eta0),
-                weighting=self.weighting,
                 solver="SGD",
-                loss=self.loss,
-                burn_in=self.burn_in,
-                c=self.c,
-                k=self.k,
-                eta0=self.eta0,
-                max_iter=self.max_iter,
-                tol=self.tol,
-                n_iter_no_change=self.n_iter_no_change,
-                verbose=self.verbose,
-                random_state=self.random_state,
+                **kwargs,
             )
         elif self.solver == "IRLS":
             self.base_estimator_ = _RobustWeightedEstimator(
                 LinearRegression(),
-                weighting=self.weighting,
                 solver="IRLS",
-                loss=self.loss,
-                burn_in=self.burn_in,
-                c=self.c,
-                k=self.k,
-                eta0=self.eta0,
-                max_iter=self.max_iter,
-                tol=self.tol,
-                n_iter_no_change=self.n_iter_no_change,
-                verbose=self.verbose,
-                random_state=self.random_state,
+                **kwargs,
             )
         else:
             raise ValueError("No such solver.")
@@ -1399,7 +1392,7 @@ def fit(self, X, y=None):
                 self.n_clusters,
                 batch_size=X.shape[0],
                 random_state=self.random_state,
-                **kmeans_args
+                **kmeans_args,
             ),
             burn_in=0,  # Important because it does not mean anything to
             # have burn-in
diff --git a/sklearn_extra/robust/tests/test_robust_weighted_estimator.py b/sklearn_extra/robust/tests/test_robust_weighted_estimator.py
index 13cd82ef..50bf7f3d 100644
--- a/sklearn_extra/robust/tests/test_robust_weighted_estimator.py
+++ b/sklearn_extra/robust/tests/test_robust_weighted_estimator.py
@@ -79,6 +79,13 @@ def test_robust_estimator_input_validation_and_fit_check():
     with pytest.raises(ValueError, match=msg):
         RobustWeightedKMeans(c=0).fit(X_cc)
 
+
+def test_robust_estimator_unsupported_loss():
+    """Test that warning message is thrown when unsupported loss."""
+    model = RobustWeightedClassifier(loss="invalid")
+    msg = "No such solver."
+    with pytest.raises(ValueError, match=msg):
+        model.fit(X_cc, y_cc)
     msg = "burn_in must be >= 0, got -1."
     with pytest.raises(ValueError, match=msg):
         RobustWeightedClassifier(burn_in=-1).fit(X_cc, y_cc)
@@ -279,6 +286,14 @@ def test_regression_corrupted_weights(weighting):
     assert reg.weights_[0] < np.mean(reg.weights_[1:])
 
 
+def test_robust_regressor_unsupported_solver():
+    """Test that warning message is thrown when unsupported loss."""
+    model = RobustWeightedRegressor(solver="invalid")
+    msg = "No such solver."
+    with pytest.raises(ValueError, match=msg):
+        model.fit(X_rc, y_rc)
+
+
 X_r = rng.uniform(-1, 1, size=[1000])
 y_r = X_r + 0.1 * rng.normal(size=1000)
 X_r = X_r.reshape(-1, 1)

From e1aaaedb81415ebab9eff4dcc0515ee94f28cb69 Mon Sep 17 00:00:00 2001
From: TMat <timothee.mathieu@inria.fr>
Date: Tue, 19 Oct 2021 14:29:02 +0200
Subject: [PATCH 6/7] fix test

---
 .../robust/tests/test_robust_weighted_estimator.py       | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/sklearn_extra/robust/tests/test_robust_weighted_estimator.py b/sklearn_extra/robust/tests/test_robust_weighted_estimator.py
index 50bf7f3d..f530c8bd 100644
--- a/sklearn_extra/robust/tests/test_robust_weighted_estimator.py
+++ b/sklearn_extra/robust/tests/test_robust_weighted_estimator.py
@@ -79,13 +79,6 @@ def test_robust_estimator_input_validation_and_fit_check():
     with pytest.raises(ValueError, match=msg):
         RobustWeightedKMeans(c=0).fit(X_cc)
 
-
-def test_robust_estimator_unsupported_loss():
-    """Test that warning message is thrown when unsupported loss."""
-    model = RobustWeightedClassifier(loss="invalid")
-    msg = "No such solver."
-    with pytest.raises(ValueError, match=msg):
-        model.fit(X_cc, y_cc)
     msg = "burn_in must be >= 0, got -1."
     with pytest.raises(ValueError, match=msg):
         RobustWeightedClassifier(burn_in=-1).fit(X_cc, y_cc)
@@ -286,7 +279,7 @@ def test_regression_corrupted_weights(weighting):
     assert reg.weights_[0] < np.mean(reg.weights_[1:])
 
 
-def test_robust_regressor_unsupported_solver():
+def test_robust_regression_estimator_unsupported_solver():
     """Test that warning message is thrown when unsupported loss."""
     model = RobustWeightedRegressor(solver="invalid")
     msg = "No such solver."

From decca0e2bb98278dc213cc758f181d52e071ef86 Mon Sep 17 00:00:00 2001
From: TMat <timothee.mathieu@inria.fr>
Date: Tue, 19 Oct 2021 16:19:17 +0200
Subject: [PATCH 7/7] fix test clustering new scikit-learn

---
 sklearn_extra/robust/tests/test_robust_weighted_estimator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn_extra/robust/tests/test_robust_weighted_estimator.py b/sklearn_extra/robust/tests/test_robust_weighted_estimator.py
index 00627566..01a9b209 100644
--- a/sklearn_extra/robust/tests/test_robust_weighted_estimator.py
+++ b/sklearn_extra/robust/tests/test_robust_weighted_estimator.py
@@ -405,7 +405,7 @@ def test_not_robust_cluster(weighting):
     difference = [
         np.linalg.norm(pred1[i] - pred2[i]) for i in range(len(pred1))
     ]
-    assert np.mean(difference) < 1
+    assert np.mean(difference) < 2
 
 
 def test_transform():