From 6468cf4a0adbb7d02c46e080ed08c52e9bf47903 Mon Sep 17 00:00:00 2001 From: TMat Date: Wed, 13 Oct 2021 10:59:10 +0200 Subject: [PATCH 1/7] add IRLS algo --- .../robust/robust_weighted_estimator.py | 112 ++++++++++++------ .../tests/test_robust_weighted_estimator.py | 15 ++- 2 files changed, 82 insertions(+), 45 deletions(-) diff --git a/sklearn_extra/robust/robust_weighted_estimator.py b/sklearn_extra/robust/robust_weighted_estimator.py index a2d7c063..63887b5d 100644 --- a/sklearn_extra/robust/robust_weighted_estimator.py +++ b/sklearn_extra/robust/robust_weighted_estimator.py @@ -21,7 +21,7 @@ check_consistent_length, ) from sklearn.utils.validation import check_is_fitted -from sklearn.linear_model import SGDRegressor, SGDClassifier +from sklearn.linear_model import SGDRegressor, SGDClassifier, LinearRegression from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier from sklearn.cluster import MiniBatchKMeans from sklearn.metrics.pairwise import euclidean_distances @@ -111,6 +111,9 @@ class _RobustWeightedEstimator(BaseEstimator): If callable, the function is used as loss function ro construct the weights. + solver : {"IRLS", "SGD"}, default="SGD" + Algorithm used for the optimization. For now only for regression. + weighting : string, default="huber" Weighting scheme used to make the estimator robust. Can be 'huber' for huber-type weights or 'mom' for median-of-means @@ -144,8 +147,7 @@ class _RobustWeightedEstimator(BaseEstimator): Can have a big effect on efficiency. If None, k is estimated using the number of points distant from the median of means of more than 2 times a robust estimate of the scale - (using the inter-quartile range), this tends to be conservative - (robust). + (using the inter-quartile range), this can be unstable. tol : float or None, (default = 1e-3) The stopping criterion. If it is not None, training will stop when @@ -210,12 +212,13 @@ def __init__( self, base_estimator, loss, + solver="SGD", weighting="huber", max_iter=100, burn_in=10, eta0=0.1, c=None, - k=0, + k=1, tol=1e-5, n_iter_no_change=10, verbose=0, @@ -223,11 +226,13 @@ def __init__( ): self.base_estimator = base_estimator self.weighting = weighting + self.solver=solver self.eta0 = eta0 self.burn_in = burn_in self.c = c self.k = k self.loss = loss + self.solver = solver self.max_iter = max_iter self.tol = tol self.n_iter_no_change = n_iter_no_change @@ -278,9 +283,9 @@ def fit(self, X, y=None): if "n_iter_no_change" in parameters: base_estimator.set_params(n_iter_no_change=self.n_iter_no_change) - - base_estimator.set_params(random_state=random_state) - if self.burn_in > 0: + if "random_state" in parameters: + base_estimator.set_params(random_state=random_state) + if (self.burn_in > 0) and self.solver != 'IRLS': learning_rate = base_estimator.learning_rate base_estimator.set_params(learning_rate="constant", eta0=self.eta0) @@ -302,8 +307,11 @@ def fit(self, X, y=None): # Initialization of the estimator # Partial fit for the estimator to be set to "fitted" to be able # to predict. - base_estimator.partial_fit(X, y) - # As the partial fit is here non-robust, override the + if self.solver == "SGD": + base_estimator.partial_fit(X, y) + else: + base_estimator.fit(X,y) + # As the fit is here non-robust, override the # learned coefs. base_estimator.coef_ = np.zeros([len(X[0])]) base_estimator.intercept_ = np.array([0]) @@ -320,7 +328,7 @@ def fit(self, X, y=None): # Optimization algorithm for epoch in range(self.max_iter): - if epoch > self.burn_in and self.burn_in > 0: + if (epoch > self.burn_in) and (self.burn_in > 0) and (self.solver == "SGD"): # If not in the burn_in phase anymore, change the learning_rate # calibration to the one edicted by self.base_estimator. base_estimator.set_params(learning_rate=learning_rate) @@ -352,8 +360,6 @@ def fit(self, X, y=None): # epoch using the previously computed weights. Also shuffle the data. perm = random_state.permutation(len(X)) - base_estimator.partial_fit(X, y, sample_weight=weights) - if (self.tol is not None) and ( current_loss > best_loss - self.tol ): @@ -374,9 +380,11 @@ def fit(self, X, y=None): X[perm], y, sample_weight=weights[perm] ) else: - base_estimator.partial_fit( - X[perm], y[perm], sample_weight=weights[perm] - ) + if self.solver == "SGD": + base_estimator.partial_fit(X[perm], y[perm], sample_weight=weights[perm]) + else: + base_estimator.fit(X[perm], y[perm], sample_weight=weights[perm]) + if (self.tol is not None) and ( current_loss > best_loss - self.tol ): @@ -483,10 +491,12 @@ def psisx(x): elif self.weighting == "mom": if self.k is None: med = np.median(loss_values) - # scale estimator using iqr, rescaled by what would be if the - # loss was Gaussian. - scale = iqr(np.abs(loss_values - med)) / 1.37 + # scale estimator using iqr + scale = iqr(np.abs(loss_values - med)) k = np.sum(np.abs(loss_values - med) > 2 * scale) + if k < 2: + # For safety + k = 2 else: k = self.k # Choose (randomly) 2k+1 (almost-)equal blocks of data. @@ -636,8 +646,7 @@ class RobustWeightedClassifier(BaseEstimator, ClassifierMixin): Can have a big effect on efficiency. If None, k is estimated using the number of points distant from the median of means of more than 2 times a robust estimate of the scale - (using the inter-quartile range), this tends to be conservative - (robust). + (using the inter-quartile range), this can be unstable. loss : string, None or callable, default="log" Classification losses supported : 'log', 'hinge', 'modified_huber'. @@ -742,7 +751,7 @@ def __init__( burn_in=10, eta0=0.01, c=None, - k=0, + k=1, loss="log", sgd_args=None, multi_class="ovr", @@ -940,6 +949,11 @@ class RobustWeightedRegressor(BaseEstimator, RegressorMixin): Can be 'huber' for huber-type weights or 'mom' for median-of-means type weights. + solver : {"SGD", "IRLS"} + Algorithm used for optimization. If "SGD" then, use SGDRegressor as + base estimator and reweight at each optimization step. If "IRLS" then + use multiple fit of reweighted LinearRegression with robust weights. + max_iter : int, default=100 Maximum number of iterations. For more information, see the optimization scheme of base_estimator @@ -1052,6 +1066,7 @@ class RobustWeightedRegressor(BaseEstimator, RegressorMixin): def __init__( self, weighting="huber", + solver="SGD", max_iter=100, burn_in=10, eta0=0.01, @@ -1066,6 +1081,7 @@ def __init__( ): self.weighting = weighting + self.solver = solver self.max_iter = max_iter self.burn_in = burn_in self.eta0 = eta0 @@ -1102,21 +1118,40 @@ def fit(self, X, y): # Define the base estimator X, y = self._validate_data(X, y, y_numeric=True) - - self.base_estimator_ = _RobustWeightedEstimator( - SGDRegressor(**sgd_args, eta0=self.eta0), - weighting=self.weighting, - loss=self.loss, - burn_in=self.burn_in, - c=self.c, - k=self.k, - eta0=self.eta0, - max_iter=self.max_iter, - tol=self.tol, - n_iter_no_change=self.n_iter_no_change, - verbose=self.verbose, - random_state=self.random_state, - ) + if self.solver == "SGD": + self.base_estimator_ = _RobustWeightedEstimator( + SGDRegressor(**sgd_args, eta0=self.eta0), + weighting=self.weighting, + solver="SGD", + loss=self.loss, + burn_in=self.burn_in, + c=self.c, + k=self.k, + eta0=self.eta0, + max_iter=self.max_iter, + tol=self.tol, + n_iter_no_change=self.n_iter_no_change, + verbose=self.verbose, + random_state=self.random_state, + ) + elif self.solver == "IRLS": + self.base_estimator_ = _RobustWeightedEstimator( + LinearRegression(), + weighting=self.weighting, + solver="IRLS", + loss=self.loss, + burn_in=self.burn_in, + c=self.c, + k=self.k, + eta0=self.eta0, + max_iter=self.max_iter, + tol=self.tol, + n_iter_no_change=self.n_iter_no_change, + verbose=self.verbose, + random_state=self.random_state, + ) + else: + raise ValueError('No such solver.') self.base_estimator_.fit(X, y) self.weights_ = self.base_estimator_.weights_ @@ -1215,8 +1250,7 @@ class RobustWeightedKMeans(BaseEstimator, ClusterMixin): Can have a big effect on efficiency. If None, k is estimated using the number of points distant from the median of means of more than 2 times a robust estimate of the scale - (using the inter-quartile range), this tends to be conservative - (robust). + (using the inter-quartile range), this can be unstable. kmeans_args : dict, default={} arguments of the MiniBatchKMeans base estimator. Must not contain @@ -1307,7 +1341,7 @@ def __init__( max_iter=100, eta0=0.01, c=None, - k=0, + k=1, kmeans_args=None, tol=1e-3, n_iter_no_change=10, diff --git a/sklearn_extra/robust/tests/test_robust_weighted_estimator.py b/sklearn_extra/robust/tests/test_robust_weighted_estimator.py index 3482bbe8..67231734 100644 --- a/sklearn_extra/robust/tests/test_robust_weighted_estimator.py +++ b/sklearn_extra/robust/tests/test_robust_weighted_estimator.py @@ -34,6 +34,7 @@ classif_losses = ["log", "hinge"] weightings = ["huber", "mom"] multi_class = ["ovr", "ovo"] +solvers = ['SGD', 'IRLS'] def test_robust_estimator_max_iter(): @@ -233,8 +234,8 @@ def test_robust_no_proba(): # Regression test with outliers -X_rc = rng.uniform(-1, 1, size=[200]) -y_rc = X_rc + 0.1 * rng.normal(size=200) +X_rc = rng.uniform(-1, 1, size=[300]) +y_rc = X_rc + 0.1 * rng.normal(size=300) X_rc[0] = 10 X_rc = X_rc.reshape(-1, 1) y_rc[0] = -1 @@ -246,10 +247,12 @@ def test_robust_no_proba(): @pytest.mark.parametrize("weighting", weightings) @pytest.mark.parametrize("k", k_values) @pytest.mark.parametrize("c", c_values) -def test_corrupted_regression(loss, weighting, k, c): +@pytest.mark.parametrize("solver", solvers) +def test_corrupted_regression(loss, weighting, k, c, solver): reg = RobustWeightedRegressor( loss=loss, - max_iter=50, + max_iter=100, + solver=solver, weighting=weighting, k=k, c=c, @@ -257,8 +260,8 @@ def test_corrupted_regression(loss, weighting, k, c): n_iter_no_change=20, ) reg.fit(X_rc, y_rc) - assert np.abs(reg.coef_[0] - 1) < 0.1 - assert np.abs(reg.intercept_[0]) < 0.1 + assert np.abs(reg.coef_[0] - 1) < 0.2 + assert np.abs(reg.intercept_) < 0.2 # Check that weights_ parameter can be used as outlier score. From 817d869640122cfdb5f42c5ed0399dd7464bdd62 Mon Sep 17 00:00:00 2001 From: TMat Date: Wed, 13 Oct 2021 11:03:33 +0200 Subject: [PATCH 2/7] black --- .../robust/robust_weighted_estimator.py | 22 +++++++++++++------ .../tests/test_robust_weighted_estimator.py | 2 +- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/sklearn_extra/robust/robust_weighted_estimator.py b/sklearn_extra/robust/robust_weighted_estimator.py index 63887b5d..cce0b078 100644 --- a/sklearn_extra/robust/robust_weighted_estimator.py +++ b/sklearn_extra/robust/robust_weighted_estimator.py @@ -226,7 +226,7 @@ def __init__( ): self.base_estimator = base_estimator self.weighting = weighting - self.solver=solver + self.solver = solver self.eta0 = eta0 self.burn_in = burn_in self.c = c @@ -285,7 +285,7 @@ def fit(self, X, y=None): base_estimator.set_params(n_iter_no_change=self.n_iter_no_change) if "random_state" in parameters: base_estimator.set_params(random_state=random_state) - if (self.burn_in > 0) and self.solver != 'IRLS': + if (self.burn_in > 0) and self.solver != "IRLS": learning_rate = base_estimator.learning_rate base_estimator.set_params(learning_rate="constant", eta0=self.eta0) @@ -310,7 +310,7 @@ def fit(self, X, y=None): if self.solver == "SGD": base_estimator.partial_fit(X, y) else: - base_estimator.fit(X,y) + base_estimator.fit(X, y) # As the fit is here non-robust, override the # learned coefs. base_estimator.coef_ = np.zeros([len(X[0])]) @@ -328,7 +328,11 @@ def fit(self, X, y=None): # Optimization algorithm for epoch in range(self.max_iter): - if (epoch > self.burn_in) and (self.burn_in > 0) and (self.solver == "SGD"): + if ( + (epoch > self.burn_in) + and (self.burn_in > 0) + and (self.solver == "SGD") + ): # If not in the burn_in phase anymore, change the learning_rate # calibration to the one edicted by self.base_estimator. base_estimator.set_params(learning_rate=learning_rate) @@ -381,9 +385,13 @@ def fit(self, X, y=None): ) else: if self.solver == "SGD": - base_estimator.partial_fit(X[perm], y[perm], sample_weight=weights[perm]) + base_estimator.partial_fit( + X[perm], y[perm], sample_weight=weights[perm] + ) else: - base_estimator.fit(X[perm], y[perm], sample_weight=weights[perm]) + base_estimator.fit( + X[perm], y[perm], sample_weight=weights[perm] + ) if (self.tol is not None) and ( current_loss > best_loss - self.tol @@ -1151,7 +1159,7 @@ def fit(self, X, y): random_state=self.random_state, ) else: - raise ValueError('No such solver.') + raise ValueError("No such solver.") self.base_estimator_.fit(X, y) self.weights_ = self.base_estimator_.weights_ diff --git a/sklearn_extra/robust/tests/test_robust_weighted_estimator.py b/sklearn_extra/robust/tests/test_robust_weighted_estimator.py index 67231734..13cd82ef 100644 --- a/sklearn_extra/robust/tests/test_robust_weighted_estimator.py +++ b/sklearn_extra/robust/tests/test_robust_weighted_estimator.py @@ -34,7 +34,7 @@ classif_losses = ["log", "hinge"] weightings = ["huber", "mom"] multi_class = ["ovr", "ovo"] -solvers = ['SGD', 'IRLS'] +solvers = ["SGD", "IRLS"] def test_robust_estimator_max_iter(): From 08bcc3b8a13acf84fa3217eeb8ac826414b3a26d Mon Sep 17 00:00:00 2001 From: TMat Date: Wed, 13 Oct 2021 11:11:45 +0200 Subject: [PATCH 3/7] fix default parameter --- .../robust/robust_weighted_estimator.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sklearn_extra/robust/robust_weighted_estimator.py b/sklearn_extra/robust/robust_weighted_estimator.py index cce0b078..e060fdbf 100644 --- a/sklearn_extra/robust/robust_weighted_estimator.py +++ b/sklearn_extra/robust/robust_weighted_estimator.py @@ -140,7 +140,7 @@ class _RobustWeightedEstimator(BaseEstimator): If None, c is estimated at each step using half the Inter-quartile range, this tends to be conservative (robust). - k : int < sample_size/2, default=1 + k : int < sample_size/2, default=None Parameter used for mom weighting procedure, used only if weightings is 'mom'. 2k+1 is the number of blocks used for median-of-means estimation, higher value of k means a more robust estimator. @@ -218,7 +218,7 @@ def __init__( burn_in=10, eta0=0.1, c=None, - k=1, + k=None, tol=1e-5, n_iter_no_change=10, verbose=0, @@ -647,7 +647,7 @@ class RobustWeightedClassifier(BaseEstimator, ClassifierMixin): If None, c is estimated at each step using half the Inter-quartile range, this tends to be conservative (robust). - k : int < sample_size/2, default=1 + k : int < sample_size/2, default=None Parameter used for mom weighting procedure, used only if weightings is 'mom'. 2k+1 is the number of blocks used for median-of-means estimation, higher value of k means a more robust estimator. @@ -759,7 +759,7 @@ def __init__( burn_in=10, eta0=0.01, c=None, - k=1, + k=None, loss="log", sgd_args=None, multi_class="ovr", @@ -983,7 +983,7 @@ class RobustWeightedRegressor(BaseEstimator, RegressorMixin): If None, c is estimated at each step using half the Inter-quartile range, this tends to be conservative (robust). - k : int < sample_size/2, default=1 + k : int < sample_size/2 or None, default=None Parameter used for mom weighting procedure, used only if weightings is 'mom'. 2k+1 is the number of blocks used for median-of-means estimation, higher value of k means a more robust estimator. @@ -1079,7 +1079,7 @@ def __init__( burn_in=10, eta0=0.01, c=None, - k=0, + k=None, loss="squared_loss", sgd_args=None, tol=1e-3, @@ -1251,7 +1251,7 @@ class RobustWeightedKMeans(BaseEstimator, ClusterMixin): If None, c is estimated at each step using half the Inter-quartile range, this tends to be conservative (robust). - k : int < sample_size/2, default=1 + k : int < sample_size/2 or None, default=None Parameter used for mom weighting procedure, used only if weightings is 'mom'. 2k+1 is the number of blocks used for median-of-means estimation, higher value of k means a more robust estimator. @@ -1349,7 +1349,7 @@ def __init__( max_iter=100, eta0=0.01, c=None, - k=1, + k=None, kmeans_args=None, tol=1e-3, n_iter_no_change=10, From 28c7c57a09986b59122c6bae13bbab6aaf7bcd78 Mon Sep 17 00:00:00 2001 From: TimotheeMathieu <30346931+TimotheeMathieu@users.noreply.github.com> Date: Tue, 19 Oct 2021 10:36:07 +0200 Subject: [PATCH 4/7] Apply suggestions from code review Co-authored-by: Roman Yurchak --- sklearn_extra/robust/robust_weighted_estimator.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sklearn_extra/robust/robust_weighted_estimator.py b/sklearn_extra/robust/robust_weighted_estimator.py index e060fdbf..2cfa17b2 100644 --- a/sklearn_extra/robust/robust_weighted_estimator.py +++ b/sklearn_extra/robust/robust_weighted_estimator.py @@ -502,9 +502,8 @@ def psisx(x): # scale estimator using iqr scale = iqr(np.abs(loss_values - med)) k = np.sum(np.abs(loss_values - med) > 2 * scale) - if k < 2: - # For safety - k = 2 + # For safety + k = min(k, 2) else: k = self.k # Choose (randomly) 2k+1 (almost-)equal blocks of data. From 391efd06e3c999584e36560f21e9c6b2146bfc82 Mon Sep 17 00:00:00 2001 From: TMat Date: Tue, 19 Oct 2021 11:01:31 +0200 Subject: [PATCH 5/7] fix tests and apply review comments --- .../robust/robust_weighted_estimator.py | 47 ++++++++----------- .../tests/test_robust_weighted_estimator.py | 15 ++++++ 2 files changed, 35 insertions(+), 27 deletions(-) diff --git a/sklearn_extra/robust/robust_weighted_estimator.py b/sklearn_extra/robust/robust_weighted_estimator.py index 2cfa17b2..fff43f3a 100644 --- a/sklearn_extra/robust/robust_weighted_estimator.py +++ b/sklearn_extra/robust/robust_weighted_estimator.py @@ -389,9 +389,8 @@ def fit(self, X, y=None): X[perm], y[perm], sample_weight=weights[perm] ) else: - base_estimator.fit( - X[perm], y[perm], sample_weight=weights[perm] - ) + # Do one IRLS step. + base_estimator.fit(X, y, sample_weight=weights) if (self.tol is not None) and ( current_loss > best_loss - self.tol @@ -503,7 +502,7 @@ def psisx(x): scale = iqr(np.abs(loss_values - med)) k = np.sum(np.abs(loss_values - med) > 2 * scale) # For safety - k = min(k, 2) + k = min(k, 3) else: k = self.k # Choose (randomly) 2k+1 (almost-)equal blocks of data. @@ -811,6 +810,7 @@ def fit(self, X, y): base_robust_estimator_ = _RobustWeightedEstimator( SGDClassifier(**sgd_args, eta0=self.eta0), weighting=self.weighting, + solver="SGD", loss=self.loss, burn_in=self.burn_in, c=self.c, @@ -1125,37 +1125,30 @@ def fit(self, X, y): # Define the base estimator X, y = self._validate_data(X, y, y_numeric=True) + kwargs = { + "weighting": self.weighting, + "loss": self.loss, + "burn_in": self.burn_in, + "c": self.c, + "k": self.k, + "eta0": self.eta0, + "max_iter": self.max_iter, + "tol": self.tol, + "n_iter_no_change": self.n_iter_no_change, + "verbose": self.verbose, + "random_state": self.random_state, + } if self.solver == "SGD": self.base_estimator_ = _RobustWeightedEstimator( SGDRegressor(**sgd_args, eta0=self.eta0), - weighting=self.weighting, solver="SGD", - loss=self.loss, - burn_in=self.burn_in, - c=self.c, - k=self.k, - eta0=self.eta0, - max_iter=self.max_iter, - tol=self.tol, - n_iter_no_change=self.n_iter_no_change, - verbose=self.verbose, - random_state=self.random_state, + **kwargs, ) elif self.solver == "IRLS": self.base_estimator_ = _RobustWeightedEstimator( LinearRegression(), - weighting=self.weighting, solver="IRLS", - loss=self.loss, - burn_in=self.burn_in, - c=self.c, - k=self.k, - eta0=self.eta0, - max_iter=self.max_iter, - tol=self.tol, - n_iter_no_change=self.n_iter_no_change, - verbose=self.verbose, - random_state=self.random_state, + **kwargs, ) else: raise ValueError("No such solver.") @@ -1399,7 +1392,7 @@ def fit(self, X, y=None): self.n_clusters, batch_size=X.shape[0], random_state=self.random_state, - **kmeans_args + **kmeans_args, ), burn_in=0, # Important because it does not mean anything to # have burn-in diff --git a/sklearn_extra/robust/tests/test_robust_weighted_estimator.py b/sklearn_extra/robust/tests/test_robust_weighted_estimator.py index 13cd82ef..50bf7f3d 100644 --- a/sklearn_extra/robust/tests/test_robust_weighted_estimator.py +++ b/sklearn_extra/robust/tests/test_robust_weighted_estimator.py @@ -79,6 +79,13 @@ def test_robust_estimator_input_validation_and_fit_check(): with pytest.raises(ValueError, match=msg): RobustWeightedKMeans(c=0).fit(X_cc) + +def test_robust_estimator_unsupported_loss(): + """Test that warning message is thrown when unsupported loss.""" + model = RobustWeightedClassifier(loss="invalid") + msg = "No such solver." + with pytest.raises(ValueError, match=msg): + model.fit(X_cc, y_cc) msg = "burn_in must be >= 0, got -1." with pytest.raises(ValueError, match=msg): RobustWeightedClassifier(burn_in=-1).fit(X_cc, y_cc) @@ -279,6 +286,14 @@ def test_regression_corrupted_weights(weighting): assert reg.weights_[0] < np.mean(reg.weights_[1:]) +def test_robust_regressor_unsupported_solver(): + """Test that warning message is thrown when unsupported loss.""" + model = RobustWeightedRegressor(solver="invalid") + msg = "No such solver." + with pytest.raises(ValueError, match=msg): + model.fit(X_rc, y_rc) + + X_r = rng.uniform(-1, 1, size=[1000]) y_r = X_r + 0.1 * rng.normal(size=1000) X_r = X_r.reshape(-1, 1) From e1aaaedb81415ebab9eff4dcc0515ee94f28cb69 Mon Sep 17 00:00:00 2001 From: TMat Date: Tue, 19 Oct 2021 14:29:02 +0200 Subject: [PATCH 6/7] fix test --- .../robust/tests/test_robust_weighted_estimator.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/sklearn_extra/robust/tests/test_robust_weighted_estimator.py b/sklearn_extra/robust/tests/test_robust_weighted_estimator.py index 50bf7f3d..f530c8bd 100644 --- a/sklearn_extra/robust/tests/test_robust_weighted_estimator.py +++ b/sklearn_extra/robust/tests/test_robust_weighted_estimator.py @@ -79,13 +79,6 @@ def test_robust_estimator_input_validation_and_fit_check(): with pytest.raises(ValueError, match=msg): RobustWeightedKMeans(c=0).fit(X_cc) - -def test_robust_estimator_unsupported_loss(): - """Test that warning message is thrown when unsupported loss.""" - model = RobustWeightedClassifier(loss="invalid") - msg = "No such solver." - with pytest.raises(ValueError, match=msg): - model.fit(X_cc, y_cc) msg = "burn_in must be >= 0, got -1." with pytest.raises(ValueError, match=msg): RobustWeightedClassifier(burn_in=-1).fit(X_cc, y_cc) @@ -286,7 +279,7 @@ def test_regression_corrupted_weights(weighting): assert reg.weights_[0] < np.mean(reg.weights_[1:]) -def test_robust_regressor_unsupported_solver(): +def test_robust_regression_estimator_unsupported_solver(): """Test that warning message is thrown when unsupported loss.""" model = RobustWeightedRegressor(solver="invalid") msg = "No such solver." From decca0e2bb98278dc213cc758f181d52e071ef86 Mon Sep 17 00:00:00 2001 From: TMat Date: Tue, 19 Oct 2021 16:19:17 +0200 Subject: [PATCH 7/7] fix test clustering new scikit-learn --- sklearn_extra/robust/tests/test_robust_weighted_estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn_extra/robust/tests/test_robust_weighted_estimator.py b/sklearn_extra/robust/tests/test_robust_weighted_estimator.py index 00627566..01a9b209 100644 --- a/sklearn_extra/robust/tests/test_robust_weighted_estimator.py +++ b/sklearn_extra/robust/tests/test_robust_weighted_estimator.py @@ -405,7 +405,7 @@ def test_not_robust_cluster(weighting): difference = [ np.linalg.norm(pred1[i] - pred2[i]) for i in range(len(pred1)) ] - assert np.mean(difference) < 1 + assert np.mean(difference) < 2 def test_transform():