diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index ca23360f9d462..2160f8a31207c 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -36,12 +36,14 @@ T = [[-1, -1], [2, 2], [3, 2]] true_result = [1, 2, 2] -# also load the iris dataset -iris = datasets.load_iris() -rng = check_random_state(42) -perm = rng.permutation(iris.target.size) -iris.data = iris.data[perm] -iris.target = iris.target[perm] + +def get_iris_dataset(global_random_seed): + iris = datasets.load_iris() + rng = check_random_state(global_random_seed) + perm = rng.permutation(iris.target.size) + iris.data = iris.data[perm] + iris.target = iris.target[perm] + return iris def test_libsvm_parameters(): @@ -54,9 +56,9 @@ def test_libsvm_parameters(): assert_array_equal(clf.predict(X), Y) -def test_libsvm_iris(): +def test_libsvm_iris(global_random_seed): # Check consistency on dataset iris. - + iris = get_iris_dataset(global_random_seed) # shuffle the dataset so that labels are not ordered for k in ("linear", "rbf"): clf = svm.SVC(kernel=k).fit(iris.data, iris.target) @@ -134,7 +136,7 @@ def test_libsvm_iris(): assert_array_equal(pred, pred2) -def test_precomputed(): +def test_precomputed(global_random_seed): # SVC with a precomputed kernel. # We test it with a toy dataset and with iris. clf = svm.SVC(kernel="precomputed") @@ -183,6 +185,7 @@ def kfunc(x, y): # and check parameters against a linear SVC clf = svm.SVC(kernel="precomputed") clf2 = svm.SVC(kernel="linear") + iris = get_iris_dataset(global_random_seed) K = np.dot(iris.data, iris.data.T) clf.fit(K, iris.target) clf2.fit(iris.data, iris.target) @@ -241,7 +244,7 @@ def test_linearsvr(): assert_almost_equal(score1, score2, 2) -def test_linearsvr_fit_sampleweight(): +def test_linearsvr_fit_sampleweight(global_random_seed): # check correct result when sample_weight is 1 # check that SVR(kernel='linear') and LinearSVC() give # comparable results @@ -265,8 +268,8 @@ def test_linearsvr_fit_sampleweight(): # check that fit(X) = fit([X1, X2, X3],sample_weight = [n1, n2, n3]) where # X = X1 repeated n1 times, X2 repeated n2 times and so forth - random_state = check_random_state(0) - random_weight = random_state.randint(0, 10, n_samples) + rng = np.random.RandomState(global_random_seed) + random_weight = rng.randint(0, 10, n_samples) lsvr_unflat = svm.LinearSVR(C=1e3, tol=1e-12, max_iter=10000).fit( diabetes.data, diabetes.target, sample_weight=random_weight ) @@ -307,20 +310,19 @@ def test_oneclass(): (lambda: clf.coef_)() -def test_oneclass_decision_function(): +def test_oneclass_decision_function(global_random_seed): # Test OneClassSVM decision function - clf = svm.OneClassSVM() - rnd = check_random_state(2) - + rng = np.random.RandomState(global_random_seed) + N = 1000 # Generate train data - X = 0.3 * rnd.randn(100, 2) + X = 0.3 * rng.randn(5 * N, 2) X_train = np.r_[X + 2, X - 2] # Generate some regular novel observations - X = 0.3 * rnd.randn(20, 2) + X = 0.3 * rng.randn(N, 2) X_test = np.r_[X + 2, X - 2] # Generate some abnormal novel observations - X_outliers = rnd.uniform(low=-4, high=4, size=(20, 2)) + X_outliers = rng.uniform(low=-4, high=4, size=(N, 2)) # fit the model clf = svm.OneClassSVM(nu=0.1, kernel="rbf", gamma=0.1) @@ -328,9 +330,9 @@ def test_oneclass_decision_function(): # predict things y_pred_test = clf.predict(X_test) - assert np.mean(y_pred_test == 1) > 0.9 + assert np.mean(y_pred_test == 1) > 0.85 y_pred_outliers = clf.predict(X_outliers) - assert np.mean(y_pred_outliers == -1) > 0.9 + assert np.mean(y_pred_outliers == -1) > 0.85 dec_func_test = clf.decision_function(X_test) assert_array_equal((dec_func_test > 0).ravel(), y_pred_test == 1) dec_func_outliers = clf.decision_function(X_outliers) @@ -361,13 +363,14 @@ def test_tweak_params(): assert_array_equal(clf.predict([[-0.1, -0.1]]), [2]) -def test_probability(): +def test_probability(global_random_seed): # Predict probabilities using SVC # This uses cross validation, so we use a slightly bigger testing set. + iris = get_iris_dataset(global_random_seed) for clf in ( - svm.SVC(probability=True, random_state=0, C=1.0), - svm.NuSVC(probability=True, random_state=0), + svm.SVC(probability=True, random_state=global_random_seed + 1, C=1.0), + svm.NuSVC(probability=True, random_state=global_random_seed + 2), ): clf.fit(iris.data, iris.target) @@ -380,7 +383,8 @@ def test_probability(): ) -def test_decision_function(): +def test_decision_function(global_random_seed): + iris = get_iris_dataset(global_random_seed) # Test decision_function # Sanity check, test that decision_function implemented in python # returns the same as the one in libsvm @@ -414,36 +418,52 @@ def test_decision_function(): @pytest.mark.parametrize("SVM", (svm.SVC, svm.NuSVC)) -def test_decision_function_shape(SVM): +def test_decision_function_shape(SVM, global_random_seed): # check that decision_function_shape='ovr' or 'ovo' gives # correct shape and is consistent with predict + iris = get_iris_dataset(global_random_seed) - clf = SVM(kernel="linear", decision_function_shape="ovr").fit( - iris.data, iris.target + linear_ovr_svm = SVM( + kernel="linear", + decision_function_shape="ovr", + random_state=global_random_seed + 1, + break_ties=True, ) - dec = clf.decision_function(iris.data) + # we need to use break_ties here so that the prediction won't break ties randomly + # but use the argmax of the decision function. + linear_ovr_svm.fit(iris.data, iris.target) + dec = linear_ovr_svm.decision_function(iris.data) assert dec.shape == (len(iris.data), 3) - assert_array_equal(clf.predict(iris.data), np.argmax(dec, axis=1)) + assert_array_equal(linear_ovr_svm.predict(iris.data), np.argmax(dec, axis=1)) # with five classes: - X, y = make_blobs(n_samples=80, centers=5, random_state=0) - X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + X, y = make_blobs(n_samples=80, centers=5, random_state=global_random_seed + 2) + X_train, X_test, y_train, y_test = train_test_split( + X, y, random_state=global_random_seed + 3 + ) - clf = SVM(kernel="linear", decision_function_shape="ovr").fit(X_train, y_train) - dec = clf.decision_function(X_test) + linear_ovr_svm.fit(X_train, y_train) + dec = linear_ovr_svm.decision_function(X_test) assert dec.shape == (len(X_test), 5) - assert_array_equal(clf.predict(X_test), np.argmax(dec, axis=1)) + assert_array_equal(linear_ovr_svm.predict(X_test), np.argmax(dec, axis=1)) - # check shape of ovo_decition_function=True - clf = SVM(kernel="linear", decision_function_shape="ovo").fit(X_train, y_train) - dec = clf.decision_function(X_train) + # check shape of ovo_decision_function=True + linear_ovo_svm = SVM( + kernel="linear", + decision_function_shape="ovo", + random_state=global_random_seed + 4, + break_ties=True, + ) + linear_ovo_svm.fit(X_train, y_train) + dec = linear_ovo_svm.decision_function(X_train) assert dec.shape == (len(X_train), 10) -def test_svr_predict(): +def test_svr_predict(global_random_seed): # Test SVR's decision_function # Sanity check, test that predict implemented in python # returns the same as the one in libsvm + iris = get_iris_dataset(global_random_seed) X = iris.data y = iris.target @@ -462,7 +482,7 @@ def test_svr_predict(): assert_array_almost_equal(dec.ravel(), reg.predict(X).ravel()) -def test_weight(): +def test_weight(global_random_seed): # Test class weights clf = svm.SVC(class_weight={1: 0.1}) # we give a small weights to class 1 @@ -471,18 +491,21 @@ def test_weight(): assert_array_almost_equal(clf.predict(X), [2] * 6) X_, y_ = make_classification( - n_samples=200, n_features=10, weights=[0.833, 0.167], random_state=2 + n_samples=2000, + n_features=10, + weights=[0.833, 0.167], + random_state=global_random_seed, ) for clf in ( linear_model.LogisticRegression(), - svm.LinearSVC(random_state=0), + svm.LinearSVC(random_state=global_random_seed + 1), svm.SVC(), ): clf.set_params(class_weight={0: 0.1, 1: 10}) - clf.fit(X_[:100], y_[:100]) - y_pred = clf.predict(X_[100:]) - assert f1_score(y_[100:], y_pred) > 0.3 + clf.fit(X_[:1000], y_[:1000]) + y_pred = clf.predict(X_[1000:]) + assert f1_score(y_[1000:], y_pred) > 0.25 @pytest.mark.parametrize("estimator", [svm.SVC(C=1e-2), svm.NuSVC()]) @@ -632,12 +655,13 @@ def test_negative_weight_equal_coeffs(Estimator, sample_weight): @ignore_warnings(category=UndefinedMetricWarning) -def test_auto_weight(): +def test_auto_weight(global_random_seed): # Test class weights for imbalanced data + iris = get_iris_dataset(global_random_seed) from sklearn.linear_model import LogisticRegression # We take as dataset the two-dimensional projection of iris so - # that it is not separable and remove half of predictors from + # that it is not separable and remove 30 predictors from # class 1. # We add one to the targets as a non-regression test: # class_weight="balanced" @@ -645,16 +669,16 @@ def test_auto_weight(): from sklearn.utils import compute_class_weight X, y = iris.data[:, :2], iris.target + 1 - unbalanced = np.delete(np.arange(y.size), np.where(y > 2)[0][::2]) + unbalanced = np.delete(np.arange(y.size), np.where(y > 2)[0][:30]) classes = np.unique(y[unbalanced]) class_weights = compute_class_weight("balanced", classes=classes, y=y[unbalanced]) assert np.argmax(class_weights) == 2 for clf in ( - svm.SVC(kernel="linear"), - svm.LinearSVC(random_state=0), - LogisticRegression(), + svm.SVC(kernel="linear", random_state=global_random_seed + 1), + svm.LinearSVC(random_state=global_random_seed + 2), + LogisticRegression(random_state=global_random_seed + 3), ): # check that score is better when class='balanced' is set. y_pred = clf.fit(X[unbalanced], y[unbalanced]).predict(X) @@ -668,14 +692,14 @@ def test_auto_weight(): ) -def test_bad_input(): +def test_bad_input(global_random_seed): # Test dimensions for labels Y2 = Y[:-1] # wrong dimensions for labels with pytest.raises(ValueError): svm.SVC().fit(X, Y2) # Test with arrays that are non-contiguous. - for clf in (svm.SVC(), svm.LinearSVC(random_state=0)): + for clf in (svm.SVC(), svm.LinearSVC(random_state=global_random_seed)): Xf = np.asfortranarray(X) assert not Xf.flags["C_CONTIGUOUS"] yf = np.ascontiguousarray(np.tile(Y, (2, 1)).T) @@ -706,9 +730,9 @@ def test_bad_input(): clf.predict(Xt) -def test_svc_nonfinite_params(): +def test_svc_nonfinite_params(global_random_seed): # Check SVC throws ValueError when dealing with non-finite parameter values - rng = np.random.RandomState(0) + rng = np.random.RandomState(global_random_seed) n_samples = 10 fmax = np.finfo(np.float64).max X = fmax * rng.uniform(size=(n_samples, 2)) @@ -720,8 +744,10 @@ def test_svc_nonfinite_params(): clf.fit(X, y) -def test_unicode_kernel(): +def test_unicode_kernel(global_random_seed): # Test that a unicode kernel name does not cause a TypeError + iris = get_iris_dataset(global_random_seed) + clf = svm.SVC(kernel="linear", probability=True) clf.fit(X, Y) clf.predict_proba(T) @@ -752,12 +778,16 @@ def test_sparse_fit_support_vectors_empty(): @pytest.mark.parametrize("loss", ["hinge", "squared_hinge"]) @pytest.mark.parametrize("penalty", ["l1", "l2"]) @pytest.mark.parametrize("dual", [True, False]) -def test_linearsvc_parameters(loss, penalty, dual): +def test_linearsvc_parameters(loss, penalty, dual, global_random_seed): # Test possible parameter combinations in LinearSVC # Generate list of possible parameter combinations - X, y = make_classification(n_samples=5, n_features=5, random_state=0) + X, y = make_classification( + n_samples=5, n_features=5, random_state=global_random_seed + ) - clf = svm.LinearSVC(penalty=penalty, loss=loss, dual=dual, random_state=0) + clf = svm.LinearSVC( + penalty=penalty, loss=loss, dual=dual, random_state=global_random_seed + 1 + ) if ( (loss, penalty) == ("hinge", "l1") or (loss, penalty, dual) == ("hinge", "l2", False) @@ -773,9 +803,9 @@ def test_linearsvc_parameters(loss, penalty, dual): clf.fit(X, y) -def test_linearsvc(): +def test_linearsvc(global_random_seed): # Test basic routines using LinearSVC - clf = svm.LinearSVC(random_state=0).fit(X, Y) + clf = svm.LinearSVC(random_state=global_random_seed).fit(X, Y) # by default should have intercept assert clf.fit_intercept @@ -785,16 +815,23 @@ def test_linearsvc(): # the same with l1 penalty clf = svm.LinearSVC( - penalty="l1", loss="squared_hinge", dual=False, random_state=0 + penalty="l1", + loss="squared_hinge", + dual=False, + random_state=global_random_seed + 1, ).fit(X, Y) assert_array_equal(clf.predict(T), true_result) # l2 penalty with dual formulation - clf = svm.LinearSVC(penalty="l2", dual=True, random_state=0).fit(X, Y) + clf = svm.LinearSVC( + penalty="l2", dual=True, random_state=global_random_seed + 2 + ).fit(X, Y) assert_array_equal(clf.predict(T), true_result) # l2 penalty, l1 loss - clf = svm.LinearSVC(penalty="l2", loss="hinge", dual=True, random_state=0) + clf = svm.LinearSVC( + penalty="l2", loss="hinge", dual=True, random_state=global_random_seed + 3 + ) clf.fit(X, Y) assert_array_equal(clf.predict(T), true_result) @@ -804,10 +841,16 @@ def test_linearsvc(): assert_array_equal(res, true_result) -def test_linearsvc_crammer_singer(): +def test_linearsvc_crammer_singer(global_random_seed): # Test LinearSVC with crammer_singer multi-class svm - ovr_clf = svm.LinearSVC(random_state=0).fit(iris.data, iris.target) - cs_clf = svm.LinearSVC(multi_class="crammer_singer", random_state=0) + iris = get_iris_dataset(global_random_seed) + + ovr_clf = svm.LinearSVC(random_state=global_random_seed + 1).fit( + iris.data, iris.target + ) + cs_clf = svm.LinearSVC( + multi_class="crammer_singer", random_state=global_random_seed + 2 + ) cs_clf.fit(iris.data, iris.target) # similar prediction for ovr and crammer-singer: @@ -825,14 +868,14 @@ def test_linearsvc_crammer_singer(): assert_array_almost_equal(dec_func, cs_clf.decision_function(iris.data)) -def test_linearsvc_fit_sampleweight(): +def test_linearsvc_fit_sampleweight(global_random_seed): # check correct result when sample_weight is 1 n_samples = len(X) unit_weight = np.ones(n_samples) - clf = svm.LinearSVC(random_state=0).fit(X, Y) - clf_unitweight = svm.LinearSVC(random_state=0, tol=1e-12, max_iter=1000).fit( - X, Y, sample_weight=unit_weight - ) + clf = svm.LinearSVC(random_state=global_random_seed).fit(X, Y) + clf_unitweight = svm.LinearSVC( + random_state=global_random_seed + 1, tol=1e-12, max_iter=1000 + ).fit(X, Y, sample_weight=unit_weight) # check if same as sample_weight=None assert_array_equal(clf_unitweight.predict(T), clf.predict(T)) @@ -841,46 +884,50 @@ def test_linearsvc_fit_sampleweight(): # check that fit(X) = fit([X1, X2, X3],sample_weight = [n1, n2, n3]) where # X = X1 repeated n1 times, X2 repeated n2 times and so forth - random_state = check_random_state(0) - random_weight = random_state.randint(0, 10, n_samples) - lsvc_unflat = svm.LinearSVC(random_state=0, tol=1e-12, max_iter=1000).fit( - X, Y, sample_weight=random_weight - ) + rng = np.random.RandomState(global_random_seed) + random_weight = rng.randint(0, 10, n_samples) + lsvc_unflat = svm.LinearSVC( + random_state=global_random_seed + 2, tol=1e-12, max_iter=1000 + ).fit(X, Y, sample_weight=random_weight) pred1 = lsvc_unflat.predict(T) X_flat = np.repeat(X, random_weight, axis=0) y_flat = np.repeat(Y, random_weight, axis=0) - lsvc_flat = svm.LinearSVC(random_state=0, tol=1e-12, max_iter=1000).fit( - X_flat, y_flat - ) + lsvc_flat = svm.LinearSVC( + random_state=global_random_seed + 3, tol=1e-12, max_iter=1000 + ).fit(X_flat, y_flat) pred2 = lsvc_flat.predict(T) assert_array_equal(pred1, pred2) assert_allclose(lsvc_unflat.coef_, lsvc_flat.coef_, 1, 0.0001) -def test_crammer_singer_binary(): +def test_crammer_singer_binary(global_random_seed): # Test Crammer-Singer formulation in the binary case - X, y = make_classification(n_classes=2, random_state=0) + X, y = make_classification( + n_classes=2, random_state=global_random_seed, n_samples=256 + ) for fit_intercept in (True, False): acc = ( svm.LinearSVC( fit_intercept=fit_intercept, multi_class="crammer_singer", - random_state=0, + random_state=global_random_seed + 1, ) .fit(X, y) .score(X, y) ) - assert acc > 0.9 + assert acc > 0.85 -def test_linearsvc_iris(): +def test_linearsvc_iris(global_random_seed): + iris = get_iris_dataset(global_random_seed) + # Test that LinearSVC gives plausible predictions on the iris dataset # Also, test symbolic class names (classes_). target = iris.target_names[iris.target] - clf = svm.LinearSVC(random_state=0).fit(iris.data, target) + clf = svm.LinearSVC(random_state=global_random_seed + 1).fit(iris.data, target) assert set(clf.classes_) == set(iris.target_names) assert np.mean(clf.predict(iris.data) == target) > 0.8 @@ -889,7 +936,9 @@ def test_linearsvc_iris(): assert_array_equal(pred, clf.predict(iris.data)) -def test_dense_liblinear_intercept_handling(classifier=svm.LinearSVC): +def test_dense_liblinear_intercept_handling( + classifier=svm.LinearSVC, global_random_seed=42 +): # Test that dense liblinear honours intercept_scaling param X = [[2, 1], [3, 1], [1, 3], [2, 3]] y = [0, 0, 1, 1] @@ -900,7 +949,7 @@ def test_dense_liblinear_intercept_handling(classifier=svm.LinearSVC): dual=False, C=4, tol=1e-7, - random_state=0, + random_state=global_random_seed, ) assert clf.intercept_scaling == 1, clf.intercept_scaling assert clf.fit_intercept @@ -926,7 +975,9 @@ def test_dense_liblinear_intercept_handling(classifier=svm.LinearSVC): assert_array_almost_equal(intercept1, intercept2, decimal=2) -def test_liblinear_set_coef(): +def test_liblinear_set_coef(global_random_seed): + iris = get_iris_dataset(global_random_seed) + # multi-class case clf = svm.LinearSVC().fit(iris.data, iris.target) values = clf.decision_function(iris.data) @@ -947,7 +998,9 @@ def test_liblinear_set_coef(): assert_array_equal(values, values2) -def test_immutable_coef_property(): +def test_immutable_coef_property(global_random_seed): + iris = get_iris_dataset(global_random_seed) + # Check that primal coef modification are not silently ignored svms = [ svm.SVC(kernel="linear").fit(iris.data, iris.target), @@ -978,13 +1031,15 @@ def test_linearsvc_verbose(): os.dup2(stdout, 1) # restore original stdout -def test_svc_clone_with_callable_kernel(): +def test_svc_clone_with_callable_kernel(global_random_seed): + iris = get_iris_dataset(global_random_seed) + # create SVM with callable linear kernel, check that results are the same # as with built-in linear kernel svm_callable = svm.SVC( kernel=lambda x, y: np.dot(x, y.T), probability=True, - random_state=0, + random_state=global_random_seed + 1, decision_function_shape="ovr", ) # clone for checking clonability with lambda functions.. @@ -992,7 +1047,10 @@ def test_svc_clone_with_callable_kernel(): svm_cloned.fit(iris.data, iris.target) svm_builtin = svm.SVC( - kernel="linear", probability=True, random_state=0, decision_function_shape="ovr" + kernel="linear", + probability=True, + random_state=global_random_seed + 1, + decision_function_shape="ovr", ) svm_builtin.fit(iris.data, iris.target) @@ -1017,9 +1075,12 @@ def test_svc_bad_kernel(): svc.fit(X, Y) -def test_libsvm_convergence_warnings(): +def test_libsvm_convergence_warnings(global_random_seed): a = svm.SVC( - kernel=lambda x, y: np.dot(x, y.T), probability=True, random_state=0, max_iter=2 + kernel=lambda x, y: np.dot(x, y.T), + probability=True, + random_state=global_random_seed, + max_iter=2, ) warning_msg = ( r"Solver terminated early \(max_iter=2\). Consider pre-processing " @@ -1044,18 +1105,20 @@ def test_unfitted(): # ignore convergence warnings from max_iter=1 @ignore_warnings -def test_consistent_proba(): - a = svm.SVC(probability=True, max_iter=1, random_state=0) +def test_consistent_proba(global_random_seed): + a = svm.SVC(probability=True, max_iter=1, random_state=global_random_seed) proba_1 = a.fit(X, Y).predict_proba(X) - a = svm.SVC(probability=True, max_iter=1, random_state=0) + a = svm.SVC(probability=True, max_iter=1, random_state=global_random_seed) proba_2 = a.fit(X, Y).predict_proba(X) assert_array_almost_equal(proba_1, proba_2) -def test_linear_svm_convergence_warnings(): +def test_linear_svm_convergence_warnings(global_random_seed): + iris = get_iris_dataset(global_random_seed) + # Test that warnings are raised if model does not converge - lsvc = svm.LinearSVC(random_state=0, max_iter=2) + lsvc = svm.LinearSVC(random_state=global_random_seed + 1, max_iter=2) warning_msg = "Liblinear failed to converge, increase the number of iterations." with pytest.warns(ConvergenceWarning, match=warning_msg): lsvc.fit(X, Y) @@ -1064,18 +1127,19 @@ def test_linear_svm_convergence_warnings(): assert isinstance(lsvc.n_iter_, int) assert lsvc.n_iter_ == 2 - lsvr = svm.LinearSVR(random_state=0, max_iter=2) + lsvr = svm.LinearSVR(random_state=global_random_seed + 2, max_iter=2) with pytest.warns(ConvergenceWarning, match=warning_msg): lsvr.fit(iris.data, iris.target) assert isinstance(lsvr.n_iter_, int) assert lsvr.n_iter_ == 2 -def test_svr_coef_sign(): +def test_svr_coef_sign(global_random_seed): # Test that SVR(kernel="linear") has coef_ with the right sign. # Non-regression test for #2933. - X = np.random.RandomState(21).randn(10, 3) - y = np.random.RandomState(12).randn(10) + rng = np.random.RandomState(global_random_seed) + X = rng.randn(10, 3) + y = rng.randn(10) for svr in [svm.SVR(kernel="linear"), svm.NuSVR(kernel="linear"), svm.LinearSVR()]: svr.fit(X, y) @@ -1092,7 +1156,9 @@ def test_lsvc_intercept_scaling_zero(): assert lsvc.intercept_ == 0.0 -def test_hasattr_predict_proba(): +def test_hasattr_predict_proba(global_random_seed): + iris = get_iris_dataset(global_random_seed) + # Method must be (un)available before or after fit, switched by # `probability` param @@ -1116,9 +1182,9 @@ def test_hasattr_predict_proba(): G.predict_proba(iris.data) -def test_decision_function_shape_two_class(): +def test_decision_function_shape_two_class(global_random_seed): for n_classes in [2, 3]: - X, y = make_blobs(centers=n_classes, random_state=0) + X, y = make_blobs(centers=n_classes, random_state=global_random_seed) for estimator in [svm.SVC, svm.NuSVC]: clf = OneVsRestClassifier(estimator(decision_function_shape="ovr")).fit( X, y @@ -1171,11 +1237,14 @@ def test_ovr_decision_function(): @pytest.mark.parametrize("SVCClass", [svm.SVC, svm.NuSVC]) -def test_svc_invalid_break_ties_param(SVCClass): - X, y = make_blobs(random_state=42) +def test_svc_invalid_break_ties_param(SVCClass, global_random_seed): + X, y = make_blobs(random_state=global_random_seed) svm = SVCClass( - kernel="linear", decision_function_shape="ovo", break_ties=True, random_state=42 + kernel="linear", + decision_function_shape="ovo", + break_ties=True, + random_state=global_random_seed + 1, ).fit(X, y) with pytest.raises(ValueError, match="break_ties must be False"): @@ -1183,18 +1252,21 @@ def test_svc_invalid_break_ties_param(SVCClass): @pytest.mark.parametrize("SVCClass", [svm.SVC, svm.NuSVC]) -def test_svc_ovr_tie_breaking(SVCClass): +def test_svc_ovr_tie_breaking(SVCClass, global_random_seed): """Test if predict breaks ties in OVR mode. Related issue: https://github.com/scikit-learn/scikit-learn/issues/8277 """ - X, y = make_blobs(random_state=0, n_samples=20, n_features=2) + X, y = make_blobs(random_state=global_random_seed, n_samples=20, n_features=2) xs = np.linspace(X[:, 0].min(), X[:, 0].max(), 100) ys = np.linspace(X[:, 1].min(), X[:, 1].max(), 100) xx, yy = np.meshgrid(xs, ys) common_params = dict( - kernel="rbf", gamma=1e6, random_state=42, decision_function_shape="ovr" + kernel="rbf", + gamma=1e6, + random_state=global_random_seed + 1, + decision_function_shape="ovr", ) svm = SVCClass( break_ties=False, @@ -1233,7 +1305,7 @@ def test_gamma_scale(): (LinearSVR, {"loss": "squared_epsilon_insensitive", "dual": True}), ], ) -def test_linearsvm_liblinear_sample_weight(SVM, params): +def test_linearsvm_liblinear_sample_weight(SVM, params, global_random_seed): X = np.array( [ [1, 3], @@ -1263,9 +1335,11 @@ def test_linearsvm_liblinear_sample_weight(SVM, params): y2 = np.hstack([y, 3 - y]) sample_weight = np.ones(shape=len(y) * 2) sample_weight[len(y) :] = 0 - X2, y2, sample_weight = shuffle(X2, y2, sample_weight, random_state=0) + X2, y2, sample_weight = shuffle( + X2, y2, sample_weight, random_state=global_random_seed + ) - base_estimator = SVM(random_state=42) + base_estimator = SVM(random_state=global_random_seed + 1) base_estimator.set_params(**params) base_estimator.set_params(tol=1e-12, max_iter=1000) est_no_weight = base.clone(base_estimator).fit(X, y) @@ -1275,9 +1349,9 @@ def test_linearsvm_liblinear_sample_weight(SVM, params): for method in ("predict", "decision_function"): if hasattr(base_estimator, method): - X_est_no_weight = getattr(est_no_weight, method)(X) - X_est_with_weight = getattr(est_with_weight, method)(X) - assert_allclose(X_est_no_weight, X_est_with_weight) + result_without_weight = getattr(est_no_weight, method)(X) + result_with_weight = getattr(est_with_weight, method)(X) + assert_allclose(result_without_weight, result_with_weight, atol=1e-6) @pytest.mark.parametrize("Klass", (OneClassSVM, SVR, NuSVR)) @@ -1356,14 +1430,13 @@ def test_svc_raises_error_internal_representation(): ], ) @pytest.mark.parametrize( - "dataset", - [ - make_classification(n_classes=2, n_informative=2, random_state=0), - make_classification(n_classes=3, n_informative=3, random_state=0), - make_classification(n_classes=4, n_informative=4, random_state=0), - ], + "n_classes", + [2, 3, 4], ) -def test_n_iter_libsvm(estimator, expected_n_iter_type, dataset): +def test_n_iter_libsvm(estimator, expected_n_iter_type, n_classes, global_random_seed): + dataset = make_classification( + n_classes=n_classes, n_informative=n_classes, random_state=global_random_seed + ) # Check that the type of n_iter_ is correct for the classes that inherit # from BaseSVC. # Note that for SVC, and NuSVC this is an ndarray; while for SVR, NuSVR, and