diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst index ef46d5a3fe5f1..32f49dd77d3bf 100644 --- a/doc/modules/ensemble.rst +++ b/doc/modules/ensemble.rst @@ -1029,7 +1029,7 @@ Vector Machine, a Decision Tree, and a K-nearest neighbor classifier:: >>> # Training classifiers >>> clf1 = DecisionTreeClassifier(max_depth=4) >>> clf2 = KNeighborsClassifier(n_neighbors=7) - >>> clf3 = SVC(kernel='rbf', probability=True) + >>> clf3 = SVC(gamma='scale', kernel='rbf', probability=True) >>> eclf = VotingClassifier(estimators=[('dt', clf1), ('knn', clf2), ('svc', clf3)], voting='soft', weights=[2,1,2]) >>> clf1 = clf1.fit(X,y) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 03aacf194df7d..36045a7fa6199 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -98,9 +98,9 @@ Usage examples: >>> from sklearn.model_selection import cross_val_score >>> iris = datasets.load_iris() >>> X, y = iris.data, iris.target - >>> clf = svm.SVC(probability=True, random_state=0) + >>> clf = svm.SVC(gamma='scale', probability=True, random_state=0) >>> cross_val_score(clf, X, y, scoring='neg_log_loss') # doctest: +ELLIPSIS - array([-0.07..., -0.16..., -0.06...]) + array([-0.09..., -0.16..., -0.07...]) >>> model = svm.SVC() >>> cross_val_score(model, X, y, scoring='wrong_choice') Traceback (most recent call last): @@ -1775,7 +1775,7 @@ Next, let's compare the accuracy of ``SVC`` and ``most_frequent``:: We see that ``SVC`` doesn't do much better than a dummy classifier. Now, let's change the kernel:: - >>> clf = SVC(kernel='rbf', C=1).fit(X_train, y_train) + >>> clf = SVC(gamma='scale', kernel='rbf', C=1).fit(X_train, y_train) >>> clf.score(X_test, y_test) # doctest: +ELLIPSIS 0.97... diff --git a/doc/modules/model_persistence.rst b/doc/modules/model_persistence.rst index d64657717ba79..15ecf3c2d88f6 100644 --- a/doc/modules/model_persistence.rst +++ b/doc/modules/model_persistence.rst @@ -18,12 +18,12 @@ persistence model, namely `pickle >> from sklearn import svm >>> from sklearn import datasets - >>> clf = svm.SVC() + >>> clf = svm.SVC(gamma='scale') >>> iris = datasets.load_iris() >>> X, y = iris.data, iris.target >>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf', + decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False) diff --git a/doc/modules/pipeline.rst b/doc/modules/pipeline.rst index 0667d259c4b24..bc03640d97a3f 100644 --- a/doc/modules/pipeline.rst +++ b/doc/modules/pipeline.rst @@ -172,7 +172,7 @@ object:: >>> from sklearn.datasets import load_digits >>> digits = load_digits() >>> pca1 = PCA() - >>> svm1 = SVC() + >>> svm1 = SVC(gamma='scale') >>> pipe = Pipeline([('reduce_dim', pca1), ('clf', svm1)]) >>> pipe.fit(digits.data, digits.target) ... # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS @@ -193,7 +193,7 @@ object:: >>> cachedir = mkdtemp() >>> pca2 = PCA() - >>> svm2 = SVC() + >>> svm2 = SVC(gamma='scale') >>> cached_pipe = Pipeline([('reduce_dim', pca2), ('clf', svm2)], ... 
memory=cachedir) >>> cached_pipe.fit(digits.data, digits.target) diff --git a/doc/modules/svm.rst b/doc/modules/svm.rst index 8f253437690c3..f0c6a0e76a576 100644 --- a/doc/modules/svm.rst +++ b/doc/modules/svm.rst @@ -75,10 +75,10 @@ n_features]`` holding the training samples, and an array y of class labels >>> from sklearn import svm >>> X = [[0, 0], [1, 1]] >>> y = [0, 1] - >>> clf = svm.SVC() + >>> clf = svm.SVC(gamma='scale') >>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf', + decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False) @@ -119,10 +119,10 @@ n_classes)``:: >>> X = [[0], [1], [2], [3]] >>> Y = [0, 1, 2, 3] - >>> clf = svm.SVC(decision_function_shape='ovo') + >>> clf = svm.SVC(gamma='scale', decision_function_shape='ovo') >>> clf.fit(X, Y) # doctest: +NORMALIZE_WHITESPACE SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovo', degree=3, gamma='auto', kernel='rbf', + decision_function_shape='ovo', degree=3, gamma='scale', kernel='rbf', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False) >>> dec = clf.decision_function([[1]]) @@ -318,8 +318,9 @@ floating point values instead of integer values:: >>> y = [0.5, 2.5] >>> clf = svm.SVR() >>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE - SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto', - kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False) + SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, + gamma='auto_deprecated', kernel='rbf', max_iter=-1, shrinking=True, + tol=0.001, verbose=False) >>> clf.predict([[1, 1]]) array([ 1.5]) @@ -534,7 +535,7 @@ test vectors must be provided. 
>>> gram = np.dot(X, X.T) >>> clf.fit(gram, y) # doctest: +NORMALIZE_WHITESPACE SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovr', degree=3, gamma='auto', + decision_function_shape='ovr', degree=3, gamma='auto_deprecated', kernel='precomputed', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False) >>> # predict on training examples diff --git a/doc/tutorial/basic/tutorial.rst b/doc/tutorial/basic/tutorial.rst index a29162269d53a..edb2f046d7bee 100644 --- a/doc/tutorial/basic/tutorial.rst +++ b/doc/tutorial/basic/tutorial.rst @@ -216,12 +216,12 @@ persistence model, `pickle `_:: >>> from sklearn import svm >>> from sklearn import datasets - >>> clf = svm.SVC() + >>> clf = svm.SVC(gamma='scale') >>> iris = datasets.load_iris() >>> X, y = iris.data, iris.target >>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf', + decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False) @@ -291,10 +291,10 @@ maintained:: >>> from sklearn import datasets >>> from sklearn.svm import SVC >>> iris = datasets.load_iris() - >>> clf = SVC() + >>> clf = SVC(gamma='scale') >>> clf.fit(iris.data, iris.target) # doctest: +NORMALIZE_WHITESPACE SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf', + decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False) @@ -303,7 +303,7 @@ maintained:: >>> clf.fit(iris.data, iris.target_names[iris.target]) # doctest: +NORMALIZE_WHITESPACE SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf', + decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False) @@ -332,19 +332,19 @@ more than once will overwrite what was learned by any previous ``fit()``:: >>> clf = SVC() >>> clf.set_params(kernel='linear').fit(X, y) # doctest: +NORMALIZE_WHITESPACE SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear', - max_iter=-1, probability=False, random_state=None, shrinking=True, - tol=0.001, verbose=False) + decision_function_shape='ovr', degree=3, gamma='auto_deprecated', + kernel='linear', max_iter=-1, probability=False, random_state=None, + shrinking=True, tol=0.001, verbose=False) >>> clf.predict(X_test) array([1, 0, 1, 1, 0]) - >>> clf.set_params(kernel='rbf').fit(X, y) # doctest: +NORMALIZE_WHITESPACE + >>> clf.set_params(kernel='rbf', gamma='scale').fit(X, y) # doctest: +NORMALIZE_WHITESPACE SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf', + decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False) >>> clf.predict(X_test) - array([0, 0, 0, 1, 0]) + array([1, 0, 1, 1, 0]) Here, the default kernel ``rbf`` is first changed to ``linear`` after the estimator has been constructed via ``SVC()``, and changed back to ``rbf`` to @@ -364,7 +364,8 @@ the target data fit upon:: >>> X = [[1, 2], [2, 
4], [4, 5], [3, 2], [3, 1]]
     >>> y = [0, 0, 1, 1, 2]
-    >>> classif = OneVsRestClassifier(estimator=SVC(random_state=0))
+    >>> classif = OneVsRestClassifier(estimator=SVC(gamma='scale',
+    ...                                             random_state=0))
     >>> classif.fit(X, y).predict(X)
     array([0, 0, 1, 1, 2])
 
diff --git a/doc/tutorial/statistical_inference/supervised_learning.rst b/doc/tutorial/statistical_inference/supervised_learning.rst
index 6f53e9af791c5..49e69d9ec80d4 100644
--- a/doc/tutorial/statistical_inference/supervised_learning.rst
+++ b/doc/tutorial/statistical_inference/supervised_learning.rst
@@ -455,9 +455,9 @@ classification --:class:`SVC` (Support Vector Classification).
     >>> svc = svm.SVC(kernel='linear')
     >>> svc.fit(iris_X_train, iris_y_train)    # doctest: +NORMALIZE_WHITESPACE
     SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
-      decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
-      max_iter=-1, probability=False, random_state=None, shrinking=True,
-      tol=0.001, verbose=False)
+      decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
+      kernel='linear', max_iter=-1, probability=False, random_state=None,
+      shrinking=True, tol=0.001, verbose=False)
 
 .. warning:: **Normalizing data**
 
diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst
index dd9895f7e8577..33eb61d6f2cc3 100644
--- a/doc/whats_new/v0.20.rst
+++ b/doc/whats_new/v0.20.rst
@@ -399,6 +399,12 @@ Linear, kernelized and related models
   estimators will report at most ``max_iter`` iterations even if more were
   performed. :issue:`10723` by `Joel Nothman`_.
 
+- The default value of the ``gamma`` parameter of :class:`svm.SVC`,
+  :class:`svm.NuSVC`, :class:`svm.SVR`, :class:`svm.NuSVR` and
+  :class:`svm.OneClassSVM` will change from ``'auto'`` to ``'scale'``
+  in version 0.22 to account better for unscaled features. :issue:`8361` by :user:`Gaurav Dhingra `
+ Metrics - Deprecate ``reorder`` parameter in :func:`metrics.auc` as it's no longer required diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py index 50820d4512b5b..014e613ccd260 100644 --- a/sklearn/ensemble/tests/test_bagging.py +++ b/sklearn/ensemble/tests/test_bagging.py @@ -69,7 +69,7 @@ def test_classification(): Perceptron(tol=1e-3), DecisionTreeClassifier(), KNeighborsClassifier(), - SVC()]: + SVC(gamma="scale")]: for params in grid: BaggingClassifier(base_estimator=base_estimator, random_state=rng, @@ -115,7 +115,8 @@ def fit(self, X, y): for f in ['predict', 'predict_proba', 'predict_log_proba', 'decision_function']: # Trained on sparse format sparse_classifier = BaggingClassifier( - base_estimator=CustomSVC(decision_function_shape='ovr'), + base_estimator=CustomSVC(gamma='scale', + decision_function_shape='ovr'), random_state=1, **params ).fit(X_train_sparse, y_train) @@ -123,12 +124,13 @@ def fit(self, X, y): # Trained on dense format dense_classifier = BaggingClassifier( - base_estimator=CustomSVC(decision_function_shape='ovr'), + base_estimator=CustomSVC(gamma='scale', + decision_function_shape='ovr'), random_state=1, **params ).fit(X_train, y_train) dense_results = getattr(dense_classifier, f)(X_test) - assert_array_equal(sparse_results, dense_results) + assert_array_almost_equal(sparse_results, dense_results) sparse_type = type(X_train_sparse) types = [i.data_type_ for i in sparse_classifier.estimators_] @@ -151,7 +153,7 @@ def test_regression(): DummyRegressor(), DecisionTreeRegressor(), KNeighborsRegressor(), - SVR()]: + SVR(gamma='scale')]: for params in grid: BaggingRegressor(base_estimator=base_estimator, random_state=rng, @@ -197,7 +199,7 @@ def fit(self, X, y): # Trained on sparse format sparse_classifier = BaggingRegressor( - base_estimator=CustomSVR(), + base_estimator=CustomSVR(gamma='scale'), random_state=1, **params ).fit(X_train_sparse, y_train) @@ -205,7 +207,7 @@ def fit(self, X, y): # Trained on dense format dense_results = BaggingRegressor( - base_estimator=CustomSVR(), + base_estimator=CustomSVR(gamma='scale'), random_state=1, **params ).fit(X_train, y_train).predict(X_test) @@ -310,7 +312,7 @@ def test_oob_score_classification(): iris.target, random_state=rng) - for base_estimator in [DecisionTreeClassifier(), SVC()]: + for base_estimator in [DecisionTreeClassifier(), SVC(gamma="scale")]: clf = BaggingClassifier(base_estimator=base_estimator, n_estimators=100, bootstrap=True, @@ -440,7 +442,8 @@ def test_parallel_classification(): assert_array_almost_equal(y1, y3) # decision_function - ensemble = BaggingClassifier(SVC(decision_function_shape='ovr'), + ensemble = BaggingClassifier(SVC(gamma='scale', + decision_function_shape='ovr'), n_jobs=3, random_state=0).fit(X_train, y_train) @@ -457,7 +460,8 @@ def test_parallel_classification(): "".format(X_test.shape[1], X_err.shape[1]), ensemble.decision_function, X_err) - ensemble = BaggingClassifier(SVC(decision_function_shape='ovr'), + ensemble = BaggingClassifier(SVC(gamma='scale', + decision_function_shape='ovr'), n_jobs=1, random_state=0).fit(X_train, y_train) @@ -501,7 +505,7 @@ def test_gridsearch(): parameters = {'n_estimators': (1, 2), 'base_estimator__C': (1, 2)} - GridSearchCV(BaggingClassifier(SVC()), + GridSearchCV(BaggingClassifier(SVC(gamma="scale")), parameters, scoring="roc_auc").fit(X, y) @@ -550,7 +554,7 @@ def test_base_estimator(): assert_true(isinstance(ensemble.base_estimator_, DecisionTreeRegressor)) - ensemble = BaggingRegressor(SVR(), + ensemble 
= BaggingRegressor(SVR(gamma='scale'), n_jobs=3, random_state=0).fit(X_train, y_train) assert_true(isinstance(ensemble.base_estimator_, SVR)) diff --git a/sklearn/ensemble/tests/test_voting_classifier.py b/sklearn/ensemble/tests/test_voting_classifier.py index ef6c0c739763b..d5a8e055f5d45 100644 --- a/sklearn/ensemble/tests/test_voting_classifier.py +++ b/sklearn/ensemble/tests/test_voting_classifier.py @@ -251,7 +251,7 @@ def test_sample_weight(): """Tests sample_weight parameter of VotingClassifier""" clf1 = LogisticRegression(random_state=123) clf2 = RandomForestClassifier(random_state=123) - clf3 = SVC(probability=True, random_state=123) + clf3 = SVC(gamma='scale', probability=True, random_state=123) eclf1 = VotingClassifier(estimators=[ ('lr', clf1), ('rf', clf2), ('svc', clf3)], voting='soft').fit(X, y, sample_weight=np.ones((len(y),))) diff --git a/sklearn/ensemble/tests/test_weight_boosting.py b/sklearn/ensemble/tests/test_weight_boosting.py index b6912de138dd6..4a8a806ed6a62 100755 --- a/sklearn/ensemble/tests/test_weight_boosting.py +++ b/sklearn/ensemble/tests/test_weight_boosting.py @@ -280,29 +280,27 @@ def test_error(): def test_base_estimator(): # Test different base estimators. from sklearn.ensemble import RandomForestClassifier - from sklearn.svm import SVC # XXX doesn't work with y_class because RF doesn't support classes_ # Shouldn't AdaBoost run a LabelBinarizer? clf = AdaBoostClassifier(RandomForestClassifier()) clf.fit(X, y_regr) - clf = AdaBoostClassifier(SVC(), algorithm="SAMME") + clf = AdaBoostClassifier(SVC(gamma="scale"), algorithm="SAMME") clf.fit(X, y_class) from sklearn.ensemble import RandomForestRegressor - from sklearn.svm import SVR clf = AdaBoostRegressor(RandomForestRegressor(), random_state=0) clf.fit(X, y_regr) - clf = AdaBoostRegressor(SVR(), random_state=0) + clf = AdaBoostRegressor(SVR(gamma='scale'), random_state=0) clf.fit(X, y_regr) # Check that an empty discrete ensemble fails in fit, not predict. 
X_fail = [[1, 1], [1, 1], [1, 1], [1, 1]] y_fail = ["foo", "bar", 1, 2] - clf = AdaBoostClassifier(SVC(), algorithm="SAMME") + clf = AdaBoostClassifier(SVC(gamma="scale"), algorithm="SAMME") assert_raises_regexp(ValueError, "worse than random", clf.fit, X_fail, y_fail) @@ -344,14 +342,14 @@ def fit(self, X, y, sample_weight=None): # Trained on sparse format sparse_classifier = AdaBoostClassifier( - base_estimator=CustomSVC(probability=True), + base_estimator=CustomSVC(gamma='scale', probability=True), random_state=1, algorithm="SAMME" ).fit(X_train_sparse, y_train) # Trained on dense format dense_classifier = AdaBoostClassifier( - base_estimator=CustomSVC(probability=True), + base_estimator=CustomSVC(gamma='scale', probability=True), random_state=1, algorithm="SAMME" ).fit(X_train, y_train) @@ -438,13 +436,13 @@ def fit(self, X, y, sample_weight=None): # Trained on sparse format sparse_classifier = AdaBoostRegressor( - base_estimator=CustomSVR(), + base_estimator=CustomSVR(gamma='scale'), random_state=1 ).fit(X_train_sparse, y_train) # Trained on dense format dense_classifier = dense_results = AdaBoostRegressor( - base_estimator=CustomSVR(), + base_estimator=CustomSVR(gamma='scale'), random_state=1 ).fit(X_train, y_train) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 630de2860c637..6ba673d2fbf7c 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -740,7 +740,7 @@ class GridSearchCV(BaseSearchCV): >>> from sklearn import svm, grid_search, datasets >>> iris = datasets.load_iris() >>> parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]} - >>> svr = svm.SVC() + >>> svr = svm.SVC(gamma="scale") >>> clf = grid_search.GridSearchCV(svr, parameters) >>> clf.fit(iris.data, iris.target) ... # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 4af8a03c3e194..d9e7d3166b8ff 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -937,7 +937,7 @@ class GridSearchCV(BaseSearchCV): >>> from sklearn.model_selection import GridSearchCV >>> iris = datasets.load_iris() >>> parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]} - >>> svc = svm.SVC() + >>> svc = svm.SVC(gamma="scale") >>> clf = GridSearchCV(svc, parameters) >>> clf.fit(iris.data, iris.target) ... 
# doctest: +NORMALIZE_WHITESPACE +ELLIPSIS diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index 6412a14c036ed..87d93894683b8 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -484,7 +484,7 @@ def test_grid_search_bad_param_grid(): GridSearchCV, clf, param_dict) param_dict = {"C": []} - clf = SVC() + clf = SVC(gamma="scale") assert_raise_message( ValueError, "Parameter values for parameter (C) need to be a non-empty sequence.", @@ -499,7 +499,7 @@ def test_grid_search_bad_param_grid(): GridSearchCV, clf, param_dict) param_dict = {"C": np.ones(6).reshape(3, 2)} - clf = SVC() + clf = SVC(gamma="scale") assert_raises(ValueError, GridSearchCV, clf, param_dict) @@ -828,7 +828,8 @@ def test_grid_search_cv_results(): n_candidates = n_grid_points for iid in (False, True): - search = GridSearchCV(SVC(), cv=n_splits, iid=iid, param_grid=params) + search = GridSearchCV(SVC(gamma='scale'), cv=n_splits, iid=iid, + param_grid=params) search.fit(X, y) assert_equal(iid, search.iid) cv_results = search.cv_results_ @@ -878,8 +879,9 @@ def test_random_search_cv_results(): n_cand = n_search_iter for iid in (False, True): - search = RandomizedSearchCV(SVC(), n_iter=n_search_iter, cv=n_splits, - iid=iid, param_distributions=params) + search = RandomizedSearchCV(SVC(gamma='scale'), n_iter=n_search_iter, + cv=n_splits, iid=iid, + param_distributions=params) search.fit(X, y) assert_equal(iid, search.iid) cv_results = search.cv_results_ @@ -908,7 +910,8 @@ def test_search_iid_param(): # create "cv" for splits cv = [[mask, ~mask], [~mask, mask]] # once with iid=True (default) - grid_search = GridSearchCV(SVC(), param_grid={'C': [1, 10]}, cv=cv) + grid_search = GridSearchCV(SVC(), param_grid={'C': [1, 10]}, + cv=cv) random_search = RandomizedSearchCV(SVC(), n_iter=2, param_distributions={'C': [1, 10]}, cv=cv) @@ -942,7 +945,8 @@ def test_search_iid_param(): assert_almost_equal(test_mean, expected_test_mean) assert_almost_equal(test_std, expected_test_std) assert_array_almost_equal(test_cv_scores, - cross_val_score(SVC(C=1), X, y, cv=cv)) + cross_val_score(SVC(C=1), X, + y, cv=cv)) # For the train scores, we do not take a weighted mean irrespective of # i.i.d. or not @@ -998,9 +1002,9 @@ def test_grid_search_cv_results_multimetric(): for scoring in ({'accuracy': make_scorer(accuracy_score), 'recall': make_scorer(recall_score)}, 'accuracy', 'recall'): - grid_search = GridSearchCV(SVC(), cv=n_splits, iid=iid, - param_grid=params, scoring=scoring, - refit=False) + grid_search = GridSearchCV(SVC(gamma='scale'), cv=n_splits, + iid=iid, param_grid=params, + scoring=scoring, refit=False) grid_search.fit(X, y) assert_equal(grid_search.iid, iid) grid_searches.append(grid_search) @@ -1095,8 +1099,8 @@ def test_search_cv_results_rank_tie_breaking(): # which would result in a tie of their mean cv-scores param_grid = {'C': [1, 1.001, 0.001]} - grid_search = GridSearchCV(SVC(), param_grid=param_grid) - random_search = RandomizedSearchCV(SVC(), n_iter=3, + grid_search = GridSearchCV(SVC(gamma="scale"), param_grid=param_grid) + random_search = RandomizedSearchCV(SVC(gamma="scale"), n_iter=3, param_distributions=param_grid) for search in (grid_search, random_search): @@ -1282,7 +1286,7 @@ def test_predict_proba_disabled(): # Test predict_proba when disabled on estimator. 
X = np.arange(20).reshape(5, -1) y = [0, 0, 1, 1, 1] - clf = SVC(probability=False) + clf = SVC(gamma='scale', probability=False) gs = GridSearchCV(clf, {}, cv=2).fit(X, y) assert_false(hasattr(gs, "predict_proba")) @@ -1536,18 +1540,18 @@ def test_deprecated_grid_search_iid(): depr_message = ("The default of the `iid` parameter will change from True " "to False in version 0.22") X, y = make_blobs(n_samples=54, random_state=0, centers=2) - grid = GridSearchCV(SVC(), param_grid={'C': [1]}, cv=3) + grid = GridSearchCV(SVC(gamma='scale'), param_grid={'C': [1]}, cv=3) # no warning with equally sized test sets assert_no_warnings(grid.fit, X, y) - grid = GridSearchCV(SVC(), param_grid={'C': [1]}, cv=5) + grid = GridSearchCV(SVC(gamma='scale'), param_grid={'C': [1]}, cv=5) # warning because 54 % 5 != 0 assert_warns_message(DeprecationWarning, depr_message, grid.fit, X, y) - grid = GridSearchCV(SVC(), param_grid={'C': [1]}, cv=2) + grid = GridSearchCV(SVC(gamma='scale'), param_grid={'C': [1]}, cv=2) # warning because stratification into two classes and 27 % 2 != 0 assert_warns_message(DeprecationWarning, depr_message, grid.fit, X, y) - grid = GridSearchCV(SVC(), param_grid={'C': [1]}, cv=KFold(2)) + grid = GridSearchCV(SVC(gamma='scale'), param_grid={'C': [1]}, cv=KFold(2)) # no warning because no stratification and 54 % 2 == 0 assert_no_warnings(grid.fit, X, y) diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index 23d127631ad7d..2929916619769 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ b/sklearn/model_selection/tests/test_validation.py @@ -339,10 +339,10 @@ def test_cross_validate_invalid_scoring_param(): # Multiclass Scorers that return multiple values are not supported yet assert_raises_regex(ValueError, "scoring must return a number, got", - cross_validate, SVC(), X, y, + cross_validate, SVC(gamma='scale'), X, y, scoring=multivalued_scorer) assert_raises_regex(ValueError, "scoring must return a number, got", - cross_validate, SVC(), X, y, + cross_validate, SVC(gamma='scale'), X, y, scoring={"foo": multivalued_scorer}) assert_raises_regex(ValueError, "'mse' is not a valid scoring value.", @@ -572,7 +572,7 @@ def test_cross_val_score_precomputed(): assert_array_almost_equal(score_precomputed, score_linear) # test with callable - svm = SVC(kernel=lambda x, y: np.dot(x, y.T)) + svm = SVC(gamma='scale', kernel=lambda x, y: np.dot(x, y.T)) score_callable = cross_val_score(svm, X, y) assert_array_almost_equal(score_precomputed, score_callable) diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index 03947d2f923e4..51c37097adca2 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -1773,7 +1773,8 @@ def test_cv_pipeline_precomputed(): y_true = np.ones((4,)) K = X.dot(X.T) kcent = KernelCenterer() - pipeline = Pipeline([("kernel_centerer", kcent), ("svr", SVR())]) + pipeline = Pipeline([("kernel_centerer", kcent), ("svr", + SVR(gamma='scale'))]) # did the pipeline set the _pairwise attribute? assert_true(pipeline._pairwise) diff --git a/sklearn/svm/base.py b/sklearn/svm/base.py index eb5bb01508953..289064718bf65 100644 --- a/sklearn/svm/base.py +++ b/sklearn/svm/base.py @@ -168,7 +168,32 @@ def fit(self, X, y, sample_weight=None): "boolean masks (use `indices=True` in CV)." 
                             % (sample_weight.shape, X.shape))
 
-        if self.gamma == 'auto':
+        if self.gamma in ('scale', 'auto_deprecated'):
+            if sparse:
+                # std = sqrt(E[X^2] - E[X]^2)
+                X_std = np.sqrt((X.multiply(X)).mean() - (X.mean())**2)
+            else:
+                X_std = X.std()
+            if self.gamma == 'scale':
+                if X_std != 0:
+                    self._gamma = 1.0 / (X.shape[1] * X_std)
+                else:
+                    self._gamma = 1.0
+            else:
+                kernel_uses_gamma = (not callable(self.kernel) and self.kernel
+                                     not in ('linear', 'precomputed'))
+                if kernel_uses_gamma and not np.isclose(X_std, 1.0):
+                    # NOTE: when deprecation ends we need to remove explicitly
+                    # setting `gamma` in examples (also in tests). See
+                    # https://github.com/scikit-learn/scikit-learn/pull/10331
+                    # for the examples/tests that need to be reverted.
+                    warnings.warn("The default value of gamma will change "
+                                  "from 'auto' to 'scale' in version 0.22 to "
+                                  "account better for unscaled features. Set "
+                                  "gamma explicitly to 'auto' or 'scale' to "
+                                  "avoid this warning.", FutureWarning)
+                self._gamma = 1.0 / X.shape[1]
+        elif self.gamma == 'auto':
             self._gamma = 1.0 / X.shape[1]
         else:
             self._gamma = self.gamma
diff --git a/sklearn/svm/classes.py b/sklearn/svm/classes.py
index 7a7955d30d29e..9ed8c4d6a2db1 100644
--- a/sklearn/svm/classes.py
+++ b/sklearn/svm/classes.py
@@ -446,12 +446,12 @@ class SVC(BaseSVC):
         Penalty parameter C of the error term.
 
     kernel : string, optional (default='rbf')
-         Specifies the kernel type to be used in the algorithm.
-         It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or
-         a callable.
-         If none is given, 'rbf' will be used. If a callable is given it is
-         used to pre-compute the kernel matrix from data matrices; that matrix
-         should be an array of shape ``(n_samples, n_samples)``.
+        Specifies the kernel type to be used in the algorithm.
+        It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or
+        a callable.
+        If none is given, 'rbf' will be used. If a callable is given it is
+        used to pre-compute the kernel matrix from data matrices; that matrix
+        should be an array of shape ``(n_samples, n_samples)``.
 
     degree : int, optional (default=3)
         Degree of the polynomial kernel function ('poly').
@@ -459,7 +459,13 @@ class SVC(BaseSVC):
 
     gamma : float, optional (default='auto')
         Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.
-        If gamma is 'auto' then 1/n_features will be used instead.
+
+        The current default is 'auto', which uses 1 / n_features. If
+        ``gamma='scale'`` is passed, 1 / (n_features * X.std()) is used
+        as the value of gamma instead. The default will change from
+        'auto' to 'scale' in version 0.22. 'auto_deprecated', a
+        deprecated variant of 'auto', serves as the default to indicate
+        that no explicit value of gamma was passed.
 
     coef0 : float, optional (default=0.0)
         Independent term in kernel function.
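A quick way to sanity-check the ``'scale'`` heuristic documented above; this is a standalone sketch run against the patched branch, not part of the patch itself, and ``_gamma`` is the private attribute the new code and tests use::

    import numpy as np
    from sklearn import svm

    X = np.array([[0.], [1.]])   # n_features = 1, X.std() = 0.5
    y = [0, 1]

    clf = svm.SVC(gamma='scale').fit(X, y)
    # gamma='scale' resolves to 1 / (n_features * X.std()) = 2.0 here
    assert np.isclose(clf._gamma, 1.0 / (X.shape[1] * X.std()))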
@@ -550,7 +556,7 @@ class SVC(BaseSVC):
     >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
     >>> y = np.array([1, 1, 2, 2])
     >>> from sklearn.svm import SVC
-    >>> clf = SVC()
+    >>> clf = SVC(gamma='auto')
     >>> clf.fit(X, y) #doctest: +NORMALIZE_WHITESPACE
     SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
       decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
@@ -573,7 +579,7 @@ class SVC(BaseSVC):
 
     _impl = 'c_svc'
 
-    def __init__(self, C=1.0, kernel='rbf', degree=3, gamma='auto',
+    def __init__(self, C=1.0, kernel='rbf', degree=3, gamma='auto_deprecated',
                  coef0=0.0, shrinking=True, probability=False,
                  tol=1e-3, cache_size=200, class_weight=None,
                  verbose=False, max_iter=-1, decision_function_shape='ovr',
@@ -618,7 +624,13 @@ class NuSVC(BaseSVC):
 
     gamma : float, optional (default='auto')
         Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.
-        If gamma is 'auto' then 1/n_features will be used instead.
+
+        The current default is 'auto', which uses 1 / n_features. If
+        ``gamma='scale'`` is passed, 1 / (n_features * X.std()) is used
+        as the value of gamma instead. The default will change from
+        'auto' to 'scale' in version 0.22. 'auto_deprecated', a
+        deprecated variant of 'auto', serves as the default to indicate
+        that no explicit value of gamma was passed.
 
     coef0 : float, optional (default=0.0)
         Independent term in kernel function.
@@ -708,10 +720,10 @@ class NuSVC(BaseSVC):
     >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
     >>> y = np.array([1, 1, 2, 2])
     >>> from sklearn.svm import NuSVC
-    >>> clf = NuSVC()
+    >>> clf = NuSVC(gamma='scale')
     >>> clf.fit(X, y) #doctest: +NORMALIZE_WHITESPACE
     NuSVC(cache_size=200, class_weight=None, coef0=0.0,
-      decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
+      decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
       max_iter=-1, nu=0.5, probability=False, random_state=None,
       shrinking=True, tol=0.001, verbose=False)
     >>> print(clf.predict([[-0.8, -1]]))
@@ -729,9 +741,9 @@ class NuSVC(BaseSVC):
 
     _impl = 'nu_svc'
 
-    def __init__(self, nu=0.5, kernel='rbf', degree=3, gamma='auto', coef0=0.0,
-                 shrinking=True, probability=False, tol=1e-3, cache_size=200,
-                 class_weight=None, verbose=False, max_iter=-1,
+    def __init__(self, nu=0.5, kernel='rbf', degree=3, gamma='auto_deprecated',
+                 coef0=0.0, shrinking=True, probability=False, tol=1e-3,
+                 cache_size=200, class_weight=None, verbose=False, max_iter=-1,
                  decision_function_shape='ovr', random_state=None):
 
         super(NuSVC, self).__init__(
@@ -776,7 +788,13 @@ class SVR(BaseLibSVM, RegressorMixin):
 
     gamma : float, optional (default='auto')
         Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.
-        If gamma is 'auto' then 1/n_features will be used instead.
+
+        The current default is 'auto', which uses 1 / n_features. If
+        ``gamma='scale'`` is passed, 1 / (n_features * X.std()) is used
+        as the value of gamma instead. The default will change from
+        'auto' to 'scale' in version 0.22. 'auto_deprecated', a
+        deprecated variant of 'auto', serves as the default to indicate
+        that no explicit value of gamma was passed.
 
     coef0 : float, optional (default=0.0)
         Independent term in kernel function.
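The deprecation path added in ``base.py`` only warns when the default is actually relied upon. A sketch of the observable behaviour, assuming this branch is installed (not part of the patch)::

    import warnings
    import numpy as np
    from sklearn import svm

    X, y = np.array([[0., 1.2], [1., 1.3]]), [0, 1]

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        svm.SVC().fit(X, y)              # default gamma -> FutureWarning
    assert any(w.category is FutureWarning for w in caught)

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        svm.SVC(gamma='auto').fit(X, y)  # explicit gamma -> no warning
    assert not caught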
@@ -831,9 +849,9 @@ class SVR(BaseLibSVM, RegressorMixin):
     >>> np.random.seed(0)
     >>> y = np.random.randn(n_samples)
     >>> X = np.random.randn(n_samples, n_features)
-    >>> clf = SVR(C=1.0, epsilon=0.2)
+    >>> clf = SVR(gamma='scale', C=1.0, epsilon=0.2)
     >>> clf.fit(X, y) #doctest: +NORMALIZE_WHITESPACE
-    SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.2, gamma='auto',
+    SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.2, gamma='scale',
       kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)
 
     See also
@@ -849,8 +867,8 @@ class SVR(BaseLibSVM, RegressorMixin):
 
     _impl = 'epsilon_svr'
 
-    def __init__(self, kernel='rbf', degree=3, gamma='auto', coef0=0.0,
-                 tol=1e-3, C=1.0, epsilon=0.1, shrinking=True,
+    def __init__(self, kernel='rbf', degree=3, gamma='auto_deprecated',
+                 coef0=0.0, tol=1e-3, C=1.0, epsilon=0.1, shrinking=True,
                  cache_size=200, verbose=False, max_iter=-1):
 
         super(SVR, self).__init__(
@@ -894,7 +912,13 @@ class NuSVR(BaseLibSVM, RegressorMixin):
 
     gamma : float, optional (default='auto')
         Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.
-        If gamma is 'auto' then 1/n_features will be used instead.
+
+        The current default is 'auto', which uses 1 / n_features. If
+        ``gamma='scale'`` is passed, 1 / (n_features * X.std()) is used
+        as the value of gamma instead. The default will change from
+        'auto' to 'scale' in version 0.22. 'auto_deprecated', a
+        deprecated variant of 'auto', serves as the default to indicate
+        that no explicit value of gamma was passed.
 
     coef0 : float, optional (default=0.0)
         Independent term in kernel function.
@@ -946,9 +970,9 @@ class NuSVR(BaseLibSVM, RegressorMixin):
     >>> np.random.seed(0)
     >>> y = np.random.randn(n_samples)
     >>> X = np.random.randn(n_samples, n_features)
-    >>> clf = NuSVR(C=1.0, nu=0.1)
+    >>> clf = NuSVR(gamma='scale', C=1.0, nu=0.1)
     >>> clf.fit(X, y)  #doctest: +NORMALIZE_WHITESPACE
-    NuSVR(C=1.0, cache_size=200, coef0=0.0, degree=3, gamma='auto',
+    NuSVR(C=1.0, cache_size=200, coef0=0.0, degree=3, gamma='scale',
       kernel='rbf', max_iter=-1, nu=0.1, shrinking=True, tol=0.001,
       verbose=False)
 
@@ -965,8 +989,8 @@ class NuSVR(BaseLibSVM, RegressorMixin):
     _impl = 'nu_svr'
 
     def __init__(self, nu=0.5, C=1.0, kernel='rbf', degree=3,
-                 gamma='auto', coef0=0.0, shrinking=True, tol=1e-3,
-                 cache_size=200, verbose=False, max_iter=-1):
+                 gamma='auto_deprecated', coef0=0.0, shrinking=True,
+                 tol=1e-3, cache_size=200, verbose=False, max_iter=-1):
 
         super(NuSVR, self).__init__(
             kernel=kernel, degree=degree, gamma=gamma, coef0=coef0,
@@ -1005,7 +1029,13 @@ class OneClassSVM(BaseLibSVM, OutlierMixin):
 
     gamma : float, optional (default='auto')
         Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.
-        If gamma is 'auto' then 1/n_features will be used instead.
+
+        The current default is 'auto', which uses 1 / n_features. If
+        ``gamma='scale'`` is passed, 1 / (n_features * X.std()) is used
+        as the value of gamma instead. The default will change from
+        'auto' to 'scale' in version 0.22. 'auto_deprecated', a
+        deprecated variant of 'auto', serves as the default to indicate
+        that no explicit value of gamma was passed.
 
     coef0 : float, optional (default=0.0)
         Independent term in kernel function.
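For sparse input, ``base.py`` above computes the standard deviation through the identity std = sqrt(E[X^2] - E[X]^2), since scipy.sparse matrices expose no ``.std()`` method. A minimal check of that identity (illustration only, not part of the patch)::

    import numpy as np
    import scipy.sparse as sp

    X_dense = np.array([[0., 2.], [1., 0.], [3., 1.]])
    X_sparse = sp.csr_matrix(X_dense)

    # same formula as in sklearn/svm/base.py for the sparse branch
    X_std = np.sqrt((X_sparse.multiply(X_sparse)).mean()
                    - X_sparse.mean() ** 2)
    assert np.isclose(X_std, X_dense.std())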
@@ -1066,8 +1096,8 @@ class OneClassSVM(BaseLibSVM, OutlierMixin): _impl = 'one_class' - def __init__(self, kernel='rbf', degree=3, gamma='auto', coef0=0.0, - tol=1e-3, nu=0.5, shrinking=True, cache_size=200, + def __init__(self, kernel='rbf', degree=3, gamma='auto_deprecated', + coef0=0.0, tol=1e-3, nu=0.5, shrinking=True, cache_size=200, verbose=False, max_iter=-1, random_state=None): super(OneClassSVM, self).__init__( diff --git a/sklearn/svm/tests/test_sparse.py b/sklearn/svm/tests/test_sparse.py index 56ea0b9eaf280..5fa83050a98f1 100644 --- a/sklearn/svm/tests/test_sparse.py +++ b/sklearn/svm/tests/test_sparse.py @@ -83,10 +83,10 @@ def test_svc(): kernels = ["linear", "poly", "rbf", "sigmoid"] for dataset in datasets: for kernel in kernels: - clf = svm.SVC(kernel=kernel, probability=True, random_state=0, - decision_function_shape='ovo') - sp_clf = svm.SVC(kernel=kernel, probability=True, random_state=0, - decision_function_shape='ovo') + clf = svm.SVC(gamma='scale', kernel=kernel, probability=True, + random_state=0, decision_function_shape='ovo') + sp_clf = svm.SVC(gamma='scale', kernel=kernel, probability=True, + random_state=0, decision_function_shape='ovo') check_svm_model_equal(clf, sp_clf, *dataset) @@ -127,15 +127,16 @@ def test_svc_with_custom_kernel(): def kfunc(x, y): return safe_sparse_dot(x, y.T) clf_lin = svm.SVC(kernel='linear').fit(X_sp, Y) - clf_mylin = svm.SVC(kernel=kfunc).fit(X_sp, Y) + clf_mylin = svm.SVC(gamma='scale', kernel=kfunc).fit(X_sp, Y) assert_array_equal(clf_lin.predict(X_sp), clf_mylin.predict(X_sp)) def test_svc_iris(): # Test the sparse SVC with the iris dataset for k in ('linear', 'poly', 'rbf'): - sp_clf = svm.SVC(kernel=k).fit(iris.data, iris.target) - clf = svm.SVC(kernel=k).fit(iris.data.toarray(), iris.target) + sp_clf = svm.SVC(gamma='scale', kernel=k).fit(iris.data, iris.target) + clf = svm.SVC(gamma='scale', kernel=k).fit(iris.data.toarray(), + iris.target) assert_array_almost_equal(clf.support_vectors_, sp_clf.support_vectors_.toarray()) @@ -175,16 +176,16 @@ def test_sparse_decision_function(): def test_error(): # Test that it gives proper exception on deficient input # impossible value of C - assert_raises(ValueError, svm.SVC(C=-1).fit, X, Y) + assert_raises(ValueError, svm.SVC(gamma='scale', C=-1).fit, X, Y) # impossible value of nu - clf = svm.NuSVC(nu=0.0) + clf = svm.NuSVC(gamma='scale', nu=0.0) assert_raises(ValueError, clf.fit, X_sp, Y) Y2 = Y[:-1] # wrong dimensions for labels assert_raises(ValueError, clf.fit, X_sp, Y2) - clf = svm.SVC() + clf = svm.SVC(gamma="scale") clf.fit(X_sp, Y) assert_array_equal(clf.predict(T), true_result) @@ -241,7 +242,7 @@ def test_weight(): X_ = sparse.csr_matrix(X_) for clf in (linear_model.LogisticRegression(), svm.LinearSVC(random_state=0), - svm.SVC()): + svm.SVC(gamma="scale")): clf.set_params(class_weight={0: 5}) clf.fit(X_[:180], y_[:180]) y_pred = clf.predict(X_[180:]) @@ -250,7 +251,7 @@ def test_weight(): def test_sample_weights(): # Test weights on individual samples - clf = svm.SVC() + clf = svm.SVC(gamma="scale") clf.fit(X_sp, Y) assert_array_equal(clf.predict([X[2]]), [1.]) @@ -276,8 +277,8 @@ def test_sparse_oneclasssvm(): kernels = ["linear", "poly", "rbf", "sigmoid"] for dataset in datasets: for kernel in kernels: - clf = svm.OneClassSVM(kernel=kernel) - sp_clf = svm.OneClassSVM(kernel=kernel) + clf = svm.OneClassSVM(gamma='scale', kernel=kernel) + sp_clf = svm.OneClassSVM(gamma='scale', kernel=kernel) check_svm_model_equal(clf, sp_clf, *dataset) @@ -313,15 +314,15 @@ def 
test_sparse_realdata(): def test_sparse_svc_clone_with_callable_kernel(): # Test that the "dense_fit" is called even though we use sparse input # meaning that everything works fine. - a = svm.SVC(C=1, kernel=lambda x, y: x * y.T, probability=True, - random_state=0) + a = svm.SVC(gamma='scale', C=1, kernel=lambda x, y: x * y.T, + probability=True, random_state=0) b = base.clone(a) b.fit(X_sp, Y) pred = b.predict(X_sp) b.predict_proba(X_sp) - dense_svm = svm.SVC(C=1, kernel=lambda x, y: np.dot(x, y.T), + dense_svm = svm.SVC(gamma='scale', C=1, kernel=lambda x, y: np.dot(x, y.T), probability=True, random_state=0) pred_dense = dense_svm.fit(X, Y).predict(X) assert_array_equal(pred_dense, pred) @@ -329,17 +330,17 @@ def test_sparse_svc_clone_with_callable_kernel(): def test_timeout(): - sp = svm.SVC(C=1, kernel=lambda x, y: x * y.T, probability=True, - random_state=0, max_iter=1) + sp = svm.SVC(gamma='scale', C=1, kernel=lambda x, y: x * y.T, + probability=True, random_state=0, max_iter=1) assert_warns(ConvergenceWarning, sp.fit, X_sp, Y) def test_consistent_proba(): - a = svm.SVC(probability=True, max_iter=1, random_state=0) + a = svm.SVC(gamma='scale', probability=True, max_iter=1, random_state=0) with ignore_warnings(category=ConvergenceWarning): proba_1 = a.fit(X, Y).predict_proba(X) - a = svm.SVC(probability=True, max_iter=1, random_state=0) + a = svm.SVC(gamma='scale', probability=True, max_iter=1, random_state=0) with ignore_warnings(category=ConvergenceWarning): proba_2 = a.fit(X, Y).predict_proba(X) assert_array_almost_equal(proba_1, proba_2) diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index 8bbf662b91e80..d3384ffe6a5e4 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -20,6 +20,7 @@ from sklearn.utils.testing import assert_raises_regexp, assert_warns from sklearn.utils.testing import assert_warns_message, assert_raise_message from sklearn.utils.testing import ignore_warnings, assert_raises +from sklearn.utils.testing import assert_no_warnings from sklearn.exceptions import ConvergenceWarning from sklearn.exceptions import NotFittedError from sklearn.multiclass import OneVsRestClassifier @@ -54,7 +55,7 @@ def test_libsvm_iris(): # shuffle the dataset so that labels are not ordered for k in ('linear', 'rbf'): - clf = svm.SVC(kernel=k).fit(iris.data, iris.target) + clf = svm.SVC(gamma='scale', kernel=k).fit(iris.data, iris.target) assert_greater(np.mean(clf.predict(iris.data) == iris.target), 0.9) assert_true(hasattr(clf, "coef_") == (k == 'linear')) @@ -119,7 +120,7 @@ def test_precomputed(): # matrix. 
kernel is just a linear kernel kfunc = lambda x, y: np.dot(x, y.T) - clf = svm.SVC(kernel=kfunc) + clf = svm.SVC(gamma='scale', kernel=kfunc) clf.fit(X, Y) pred = clf.predict(T) @@ -151,7 +152,7 @@ def test_precomputed(): pred = clf.predict(K) assert_almost_equal(np.mean(pred == iris.target), .99, decimal=2) - clf = svm.SVC(kernel=kfunc) + clf = svm.SVC(gamma='scale', kernel=kfunc) clf.fit(iris.data, iris.target) assert_almost_equal(np.mean(pred == iris.target), .99, decimal=2) @@ -171,7 +172,7 @@ def test_svr(): # non-regression test; previously, BaseLibSVM would check that # len(np.unique(y)) < 2, which must only be done for SVC - svm.SVR().fit(diabetes.data, np.ones(len(diabetes.data))) + svm.SVR(gamma='scale').fit(diabetes.data, np.ones(len(diabetes.data))) svm.LinearSVR().fit(diabetes.data, np.ones(len(diabetes.data))) @@ -230,22 +231,22 @@ def test_svr_errors(): y = [0.0, 0.5] # Bad kernel - clf = svm.SVR(kernel=lambda x, y: np.array([[1.0]])) + clf = svm.SVR(gamma='scale', kernel=lambda x, y: np.array([[1.0]])) clf.fit(X, y) assert_raises(ValueError, clf.predict, X) def test_oneclass(): # Test OneClassSVM - clf = svm.OneClassSVM() + clf = svm.OneClassSVM(gamma='scale') clf.fit(X) pred = clf.predict(T) assert_array_equal(pred, [-1, -1, -1]) assert_equal(pred.dtype, np.dtype('intp')) - assert_array_almost_equal(clf.intercept_, [-1.008], decimal=3) + assert_array_almost_equal(clf.intercept_, [-1.117], decimal=3) assert_array_almost_equal(clf.dual_coef_, - [[0.632, 0.233, 0.633, 0.234, 0.632, 0.633]], + [[0.681, 0.139, 0.68, 0.14, 0.68, 0.68]], decimal=3) assert_raises(AttributeError, lambda: clf.coef_) @@ -306,8 +307,9 @@ def test_probability(): # Predict probabilities using SVC # This uses cross validation, so we use a slightly bigger testing set. - for clf in (svm.SVC(probability=True, random_state=0, C=1.0), - svm.NuSVC(probability=True, random_state=0)): + for clf in (svm.SVC(gamma='scale', probability=True, random_state=0, + C=1.0), svm.NuSVC(gamma='scale', probability=True, + random_state=0)): clf.fit(iris.data, iris.target) prob_predict = clf.predict_proba(iris.data) @@ -403,7 +405,7 @@ def test_svr_predict(): def test_weight(): # Test class weights - clf = svm.SVC(class_weight={1: 0.1}) + clf = svm.SVC(gamma='scale', class_weight={1: 0.1}) # we give a small weights to class 1 clf.fit(X, Y) # so all predicted values belong to class 2 @@ -413,7 +415,7 @@ def test_weight(): weights=[0.833, 0.167], random_state=2) for clf in (linear_model.LogisticRegression(), - svm.LinearSVC(random_state=0), svm.SVC()): + svm.LinearSVC(random_state=0), svm.SVC(gamma="scale")): clf.set_params(class_weight={0: .1, 1: 10}) clf.fit(X_[:100], y_[:100]) y_pred = clf.predict(X_[100:]) @@ -423,7 +425,7 @@ def test_weight(): def test_sample_weights(): # Test weights on individual samples # TODO: check on NuSVR, OneClass, etc. 
- clf = svm.SVC() + clf = svm.SVC(gamma="scale") clf.fit(X, Y) assert_array_equal(clf.predict([X[2]]), [1.]) @@ -432,7 +434,7 @@ def test_sample_weights(): assert_array_equal(clf.predict([X[2]]), [2.]) # test that rescaling all samples is the same as changing C - clf = svm.SVC() + clf = svm.SVC(gamma="scale") clf.fit(X, Y) dual_coef_no_weight = clf.dual_coef_ clf.set_params(C=100) @@ -470,17 +472,17 @@ def test_auto_weight(): def test_bad_input(): # Test that it gives proper exception on deficient input # impossible value of C - assert_raises(ValueError, svm.SVC(C=-1).fit, X, Y) + assert_raises(ValueError, svm.SVC(gamma='scale', C=-1).fit, X, Y) # impossible value of nu - clf = svm.NuSVC(nu=0.0) + clf = svm.NuSVC(gamma='scale', nu=0.0) assert_raises(ValueError, clf.fit, X, Y) Y2 = Y[:-1] # wrong dimensions for labels assert_raises(ValueError, clf.fit, X, Y2) # Test with arrays that are non-contiguous. - for clf in (svm.SVC(), svm.LinearSVC(random_state=0)): + for clf in (svm.SVC(gamma="scale"), svm.LinearSVC(random_state=0)): Xf = np.asfortranarray(X) assert_false(Xf.flags['C_CONTIGUOUS']) yf = np.ascontiguousarray(np.tile(Y, (2, 1)).T) @@ -495,18 +497,18 @@ def test_bad_input(): assert_raises(ValueError, clf.fit, X, Y) # sample_weight bad dimensions - clf = svm.SVC() + clf = svm.SVC(gamma="scale") assert_raises(ValueError, clf.fit, X, Y, sample_weight=range(len(X) - 1)) # predict with sparse input when trained with dense - clf = svm.SVC().fit(X, Y) + clf = svm.SVC(gamma="scale").fit(X, Y) assert_raises(ValueError, clf.predict, sparse.lil_matrix(X)) Xt = np.array(X).T clf.fit(np.dot(X, Xt), Y) assert_raises(ValueError, clf.predict, X) - clf = svm.SVC() + clf = svm.SVC(gamma="scale") clf.fit(X, Y) assert_raises(ValueError, clf.predict, Xt) @@ -524,7 +526,7 @@ def test_unicode_kernel(): random_seed=0) # Test default behavior on both versions - clf = svm.SVC(kernel='linear', probability=True) + clf = svm.SVC(gamma='scale', kernel='linear', probability=True) clf.fit(X, Y) clf.predict_proba(T) svm.libsvm.cross_validation(iris.data, @@ -811,7 +813,7 @@ def test_linearsvc_verbose(): def test_svc_clone_with_callable_kernel(): # create SVM with callable linear kernel, check that results are the same # as with built-in linear kernel - svm_callable = svm.SVC(kernel=lambda x, y: np.dot(x, y.T), + svm_callable = svm.SVC(gamma='scale', kernel=lambda x, y: np.dot(x, y.T), probability=True, random_state=0, decision_function_shape='ovr') # clone for checking clonability with lambda functions.. @@ -837,7 +839,7 @@ def test_svc_clone_with_callable_kernel(): def test_svc_bad_kernel(): - svc = svm.SVC(kernel=lambda x, y: x) + svc = svm.SVC(gamma='scale', kernel=lambda x, y: x) assert_raises(ValueError, svc.fit, X, Y) @@ -850,11 +852,11 @@ def test_timeout(): def test_unfitted(): X = "foo!" 
     # input validation not required when SVM not fitted
-    clf = svm.SVC()
+    clf = svm.SVC(gamma="scale")
     assert_raises_regexp(Exception, r".*\bSVC\b.*\bnot\b.*\bfitted\b",
                          clf.predict, X)
 
-    clf = svm.NuSVR()
+    clf = svm.NuSVR(gamma='scale')
     assert_raises_regexp(Exception, r".*\bNuSVR\b.*\bnot\b.*\bfitted\b",
                          clf.predict, X)
 
@@ -913,12 +915,12 @@ def test_hasattr_predict_proba():
     # Method must be (un)available before or after fit, switched by
     # `probability` param
 
-    G = svm.SVC(probability=True)
+    G = svm.SVC(gamma='scale', probability=True)
     assert_true(hasattr(G, 'predict_proba'))
     G.fit(iris.data, iris.target)
     assert_true(hasattr(G, 'predict_proba'))
 
-    G = svm.SVC(probability=False)
+    G = svm.SVC(gamma='scale', probability=False)
     assert_false(hasattr(G, 'predict_proba'))
     G.fit(iris.data, iris.target)
     assert_false(hasattr(G, 'predict_proba'))
@@ -935,7 +937,7 @@ def test_decision_function_shape_two_class():
     for n_classes in [2, 3]:
         X, y = make_blobs(centers=n_classes, random_state=0)
         for estimator in [svm.SVC, svm.NuSVC]:
-            clf = OneVsRestClassifier(estimator(
+            clf = OneVsRestClassifier(estimator(gamma='scale',
                 decision_function_shape="ovr")).fit(X, y)
             assert_equal(len(clf.predict(X)), len(y))
 
@@ -980,3 +982,29 @@ def test_ovr_decision_function():
     # Test if the first point has lower decision value on every quadrant
     # compared to the second point
     assert_true(np.all(pred_class_deci_val[:, 0] < pred_class_deci_val[:, 1]))
+
+
+def test_gamma_auto():
+    X, y = [[0.0, 1.2], [1.0, 1.3]], [0, 1]
+
+    msg = ("The default value of gamma will change from 'auto' to 'scale' in "
+           "version 0.22 to account better for unscaled features. Set gamma "
+           "explicitly to 'auto' or 'scale' to avoid this warning.")
+
+    assert_warns_message(FutureWarning, msg,
+                         svm.SVC().fit, X, y)
+    assert_no_warnings(svm.SVC(kernel='linear').fit, X, y)
+    assert_no_warnings(svm.SVC(kernel='precomputed').fit, X, y)
+
+
+def test_gamma_scale():
+    X, y = [[0.], [1.]], [0, 1]
+
+    clf = svm.SVC(gamma='scale')
+    assert_no_warnings(clf.fit, X, y)
+    assert_equal(clf._gamma, 2.)
+
+    # X_std ~= 1 shouldn't raise a warning when gamma is left at its
+    # default (i.e. not explicitly set).
+    X, y = [[1, 2], [3, 2 * np.sqrt(6) / 3 + 2]], [0, 1]
+    assert_no_warnings(svm.SVC().fit, X, y)
diff --git a/sklearn/tests/test_cross_validation.py b/sklearn/tests/test_cross_validation.py
index 4d756bdaa0cf8..6a29a621ff588 100644
--- a/sklearn/tests/test_cross_validation.py
+++ b/sklearn/tests/test_cross_validation.py
@@ -1036,7 +1036,7 @@ def test_shufflesplit_reproducible():
 
 def test_safe_split_with_precomputed_kernel():
-    clf = SVC(gamma="scale")
+    clf = SVC(gamma="scale")
     clfp = SVC(kernel="precomputed")
 
     iris = load_iris()
diff --git a/sklearn/tests/test_grid_search.py b/sklearn/tests/test_grid_search.py
index f3c003e8c5be5..7a42757daea89 100644
--- a/sklearn/tests/test_grid_search.py
+++ b/sklearn/tests/test_grid_search.py
@@ -688,7 +688,7 @@ def test_predict_proba_disabled():
     # Test predict_proba when disabled on estimator.
X = np.arange(20).reshape(5, -1) y = [0, 0, 1, 1, 1] - clf = SVC(probability=False) + clf = SVC(gamma='scale', probability=False) gs = GridSearchCV(clf, {}, cv=2).fit(X, y) assert_false(hasattr(gs, "predict_proba")) diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py index 54071844260b3..f89e156bfd55c 100644 --- a/sklearn/tests/test_multiclass.py +++ b/sklearn/tests/test_multiclass.py @@ -179,7 +179,8 @@ def test_ovr_fit_predict_sparse(): assert_array_equal(pred, Y_pred_sprs.toarray()) # Test decision_function - clf_sprs = OneVsRestClassifier(svm.SVC()).fit(X_train, sparse(Y_train)) + clf = svm.SVC(gamma="scale") + clf_sprs = OneVsRestClassifier(clf).fit(X_train, sparse(Y_train)) dec_pred = (clf_sprs.decision_function(X_test) > 0).astype(int) assert_array_equal(dec_pred, clf_sprs.predict(X_test).toarray()) @@ -274,7 +275,7 @@ def conduct_test(base_clf, test_predict_proba=False): Ridge(), ElasticNet()): conduct_test(base_clf) - for base_clf in (MultinomialNB(), SVC(probability=True), + for base_clf in (MultinomialNB(), SVC(gamma='scale', probability=True), LogisticRegression()): conduct_test(base_clf, test_predict_proba=True) @@ -298,7 +299,7 @@ def test_ovr_multilabel(): def test_ovr_fit_predict_svc(): - ovr = OneVsRestClassifier(svm.SVC()) + ovr = OneVsRestClassifier(svm.SVC(gamma="scale")) ovr.fit(iris.data, iris.target) assert_equal(len(ovr.estimators_), 3) assert_greater(ovr.score(iris.data, iris.target), .9) @@ -343,18 +344,20 @@ def test_ovr_multilabel_predict_proba(): clf = OneVsRestClassifier(base_clf).fit(X_train, Y_train) # Decision function only estimator. - decision_only = OneVsRestClassifier(svm.SVR()).fit(X_train, Y_train) + decision_only = OneVsRestClassifier(svm.SVR(gamma='scale') + ).fit(X_train, Y_train) assert_false(hasattr(decision_only, 'predict_proba')) # Estimator with predict_proba disabled, depending on parameters. - decision_only = OneVsRestClassifier(svm.SVC(probability=False)) + decision_only = OneVsRestClassifier(svm.SVC(gamma='scale', + probability=False)) assert_false(hasattr(decision_only, 'predict_proba')) decision_only.fit(X_train, Y_train) assert_false(hasattr(decision_only, 'predict_proba')) assert_true(hasattr(decision_only, 'decision_function')) # Estimator which can get predict_proba enabled after fitting - gs = GridSearchCV(svm.SVC(probability=False), + gs = GridSearchCV(svm.SVC(gamma='scale', probability=False), param_grid={'probability': [True]}) proba_after_fit = OneVsRestClassifier(gs) assert_false(hasattr(proba_after_fit, 'predict_proba')) @@ -378,7 +381,8 @@ def test_ovr_single_label_predict_proba(): clf = OneVsRestClassifier(base_clf).fit(X_train, Y_train) # Decision function only estimator. 
- decision_only = OneVsRestClassifier(svm.SVR()).fit(X_train, Y_train) + decision_only = OneVsRestClassifier(svm.SVR(gamma='scale') + ).fit(X_train, Y_train) assert_false(hasattr(decision_only, 'predict_proba')) Y_pred = clf.predict(X_test) @@ -401,7 +405,7 @@ def test_ovr_multilabel_decision_function(): random_state=0) X_train, Y_train = X[:80], Y[:80] X_test = X[80:] - clf = OneVsRestClassifier(svm.SVC()).fit(X_train, Y_train) + clf = OneVsRestClassifier(svm.SVC(gamma="scale")).fit(X_train, Y_train) assert_array_equal((clf.decision_function(X_test) > 0).astype(int), clf.predict(X_test)) @@ -412,7 +416,7 @@ def test_ovr_single_label_decision_function(): random_state=0) X_train, Y_train = X[:80], Y[:80] X_test = X[80:] - clf = OneVsRestClassifier(svm.SVC()).fit(X_train, Y_train) + clf = OneVsRestClassifier(svm.SVC(gamma="scale")).fit(X_train, Y_train) assert_array_equal(clf.decision_function(X_test).ravel() > 0, clf.predict(X_test)) diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 95172b103be7a..beb4be61d0b07 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -318,7 +318,7 @@ def test_pipeline_methods_pca_svm(): X = iris.data y = iris.target # Test with PCA + SVC - clf = SVC(probability=True, random_state=0) + clf = SVC(gamma='scale', probability=True, random_state=0) pca = PCA(svd_solver='full', n_components='mle', whiten=True) pipe = Pipeline([('pca', pca), ('svc', clf)]) pipe.fit(X, y) @@ -337,7 +337,8 @@ def test_pipeline_methods_preprocessing_svm(): n_classes = len(np.unique(y)) scaler = StandardScaler() pca = PCA(n_components=2, svd_solver='randomized', whiten=True) - clf = SVC(probability=True, random_state=0, decision_function_shape='ovr') + clf = SVC(gamma='scale', probability=True, random_state=0, + decision_function_shape='ovr') for preprocessing in [scaler, pca]: pipe = Pipeline([('preprocess', preprocessing), ('svc', clf)]) @@ -903,8 +904,8 @@ def test_pipeline_wrong_memory(): y = iris.target # Define memory as an integer memory = 1 - cached_pipe = Pipeline([('transf', DummyTransf()), ('svc', SVC())], - memory=memory) + cached_pipe = Pipeline([('transf', DummyTransf()), + ('svc', SVC())], memory=memory) assert_raises_regex(ValueError, "'memory' should be None, a string or" " have the same interface as " "sklearn.externals.joblib.Memory." 
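For context on the pipeline tests touched here, this is the cached-pipeline pattern they exercise, with ``gamma`` set explicitly so the new ``FutureWarning`` stays out of the way (an illustrative sketch, not part of the patch)::

    from shutil import rmtree
    from tempfile import mkdtemp

    from sklearn.datasets import load_iris
    from sklearn.decomposition import PCA
    from sklearn.pipeline import Pipeline
    from sklearn.svm import SVC

    iris = load_iris()
    cachedir = mkdtemp()
    # the fitted PCA step is cached on disk; the SVC is refit each time
    pipe = Pipeline([('reduce_dim', PCA(n_components=2)),
                     ('clf', SVC(gamma='scale'))],
                    memory=cachedir)
    pipe.fit(iris.data, iris.target)
    rmtree(cachedir)  # clean up the cache directory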
@@ -942,7 +943,7 @@ def test_pipeline_memory(): try: memory = Memory(cachedir=cachedir, verbose=10) # Test with Transformer + SVC - clf = SVC(probability=True, random_state=0) + clf = SVC(gamma='scale', probability=True, random_state=0) transf = DummyTransf() pipe = Pipeline([('transf', clone(transf)), ('svc', clf)]) cached_pipe = Pipeline([('transf', transf), ('svc', clf)], @@ -976,7 +977,7 @@ def test_pipeline_memory(): assert_equal(ts, cached_pipe.named_steps['transf'].timestamp_) # Create a new pipeline with cloned estimators # Check that even changing the name step does not affect the cache hit - clf_2 = SVC(probability=True, random_state=0) + clf_2 = SVC(gamma='scale', probability=True, random_state=0) transf_2 = DummyTransf() cached_pipe_2 = Pipeline([('transf_2', transf_2), ('svc', clf_2)], memory=memory) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 95f5841ac80ba..a3a4175d7eff4 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -599,7 +599,7 @@ def test_check_is_fitted(): assert_raises(TypeError, check_is_fitted, "SVR", "support_") ard = ARDRegression() - svr = SVR() + svr = SVR(gamma='scale') try: assert_raises(NotFittedError, check_is_fitted, ard, "coef_")
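Downstream code can prepare for the 0.22 default change by setting ``gamma`` explicitly, or by cross-validating over both values and letting the data decide (an illustrative sketch, not part of the patch)::

    from sklearn.datasets import load_iris
    from sklearn.model_selection import GridSearchCV
    from sklearn.svm import SVC

    X, y = load_iris(return_X_y=True)
    # every grid candidate sets gamma explicitly, so no FutureWarning fires
    search = GridSearchCV(SVC(), param_grid={'gamma': ['auto', 'scale']},
                          cv=3)
    search.fit(X, y)
    print(search.best_params_)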