diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 40076c0b275dd..30c184db592a0 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -67,6 +67,15 @@ Fixed models names out from one step of a pipeline to the next. :pr:`21351` by `Thomas Fan`_. +:mod:`sklearn.svm` +.................. + +- |Fix| :class:`svm.SVC` and :class:`svm.SVR` check for an inconsistency + in their internal representation and raise an error instead of segfaulting. + This fix also resolves + `CVE-2020-28975 <https://nvd.nist.gov/vuln/detail/CVE-2020-28975>`__. + :pr:`21336` by `Thomas Fan`_. + .. _changes_1_0: Version 1.0.0 diff --git a/sklearn/svm/_base.py b/sklearn/svm/_base.py index 9c992e0d1f1fa..7f0e628aab70c 100644 --- a/sklearn/svm/_base.py +++ b/sklearn/svm/_base.py @@ -616,6 +616,13 @@ def _validate_for_predict(self, X): "the number of samples at training time" % (X.shape[1], self.shape_fit_[0]) ) + # Fixes https://nvd.nist.gov/vuln/detail/CVE-2020-28975 + # Check that _n_support is consistent with support_vectors + sv = self.support_vectors_ + if not self._sparse and sv.size > 0 and self.n_support_.sum() != sv.shape[0]: + raise ValueError( + f"The internal representation of {self.__class__.__name__} was altered" + ) return X @property diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index 3933b6ca029e2..eb15aca0096b8 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -1371,3 +1371,16 @@ def string_kernel(X1, X2): else: # regressor assert_allclose(svc1.predict(data), svc2.predict(X)) assert_allclose(svc1.predict(data), svc3.predict(K)) + + +def test_svc_raises_error_internal_representation(): + """Check that SVC raises error when internal representation is altered. + + Non-regression test for #18891 and https://nvd.nist.gov/vuln/detail/CVE-2020-28975 + """ + clf = svm.SVC(kernel="linear").fit(X, Y) + clf._n_support[0] = 1000000 + + msg = "The internal representation of SVC was altered" + with pytest.raises(ValueError, match=msg): + clf.predict(X)