From 2242b1b2ea715161f87307060104a8a0141e5077 Mon Sep 17 00:00:00 2001 From: John Moeller Date: Thu, 16 Jun 2016 22:49:36 -0600 Subject: [PATCH 1/4] Make KernelCenterer a _pairwise operation Replicate solution to https://github.com/scikit-learn/scikit-learn/commit/9a520779c233dfeff466870c0b7cb04b705e61af except that `_pairwise` should always be `True` for `KernelCenterer` because it's supposed to receive a Gram matrix. This should make `KernelCenterer` usable in `Pipeline`s. Happy to add tests, just tell me what should be covered. --- sklearn/preprocessing/data.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py index 42957133b654c..74099dc51b153 100644 --- a/sklearn/preprocessing/data.py +++ b/sklearn/preprocessing/data.py @@ -1584,6 +1584,10 @@ def transform(self, K, y=None, copy=True): K += self.K_fit_all_ return K + + @property + def _pairwise(self): + return True def add_dummy_feature(X, value=1.0): From cc9dbac37f804656811f913379a47e1110710a77 Mon Sep 17 00:00:00 2001 From: John Moeller Date: Fri, 17 Jun 2016 17:28:36 -0600 Subject: [PATCH 2/4] Adding test for PR #6900 --- sklearn/preprocessing/tests/test_data.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index b1ef18a8ebc45..9caa1818fcf66 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -52,6 +52,11 @@ from sklearn.preprocessing.data import PolynomialFeatures from sklearn.exceptions import DataConversionWarning +from sklearn.pipeline import Pipeline +from sklearn.cross_validation import cross_val_score +from sklearn.cross_validation import LeaveOneOut +from sklearn.svm import SVR + from sklearn import datasets iris = datasets.load_iris() @@ -1369,6 +1374,23 @@ def test_center_kernel(): K_pred_centered2 = centerer.transform(K_pred) assert_array_almost_equal(K_pred_centered, K_pred_centered2) +def test_cv_pipeline_precomputed(): + """Cross-validate a regression on four coplanar points with the same + value. Use precomputed kernel to ensure Pipeline with KernelCenterer + is treated as a _pairwise operation.""" + X = np.array([[3,0,0],[0,3,0],[0,0,3],[1,1,1]]) + y = np.ones((4,)) + K = X.dot(X.T) + kcent = KernelCenterer() + pipeline = Pipeline([("kernel_centerer", kcent), ("svr", SVR())]) + + # did the pipeline set the _pairwise attribute? + assert_true(pipeline._pairwise) + + # test cross-validation, score should be almost perfect + score = cross_val_score(pipeline,K,y,cv=LeaveOneOut(4)) + assert_array_almost_equal(score, np.ones_like(score)) + def test_fit_transform(): rng = np.random.RandomState(0) From 13f68c93b889fb20730f24ec6c9fa74d4c0e43cf Mon Sep 17 00:00:00 2001 From: John Moeller Date: Fri, 17 Jun 2016 17:44:51 -0600 Subject: [PATCH 3/4] Simplifying imports and test --- sklearn/preprocessing/tests/test_data.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index 9caa1818fcf66..e2e080562400d 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -53,8 +53,7 @@ from sklearn.exceptions import DataConversionWarning from sklearn.pipeline import Pipeline -from sklearn.cross_validation import cross_val_score -from sklearn.cross_validation import LeaveOneOut +from sklearn.cross_validation import cross_val_predict from sklearn.svm import SVR from sklearn import datasets @@ -1379,7 +1378,7 @@ def test_cv_pipeline_precomputed(): value. Use precomputed kernel to ensure Pipeline with KernelCenterer is treated as a _pairwise operation.""" X = np.array([[3,0,0],[0,3,0],[0,0,3],[1,1,1]]) - y = np.ones((4,)) + y_true = np.ones((4,)) K = X.dot(X.T) kcent = KernelCenterer() pipeline = Pipeline([("kernel_centerer", kcent), ("svr", SVR())]) @@ -1388,8 +1387,10 @@ def test_cv_pipeline_precomputed(): assert_true(pipeline._pairwise) # test cross-validation, score should be almost perfect - score = cross_val_score(pipeline,K,y,cv=LeaveOneOut(4)) - assert_array_almost_equal(score, np.ones_like(score)) + # NB: this test is pretty vacuous -- it's mainly to test integration + # of Pipeline and KernelCenterer + y_pred = cross_val_predict(pipeline,K,y_true,cv=4) + assert_array_almost_equal(y_true, y_pred) def test_fit_transform(): From 055bc4c0043870fa453e0468fc7625831dc80efd Mon Sep 17 00:00:00 2001 From: John Moeller Date: Wed, 22 Jun 2016 02:10:41 -0600 Subject: [PATCH 4/4] pep8 --- sklearn/preprocessing/data.py | 2 +- sklearn/preprocessing/tests/test_data.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py index 74099dc51b153..d81c382fa78bd 100644 --- a/sklearn/preprocessing/data.py +++ b/sklearn/preprocessing/data.py @@ -1584,7 +1584,7 @@ def transform(self, K, y=None, copy=True): K += self.K_fit_all_ return K - + @property def _pairwise(self): return True diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index e2e080562400d..f35fc274edc2e 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -1373,11 +1373,12 @@ def test_center_kernel(): K_pred_centered2 = centerer.transform(K_pred) assert_array_almost_equal(K_pred_centered, K_pred_centered2) + def test_cv_pipeline_precomputed(): - """Cross-validate a regression on four coplanar points with the same + """Cross-validate a regression on four coplanar points with the same value. Use precomputed kernel to ensure Pipeline with KernelCenterer is treated as a _pairwise operation.""" - X = np.array([[3,0,0],[0,3,0],[0,0,3],[1,1,1]]) + X = np.array([[3, 0, 0], [0, 3, 0], [0, 0, 3], [1, 1, 1]]) y_true = np.ones((4,)) K = X.dot(X.T) kcent = KernelCenterer() @@ -1389,7 +1390,7 @@ def test_cv_pipeline_precomputed(): # test cross-validation, score should be almost perfect # NB: this test is pretty vacuous -- it's mainly to test integration # of Pipeline and KernelCenterer - y_pred = cross_val_predict(pipeline,K,y_true,cv=4) + y_pred = cross_val_predict(pipeline, K, y_true, cv=4) assert_array_almost_equal(y_true, y_pred)