From d9260f0f606ae7b12c0dc9602fcf30b8fb95d119 Mon Sep 17 00:00:00 2001 From: Yannick Schwartz Date: Wed, 30 Oct 2013 14:15:12 +0100 Subject: [PATCH 1/3] Pipeline transform and inverse_transform skip last step if it lacks corresponding methods --- sklearn/pipeline.py | 28 ++++++++++++++++++++++------ sklearn/tests/test_pipeline.py | 9 +++++++++ 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 2570c378b8ee9..9e208c264b327 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -138,8 +138,10 @@ def fit_transform(self, X, y=None, **fit_params): Xt, fit_params = self._pre_transform(X, y, **fit_params) if hasattr(self.steps[-1][-1], 'fit_transform'): return self.steps[-1][-1].fit_transform(Xt, y, **fit_params) - else: + elif hasattr(self.steps[-1][-1], 'transform'): return self.steps[-1][-1].fit(Xt, y, **fit_params).transform(Xt) + else: + return Xt def predict(self, X): """Applies transforms to the data, and the predict method of the @@ -175,11 +177,21 @@ def predict_log_proba(self, X): return self.steps[-1][-1].predict_log_proba(Xt) def transform(self, X): - """Applies transforms to the data, and the transform method of the - final estimator. Valid only if the final estimator implements - transform.""" + """Applies transforms to the data. + + All the estimators in the pipeline need to implement + a transform, except for the final one that is ignored + in the case it lacks the method. + + Calling transform in that case can be useful to extract + the features before the last step for debugging purposes. 
+ """ Xt = X - for name, transform in self.steps: + # test if the last step implements a transform method + steps = self.steps + if not hasattr(self.steps[-1][-1], 'transform'): + steps = self.steps[:-1] + for name, transform in steps: Xt = transform.transform(Xt) return Xt @@ -187,7 +199,11 @@ def inverse_transform(self, X): if X.ndim == 1: X = X[None, :] Xt = X - for name, step in self.steps[::-1]: + # test if the last step implements an inverse_transform method + inverse_steps = self.steps[::-1] + if not hasattr(self.steps[-1][-1], 'inverse_transform'): + inverse_steps = self.steps[:-1][::-1] + for name, step in inverse_steps: Xt = step.inverse_transform(Xt) return Xt diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index c3b2fbc170b70..9b790edec6161 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -235,19 +235,28 @@ def test_pipeline_transform(): # Also test pipeline.transform and pipeline.inverse_transform iris = load_iris() X = iris.data + y = iris.target pca = PCA(n_components=2) + clf = SVC() pipeline = Pipeline([('pca', pca)]) + pipeline2 = Pipeline([('pca', pca), ('clf', clf)]) # test transform and fit_transform: X_trans = pipeline.fit(X).transform(X) X_trans2 = pipeline.fit_transform(X) X_trans3 = pca.fit_transform(X) + X_trans4 = pipeline2.fit(X, y).transform(X) + X_trans5 = pipeline2.fit_transform(X, y) assert_array_almost_equal(X_trans, X_trans2) assert_array_almost_equal(X_trans, X_trans3) + assert_array_almost_equal(X_trans, X_trans4) + assert_array_almost_equal(X_trans, X_trans5) X_back = pipeline.inverse_transform(X_trans) X_back2 = pca.inverse_transform(X_trans) + X_back3 = pipeline2.inverse_transform(X_trans) assert_array_almost_equal(X_back, X_back2) + assert_array_almost_equal(X_back, X_back3) def test_pipeline_fit_transform(): From 04b0144b770f7795e52e7a2ff8125c9bee2b54ad Mon Sep 17 00:00:00 2001 From: Yannick Schwartz Date: Thu, 31 Oct 2013 15:16:27 +0100 Subject: [PATCH 2/3] Add 
docstring for new pipeline inverse_transform --- sklearn/pipeline.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 9e208c264b327..e664a359d7ee9 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -196,6 +196,16 @@ def transform(self, X): return Xt def inverse_transform(self, X): + """Applies inverse transforms to the data. + + All the estimators in the pipeline need to implement + an inverse transform, except for the final one, which + is ignored if it lacks the method. + + As an example, calling inverse_transform can be useful to + map the `coef_` attribute of a linear classifier back to + the original space. + """ if X.ndim == 1: X = X[None, :] Xt = X From 19069890c6e95e6bb5335d8bb64d4cd969848591 Mon Sep 17 00:00:00 2001 From: Yannick Schwartz Date: Thu, 31 Oct 2013 15:21:54 +0100 Subject: [PATCH 3/3] Updated the whats_new --- doc/whats_new.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 6d298882f1b17..e51cd0971197e 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -60,6 +60,10 @@ Changelog - Added :class:`linear_model.RANSACRegressor` meta-estimator for the robust fitting of regression models. By Johannes Schönberger. + - Extended the transform and inverse_transform methods of + :class:`pipeline.Pipeline` to ignore the last step if it does not + implement the corresponding method, by `Yannick Schwartz`_. + API changes summary -------------------