From a308cdb954a28204e23879ff17ee1c67221ed4cc Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Thu, 9 May 2013 01:19:18 +1000 Subject: [PATCH 1/5] FIX make Pipeline methods properties as per #1805 --- sklearn/pipeline.py | 242 +++++++++++++++++++++++++-------- sklearn/tests/test_pipeline.py | 22 +++ 2 files changed, 207 insertions(+), 57 deletions(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index e91faba0d61f4..5423e04359284 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -110,6 +110,19 @@ def get_params(self, deep=True): out['%s__%s' % (name, key)] = value return out + @property + def named_steps(self): + return dict(self.steps) + + @property + def _transforms(self): + """Non-final estimators in (name, est) tuples.""" + return self.steps[:-1] + + @property + def _final_estimator(self): + return self.steps[-1][1] + # Estimator interface def _pre_transform(self, X, y=None, **fit_params): @@ -118,7 +131,9 @@ def _pre_transform(self, X, y=None, **fit_params): step, param = pname.split('__', 1) fit_params_steps[step][param] = pval Xt = X - for name, transform in self.steps[:-1]: + for name, transform in self._transforms: + if transform is None: + continue if hasattr(transform, "fit_transform"): Xt = transform.fit_transform(Xt, y, **fit_params_steps[name]) else: @@ -129,79 +144,192 @@ def _pre_transform(self, X, y=None, **fit_params): def fit(self, X, y=None, **fit_params): """Fit all the transforms one after the other and transform the data, then fit the transformed data using the final estimator. + + Parameters + ---------- + X : array-like, shape = [n_samples, n_features] + Training data, where n_samples in the number of samples and + n_features is the number of features. + y : array-like, shape = [n_samples], optional + Target vector relative to X for classification; + None for unsupervised learning. + fit_params : dict of string -> object + Parameters passed to the `fit` method of each step, where + each parameter name is prefixed such that parameter ``p`` for step + ``s`` has key ``s__p``. """ Xt, fit_params = self._pre_transform(X, y, **fit_params) - self.steps[-1][-1].fit(Xt, y, **fit_params) + self._final_estimator.fit(Xt, y, **fit_params) return self - def fit_transform(self, X, y=None, **fit_params): - """Fit all the transforms one after the other and transform the + @property + def fit_transform(self): + """Pipeline.fit_transform(X, y=None, **fit_params) + + Fit all the transforms one after the other and transform the data, then use fit_transform on transformed data using the final - estimator.""" - Xt, fit_params = self._pre_transform(X, y, **fit_params) - if hasattr(self.steps[-1][-1], 'fit_transform'): - return self.steps[-1][-1].fit_transform(Xt, y, **fit_params) - else: - return self.steps[-1][-1].fit(Xt, y, **fit_params).transform(Xt) + estimator. - def predict(self, X): - """Applies transforms to the data, and the predict method of the - final estimator. Valid only if the final estimator implements - predict.""" + Parameters + ---------- + X : array-like, shape = [n_samples, n_features] + Training data, where n_samples in the number of samples and + n_features is the number of features. + y : array-like, shape = [n_samples], optional + Target vector relative to X for classification; + None for unsupervised learning. + fit_params : dict of string -> object + Parameters passed to the `fit` method of each step, where + each parameter name is prefixed such that parameter ``p`` for step + ``s`` has key ``s__p``. 
+ """ + last_step = self._final_estimator + if ( + not hasattr(last_step, 'fit_transform') + and not hasattr(last_step, 'transform')): + raise AttributeError( + 'last step has neither `transform` nor `fit_transform`') + + def fn(X, y=None, **fit_params): + Xt, fit_params = self._pre_transform(X, y, **fit_params) + if hasattr(last_step, 'fit_transform'): + return last_step.fit_transform(Xt, y, **fit_params) + else: + return last_step.fit(Xt, y, **fit_params).transform(Xt) + return fn + + def _run_pipeline(self, est_fn, X, *args, **kwargs): Xt = X - for name, transform in self.steps[:-1]: - Xt = transform.transform(Xt) - return self.steps[-1][-1].predict(Xt) + for name, transform in self._transforms: + if transform is not None: + Xt = transform.transform(Xt) + return est_fn(Xt, *args, **kwargs) + + @property + def predict(self): + """Pipeline.predict(X) - def predict_proba(self, X): - """Applies transforms to the data, and the predict_proba method of the + Applies transforms to the data, and the `predict` method of the final estimator. Valid only if the final estimator implements - predict_proba.""" - Xt = X - for name, transform in self.steps[:-1]: - Xt = transform.transform(Xt) - return self.steps[-1][-1].predict_proba(Xt) + predict. + + Parameters + ---------- + X : array-like, shape = [n_samples, n_features] + Data samples, where n_samples in the number of samples and + n_features is the number of features. + """ + return partial(self._run_pipeline, self._final_estimator.predict) + + @property + def predict_proba(self): + """Pipeline.predict_proba(X) - def decision_function(self, X): - """Applies transforms to the data, and the decision_function method of + Applies transforms to the data, and the `predict_proba` method of the final estimator. Valid only if the final estimator implements - decision_function.""" - Xt = X - for name, transform in self.steps[:-1]: - Xt = transform.transform(Xt) - return self.steps[-1][-1].decision_function(Xt) + predict_proba. - def predict_log_proba(self, X): - Xt = X - for name, transform in self.steps[:-1]: - Xt = transform.transform(Xt) - return self.steps[-1][-1].predict_log_proba(Xt) + Parameters + ---------- + X : array-like, shape = [n_samples, n_features] + Data samples, where n_samples in the number of samples and + n_features is the number of features. + """ + return partial(self._run_pipeline, self._final_estimator.predict_proba) - def transform(self, X): - """Applies transforms to the data, and the transform method of the + @property + def predict_log_proba(self): + """Pipeline.predict_log_proba(X) + + Applies transforms to the data, and the `predict_log_proba` method + of the final estimator. Valid only if the final estimator implements + predict_log_proba. + + Parameters + ---------- + X : array-like, shape = [n_samples, n_features] + Data samples, where n_samples in the number of samples and + n_features is the number of features. + """ + return partial(self._run_pipeline, + self._final_estimator.predict_log_proba) + + @property + def decision_function(self): + """Pipeline.decision_function(X) + + Applies transforms to the data, and the `decision_function` method + of the final estimator. Valid only if the final estimator implements + decision_function. + + Parameters + ---------- + X : array-like, shape = [n_samples, n_features] + Data samples, where n_samples in the number of samples and + n_features is the number of features. 
+ """ + return partial(self._run_pipeline, + self._final_estimator.decision_function) + + @property + def transform(self): + """Pipeline.transform(X) + + Applies transforms to the data, and the `transform` method of the final estimator. Valid only if the final estimator implements - transform.""" - Xt = X - for name, transform in self.steps: - Xt = transform.transform(Xt) - return Xt + transform. - def inverse_transform(self, X): - if X.ndim == 1: - X = X[None, :] - Xt = X - for name, step in self.steps[::-1]: - Xt = step.inverse_transform(Xt) - return Xt + Parameters + ---------- + X : array-like, shape = [n_samples, n_features] + Data samples, where n_samples in the number of samples and + n_features is the number of features. + """ + return partial(self._run_pipeline, self._final_estimator.transform) + + @property + def inverse_transform(self): + """Pipeline.inverse_transform(X) - def score(self, X, y=None): - """Applies transforms to the data, and the score method of the + Applies inverse transforms to the data from the last step to the + first. + + Parameters + ---------- + X : array-like, shape = [n_samples, n_features] + Data samples, where n_samples in the number of samples and + n_features is the number of features. + """ + inverse_transforms = [step.inverse_transform + for name, step in self.steps[::-1] if step is not None] + + def fn(X): + if X.ndim == 1: + X = X[None, :] + Xt = X + for inv_transform in inverse_transforms: + Xt = inv_transform(Xt) + return Xt + return fn + + @property + def score(self): + """Pipeline.score(X, y=None) + + Applies transforms to the data, and the `score` method of the final estimator. Valid only if the final estimator implements - score.""" - Xt = X - for name, transform in self.steps[:-1]: - Xt = transform.transform(Xt) - return self.steps[-1][-1].score(Xt, y) + score. + + Parameters + ---------- + X : array-like, shape = [n_samples, n_features] + Data samples, where n_samples in the number of samples and + n_features is the number of features. + y : array-like, shape = [n_samples], optional + Target vector relative to X; + None for unsupervised learning. 
+ """ + return partial(self._run_pipeline, self._final_estimator.score) @property def _pairwise(self): diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 87bb7b813cae8..3953ef642bbb4 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -289,6 +289,28 @@ def test_make_pipeline(): assert_equal(pipe.steps[2][0], "fitparamt") +def test_pipeline_attributes(): + """Ensure that the Pipeline only provides post-fit methods that are present + on the last step""" + + def make(method): + """Make a pipeline whose estimator has specified method""" + transf = TransfT() + setattr(transf, method, lambda *args, **kwargs: True) + return Pipeline([('est', transf)]).fit([[1]], [1]) + + attribs = ['predict_proba', 'predict_log_proba', 'predict', + 'decision_function', 'score', 'inverse_transform'] + + for attrib in attribs: + pipeline = make(attrib) + getattr(pipeline, attrib)(np.asarray([[1]])) + for attrib2 in attribs: + if attrib2 != attrib: + assert_false(hasattr(pipeline, attrib2)) +>>>>>>> FIX make Pipeline methods properties as per #1805 + + def test_feature_union_weights(): # test feature union with transformer weights iris = load_iris() From ece02bbd885ca9ac7289dd84a02767efd00cde14 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Thu, 30 May 2013 11:14:13 +1000 Subject: [PATCH 2/5] TST test delegated ducktyping of metaestimators --- sklearn/pipeline.py | 4 +- sklearn/tests/test_metaestimators.py | 100 +++++++ sklearn/tests/test_pipeline.py | 410 +++++++++++++-------------- 3 files changed, 297 insertions(+), 217 deletions(-) create mode 100644 sklearn/tests/test_metaestimators.py diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 5423e04359284..e361f4ce2af3e 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -11,6 +11,8 @@ from collections import defaultdict +from functools import partial + import numpy as np from scipy import sparse @@ -78,7 +80,7 @@ class Pipeline(BaseEstimator): # BaseEstimator interface def __init__(self, steps): - self.named_steps = dict(steps) + self.steps = steps names, estimators = zip(*steps) if len(self.named_steps) != len(steps): raise ValueError("Names provided are not unique: %s" % (names,)) diff --git a/sklearn/tests/test_metaestimators.py b/sklearn/tests/test_metaestimators.py new file mode 100644 index 0000000000000..5da01390d1a00 --- /dev/null +++ b/sklearn/tests/test_metaestimators.py @@ -0,0 +1,100 @@ +"""Common tests for metaestimators""" + +import functools + +import numpy as np + +from sklearn.base import BaseEstimator +from sklearn.externals.six import iterkeys +from sklearn.datasets import make_classification +from sklearn.utils.testing import assert_true, assert_false +from sklearn.pipeline import Pipeline + + +class DelegatorData(object): + def __init__(self, name, construct, skip_methods=(), + fit_args=make_classification()): + self.name = name + self.construct = construct + self.fit_args = fit_args + self.skip_methods = skip_methods + + +DELEGATING_METAESTIMATORS = { + DelegatorData('pipeline', lambda est: Pipeline([('est', est)])), + DelegatorData('pipeline', lambda est: Pipeline([('est', est)])), +} + + +def test_metaestimator_delegation(): + def hides(method): + @property + def wrapper(obj): + if obj.hidden_method == method.__name__: + raise AttributeError + return functools.partial(method, obj) + return wrapper + + class SubEstimator(BaseEstimator): + def __init__(self, param=1, hidden_method=None): + self.param = param + self.hidden_method = hidden_method + + def 
fit(self, X, y=None, *args, **kwargs): + return True + + @hides + def inverse_transform(self, X, *args, **kwargs): + return X + + @hides + def transform(self, X, *args, **kwargs): + return X + + @hides + def predict(self, X, *args, **kwargs): + return np.ones(X.shape[0]) + + @hides + def predict_proba(self, X, *args, **kwargs): + return np.ones(X.shape[0]) + + @hides + def predict_log_proba(self, X, *args, **kwargs): + return np.ones(X.shape[0]) + + @hides + def decision_function(self, X, *args, **kwargs): + return np.ones(X.shape[0]) + + @hides + def score(self, X, *args, **kwargs): + return 1.0 + + + methods = [k for k in iterkeys(SubEstimator.__dict__) + if not k.startswith('_') and not k.startswith('fit')] + + for delegator_data in DELEGATING_METAESTIMATORS: + delegate = SubEstimator() + delegator = delegator_data.construct(delegate) + delegator.fit(*delegator_data.fit_args) + for method in methods: + if method in delegator_data.skip_methods: + continue + assert_true(hasattr(delegate, method)) + assert_true(hasattr(delegator, method), + msg="%s does not have method %r when its delegate does" + % (delegator_data.name, method)) + + for method in methods: + if method in delegator_data.skip_methods: + continue + delegate = SubEstimator(hidden_method=method) + delegator = delegator_data.construct(delegate) + delegator.fit(*delegator_data.fit_args) + assert_false(hasattr(delegate, method)) + assert_false(hasattr(delegator, method), + msg="%s has method %r when its delegate does not" + % (delegator_data.name, method)) + diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 3953ef642bbb4..aa4ab41e8582a 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -23,254 +23,254 @@ JUNK_FOOD_DOCS = ( - "the pizza pizza beer copyright", - "the pizza burger beer copyright", - "the the pizza beer beer copyright", - "the burger beer beer copyright", - "the coke burger coke copyright", - "the coke burger burger", +"the pizza pizza beer copyright", +"the pizza burger beer copyright", +"the the pizza beer beer copyright", +"the burger beer beer copyright", +"the coke burger coke copyright", +"the coke burger burger", ) class IncorrectT(BaseEstimator): - """Small class to test parameter dispatching. - """ +"""Small class to test parameter dispatching. +""" - def __init__(self, a=None, b=None): - self.a = a - self.b = b +def __init__(self, a=None, b=None): + self.a = a + self.b = b class T(IncorrectT): - def fit(self, X, y): - return self +def fit(self, X, y): + return self class TransfT(T): - def transform(self, X, y=None): - return X +def transform(self, X, y=None): + return X class FitParamT(BaseEstimator): - """Mock classifier - """ +"""Mock classifier +""" - def __init__(self): - self.successful = False - pass +def __init__(self): + self.successful = False + pass - def fit(self, X, y, should_succeed=False): - self.successful = should_succeed +def fit(self, X, y, should_succeed=False): + self.successful = should_succeed - def predict(self, X): - return self.successful +def predict(self, X): + return self.successful def test_pipeline_init(): - """ Test the various init parameters of the pipeline. 
- """ - assert_raises(TypeError, Pipeline) - # Check that we can't instantiate pipelines with objects without fit - # method - pipe = assert_raises(TypeError, Pipeline, [('svc', IncorrectT)]) - # Smoke test with only an estimator - clf = T() - pipe = Pipeline([('svc', clf)]) - assert_equal(pipe.get_params(deep=True), - dict(svc__a=None, svc__b=None, svc=clf)) - - # Check that params are set - pipe.set_params(svc__a=0.1) - assert_equal(clf.a, 0.1) - # Smoke test the repr: - repr(pipe) - - # Test with two objects - clf = SVC() - filter1 = SelectKBest(f_classif) - pipe = Pipeline([('anova', filter1), ('svc', clf)]) - - # Check that we can't use the same stage name twice - assert_raises(ValueError, Pipeline, [('svc', SVC()), ('svc', SVC())]) - - # Check that params are set - pipe.set_params(svc__C=0.1) - assert_equal(clf.C, 0.1) - # Smoke test the repr: - repr(pipe) - - # Check that params are not set when naming them wrong - assert_raises(ValueError, pipe.set_params, anova__C=0.1) - - # Test clone - pipe2 = clone(pipe) - assert_false(pipe.named_steps['svc'] is pipe2.named_steps['svc']) - - # Check that apart from estimators, the parameters are the same - params = pipe.get_params() - params2 = pipe2.get_params() - # Remove estimators that where copied - params.pop('svc') - params.pop('anova') - params2.pop('svc') - params2.pop('anova') - assert_equal(params, params2) +""" Test the various init parameters of the pipeline. +""" +assert_raises(TypeError, Pipeline) +# Check that we can't instantiate pipelines with objects without fit +# method +pipe = assert_raises(TypeError, Pipeline, [('svc', IncorrectT)]) +# Smoke test with only an estimator +clf = T() +pipe = Pipeline([('svc', clf)]) +assert_equal(pipe.get_params(deep=True), + dict(svc__a=None, svc__b=None, svc=clf)) + +# Check that params are set +pipe.set_params(svc__a=0.1) +assert_equal(clf.a, 0.1) +# Smoke test the repr: +repr(pipe) + +# Test with two objects +clf = SVC() +filter1 = SelectKBest(f_classif) +pipe = Pipeline([('anova', filter1), ('svc', clf)]) + +# Check that we can't use the same stage name twice +assert_raises(ValueError, Pipeline, [('svc', SVC()), ('svc', SVC())]) + +# Check that params are set +pipe.set_params(svc__C=0.1) +assert_equal(clf.C, 0.1) +# Smoke test the repr: +repr(pipe) + +# Check that params are not set when naming them wrong +assert_raises(ValueError, pipe.set_params, anova__C=0.1) + +# Test clone +pipe2 = clone(pipe) +assert_false(pipe.named_steps['svc'] is pipe2.named_steps['svc']) + +# Check that apart from estimators, the parameters are the same +params = pipe.get_params() +params2 = pipe2.get_params() +# Remove estimators that where copied +params.pop('svc') +params.pop('anova') +params2.pop('svc') +params2.pop('anova') +assert_equal(params, params2) def test_pipeline_methods_anova(): - """ Test the various methods of the pipeline (anova). - """ - iris = load_iris() - X = iris.data - y = iris.target - # Test with Anova + LogisticRegression - clf = LogisticRegression() - filter1 = SelectKBest(f_classif, k=2) - pipe = Pipeline([('anova', filter1), ('logistic', clf)]) - pipe.fit(X, y) - pipe.predict(X) - pipe.predict_proba(X) - pipe.predict_log_proba(X) - pipe.score(X, y) +""" Test the various methods of the pipeline (anova). 
+""" +iris = load_iris() +X = iris.data +y = iris.target +# Test with Anova + LogisticRegression +clf = LogisticRegression() +filter1 = SelectKBest(f_classif, k=2) +pipe = Pipeline([('anova', filter1), ('logistic', clf)]) +pipe.fit(X, y) +pipe.predict(X) +pipe.predict_proba(X) +pipe.predict_log_proba(X) +pipe.score(X, y) def test_pipeline_fit_params(): - """Test that the pipeline can take fit parameters - """ - pipe = Pipeline([('transf', TransfT()), ('clf', FitParamT())]) - pipe.fit(X=None, y=None, clf__should_succeed=True) - # classifier should return True - assert_true(pipe.predict(None)) - # and transformer params should not be changed - assert_true(pipe.named_steps['transf'].a is None) - assert_true(pipe.named_steps['transf'].b is None) +"""Test that the pipeline can take fit parameters +""" +pipe = Pipeline([('transf', TransfT()), ('clf', FitParamT())]) +pipe.fit(X=None, y=None, clf__should_succeed=True) +# classifier should return True +assert_true(pipe.predict(None)) +# and transformer params should not be changed +assert_true(pipe.named_steps['transf'].a is None) +assert_true(pipe.named_steps['transf'].b is None) def test_pipeline_methods_pca_svm(): - """Test the various methods of the pipeline (pca + svm).""" - iris = load_iris() - X = iris.data - y = iris.target - # Test with PCA + SVC - clf = SVC(probability=True, random_state=0) - pca = PCA(n_components='mle', whiten=True) - pipe = Pipeline([('pca', pca), ('svc', clf)]) - pipe.fit(X, y) - pipe.predict(X) - pipe.predict_proba(X) - pipe.predict_log_proba(X) - pipe.score(X, y) +"""Test the various methods of the pipeline (pca + svm).""" +iris = load_iris() +X = iris.data +y = iris.target +# Test with PCA + SVC +clf = SVC(probability=True, random_state=0) +pca = PCA(n_components='mle', whiten=True) +pipe = Pipeline([('pca', pca), ('svc', clf)]) +pipe.fit(X, y) +pipe.predict(X) +pipe.predict_proba(X) +pipe.predict_log_proba(X) +pipe.score(X, y) def test_pipeline_methods_preprocessing_svm(): - """Test the various methods of the pipeline (preprocessing + svm).""" - iris = load_iris() - X = iris.data - y = iris.target - n_samples = X.shape[0] - n_classes = len(np.unique(y)) - scaler = StandardScaler() - pca = RandomizedPCA(n_components=2, whiten=True) - clf = SVC(probability=True, random_state=0) - - for preprocessing in [scaler, pca]: - pipe = Pipeline([('preprocess', preprocessing), ('svc', clf)]) - pipe.fit(X, y) +"""Test the various methods of the pipeline (preprocessing + svm).""" +iris = load_iris() +X = iris.data +y = iris.target +n_samples = X.shape[0] +n_classes = len(np.unique(y)) +scaler = StandardScaler() +pca = RandomizedPCA(n_components=2, whiten=True) +clf = SVC(probability=True, random_state=0) + +for preprocessing in [scaler, pca]: + pipe = Pipeline([('preprocess', preprocessing), ('svc', clf)]) + pipe.fit(X, y) - # check shapes of various prediction functions - predict = pipe.predict(X) - assert_equal(predict.shape, (n_samples,)) + # check shapes of various prediction functions + predict = pipe.predict(X) + assert_equal(predict.shape, (n_samples,)) - proba = pipe.predict_proba(X) - assert_equal(proba.shape, (n_samples, n_classes)) + proba = pipe.predict_proba(X) + assert_equal(proba.shape, (n_samples, n_classes)) - log_proba = pipe.predict_log_proba(X) - assert_equal(log_proba.shape, (n_samples, n_classes)) + log_proba = pipe.predict_log_proba(X) + assert_equal(log_proba.shape, (n_samples, n_classes)) - decision_function = pipe.decision_function(X) - assert_equal(decision_function.shape, (n_samples, n_classes)) + 
decision_function = pipe.decision_function(X) + assert_equal(decision_function.shape, (n_samples, n_classes)) - pipe.score(X, y) + pipe.score(X, y) def test_feature_union(): - # basic sanity check for feature union - iris = load_iris() - X = iris.data - X -= X.mean(axis=0) - y = iris.target - svd = TruncatedSVD(n_components=2, random_state=0) - select = SelectKBest(k=1) - fs = FeatureUnion([("svd", svd), ("select", select)]) - fs.fit(X, y) - X_transformed = fs.transform(X) - assert_equal(X_transformed.shape, (X.shape[0], 3)) - - # check if it does the expected thing - assert_array_almost_equal(X_transformed[:, :-1], svd.fit_transform(X)) - assert_array_equal(X_transformed[:, -1], - select.fit_transform(X, y).ravel()) - - # test if it also works for sparse input - # We use a different svd object to control the random_state stream - fs = FeatureUnion([("svd", svd), ("select", select)]) - X_sp = sparse.csr_matrix(X) - X_sp_transformed = fs.fit_transform(X_sp, y) - assert_array_almost_equal(X_transformed, X_sp_transformed.toarray()) - - # test setting parameters - fs.set_params(select__k=2) - assert_equal(fs.fit_transform(X, y).shape, (X.shape[0], 4)) - - # test it works with transformers missing fit_transform - fs = FeatureUnion([("mock", TransfT()), ("svd", svd), ("select", select)]) - X_transformed = fs.fit_transform(X, y) - assert_equal(X_transformed.shape, (X.shape[0], 8)) +# basic sanity check for feature union +iris = load_iris() +X = iris.data +X -= X.mean(axis=0) +y = iris.target +svd = TruncatedSVD(n_components=2, random_state=0) +select = SelectKBest(k=1) +fs = FeatureUnion([("svd", svd), ("select", select)]) +fs.fit(X, y) +X_transformed = fs.transform(X) +assert_equal(X_transformed.shape, (X.shape[0], 3)) + +# check if it does the expected thing +assert_array_almost_equal(X_transformed[:, :-1], svd.fit_transform(X)) +assert_array_equal(X_transformed[:, -1], + select.fit_transform(X, y).ravel()) + +# test if it also works for sparse input +# We use a different svd object to control the random_state stream +fs = FeatureUnion([("svd", svd), ("select", select)]) +X_sp = sparse.csr_matrix(X) +X_sp_transformed = fs.fit_transform(X_sp, y) +assert_array_almost_equal(X_transformed, X_sp_transformed.toarray()) + +# test setting parameters +fs.set_params(select__k=2) +assert_equal(fs.fit_transform(X, y).shape, (X.shape[0], 4)) + +# test it works with transformers missing fit_transform +fs = FeatureUnion([("mock", TransfT()), ("svd", svd), ("select", select)]) +X_transformed = fs.fit_transform(X, y) +assert_equal(X_transformed.shape, (X.shape[0], 8)) def test_make_union(): - pca = PCA() - mock = TransfT() - fu = make_union(pca, mock) - names, transformers = zip(*fu.transformer_list) - assert_equal(names, ("pca", "transft")) - assert_equal(transformers, (pca, mock)) +pca = PCA() +mock = TransfT() +fu = make_union(pca, mock) +names, transformers = zip(*fu.transformer_list) +assert_equal(names, ("pca", "transft")) +assert_equal(transformers, (pca, mock)) def test_pipeline_transform(): - # Test whether pipeline works with a transformer at the end. - # Also test pipeline.transform and pipeline.inverse_transform - iris = load_iris() - X = iris.data - pca = PCA(n_components=2) - pipeline = Pipeline([('pca', pca)]) +# Test whether pipeline works with a transformer at the end. 
+# Also test pipeline.transform and pipeline.inverse_transform +iris = load_iris() +X = iris.data +pca = PCA(n_components=2) +pipeline = Pipeline([('pca', pca)]) - # test transform and fit_transform: - X_trans = pipeline.fit(X).transform(X) - X_trans2 = pipeline.fit_transform(X) - X_trans3 = pca.fit_transform(X) - assert_array_almost_equal(X_trans, X_trans2) - assert_array_almost_equal(X_trans, X_trans3) +# test transform and fit_transform: +X_trans = pipeline.fit(X).transform(X) +X_trans2 = pipeline.fit_transform(X) +X_trans3 = pca.fit_transform(X) +assert_array_almost_equal(X_trans, X_trans2) +assert_array_almost_equal(X_trans, X_trans3) - X_back = pipeline.inverse_transform(X_trans) - X_back2 = pca.inverse_transform(X_trans) - assert_array_almost_equal(X_back, X_back2) +X_back = pipeline.inverse_transform(X_trans) +X_back2 = pca.inverse_transform(X_trans) +assert_array_almost_equal(X_back, X_back2) def test_pipeline_fit_transform(): - # Test whether pipeline works with a transformer missing fit_transform - iris = load_iris() - X = iris.data - y = iris.target - transft = TransfT() - pipeline = Pipeline([('mock', transft)]) +# Test whether pipeline works with a transformer missing fit_transform +iris = load_iris() +X = iris.data +y = iris.target +transft = TransfT() +pipeline = Pipeline([('mock', transft)]) - # test fit_transform: - X_trans = pipeline.fit_transform(X, y) - X_trans2 = transft.fit(X, y).transform(X) - assert_array_almost_equal(X_trans, X_trans2) +# test fit_transform: +X_trans = pipeline.fit_transform(X, y) +X_trans2 = transft.fit(X, y).transform(X) +assert_array_almost_equal(X_trans, X_trans2) def test_make_pipeline(): @@ -289,28 +289,6 @@ def test_make_pipeline(): assert_equal(pipe.steps[2][0], "fitparamt") -def test_pipeline_attributes(): - """Ensure that the Pipeline only provides post-fit methods that are present - on the last step""" - - def make(method): - """Make a pipeline whose estimator has specified method""" - transf = TransfT() - setattr(transf, method, lambda *args, **kwargs: True) - return Pipeline([('est', transf)]).fit([[1]], [1]) - - attribs = ['predict_proba', 'predict_log_proba', 'predict', - 'decision_function', 'score', 'inverse_transform'] - - for attrib in attribs: - pipeline = make(attrib) - getattr(pipeline, attrib)(np.asarray([[1]])) - for attrib2 in attribs: - if attrib2 != attrib: - assert_false(hasattr(pipeline, attrib2)) ->>>>>>> FIX make Pipeline methods properties as per #1805 - - def test_feature_union_weights(): # test feature union with transformer weights iris = load_iris() From 470f45d3bbf482b7605db7a61080309d4a4dc29c Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Thu, 30 May 2013 11:27:53 +1000 Subject: [PATCH 3/5] TST extra tests, pep8, comment --- sklearn/tests/test_metaestimators.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/sklearn/tests/test_metaestimators.py b/sklearn/tests/test_metaestimators.py index 5da01390d1a00..af4a581a0510c 100644 --- a/sklearn/tests/test_metaestimators.py +++ b/sklearn/tests/test_metaestimators.py @@ -9,6 +9,8 @@ from sklearn.datasets import make_classification from sklearn.utils.testing import assert_true, assert_false from sklearn.pipeline import Pipeline +from sklearn.grid_search import GridSearchCV, RandomizedSearchCV +from sklearn.feature_selection import RFECV class DelegatorData(object): @@ -21,12 +23,22 @@ def __init__(self, name, construct, skip_methods=(), DELEGATING_METAESTIMATORS = { - DelegatorData('pipeline', lambda est: 
Pipeline([('est', est)])), - DelegatorData('pipeline', lambda est: Pipeline([('est', est)])), + DelegatorData('Pipeline', lambda est: Pipeline([('est', est)])), + DelegatorData('GridSearchCV', + lambda est: GridSearchCV( + est, param_grid={'param': [5]}, cv=2), + skip_methods=['score']), + DelegatorData('RandomizedSearchCV', + lambda est: RandomizedSearchCV( + est, param_grid={'param': [5]}, cv=2), + skip_methods=['score']), + DelegatorData('RFECV', RFECV, + skip_methods=['transform', 'inverse_transform']), } def test_metaestimator_delegation(): + """Ensures specified metaestimators have methods iff subestimator does""" def hides(method): @property def wrapper(obj): @@ -39,10 +51,10 @@ class SubEstimator(BaseEstimator): def __init__(self, param=1, hidden_method=None): self.param = param self.hidden_method = hidden_method - + def fit(self, X, y=None, *args, **kwargs): return True - + @hides def inverse_transform(self, X, *args, **kwargs): return X @@ -71,7 +83,6 @@ def decision_function(self, X, *args, **kwargs): def score(self, X, *args, **kwargs): return 1.0 - methods = [k for k in iterkeys(SubEstimator.__dict__) if not k.startswith('_') and not k.startswith('fit')] @@ -86,7 +97,7 @@ def score(self, X, *args, **kwargs): assert_true(hasattr(delegator, method), msg="%s does not have method %r when its delegate does" % (delegator_data.name, method)) - + for method in methods: if method in delegator_data.skip_methods: continue @@ -95,6 +106,5 @@ def score(self, X, *args, **kwargs): delegator.fit(*delegator_data.fit_args) assert_false(hasattr(delegate, method)) assert_false(hasattr(delegator, method), - msg="%s has method %r when its delegate does not" - % (delegator_data.name, method)) - + msg="%s has method %r when its delegate does not" + % (delegator_data.name, method)) From a2d37810758a220dfd3c6006cfbd0f0d3cd61652 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Mon, 10 Jun 2013 12:43:34 +1000 Subject: [PATCH 4/5] Fix issues in test --- sklearn/tests/test_metaestimators.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sklearn/tests/test_metaestimators.py b/sklearn/tests/test_metaestimators.py index af4a581a0510c..c193c84d18db5 100644 --- a/sklearn/tests/test_metaestimators.py +++ b/sklearn/tests/test_metaestimators.py @@ -22,7 +22,7 @@ def __init__(self, name, construct, skip_methods=(), self.skip_methods = skip_methods -DELEGATING_METAESTIMATORS = { +DELEGATING_METAESTIMATORS = [ DelegatorData('Pipeline', lambda est: Pipeline([('est', est)])), DelegatorData('GridSearchCV', lambda est: GridSearchCV( @@ -30,11 +30,11 @@ def __init__(self, name, construct, skip_methods=(), skip_methods=['score']), DelegatorData('RandomizedSearchCV', lambda est: RandomizedSearchCV( - est, param_grid={'param': [5]}, cv=2), + est, param_distributions={'param': [5]}, cv=2), skip_methods=['score']), DelegatorData('RFECV', RFECV, skip_methods=['transform', 'inverse_transform']), -} +] def test_metaestimator_delegation(): @@ -53,6 +53,7 @@ def __init__(self, param=1, hidden_method=None): self.hidden_method = hidden_method def fit(self, X, y=None, *args, **kwargs): + self.coef_ = np.arange(X.shape[1]) return True @hides From 159bb7b4c63ff67fbe3d1dfc2b7a4380456211de Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Mon, 10 Jun 2013 13:14:54 +1000 Subject: [PATCH 5/5] FIX properties of #1805 for BaseSearchCV and RFE --- sklearn/feature_selection/rfe.py | 31 ++- sklearn/grid_search.py | 31 +++ sklearn/tests/test_metaestimators.py | 11 +- sklearn/tests/test_pipeline.py | 390 
+++++++++++++-------------- 4 files changed, 257 insertions(+), 206 deletions(-) diff --git a/sklearn/feature_selection/rfe.py b/sklearn/feature_selection/rfe.py index 01c99ceb526f4..4b98106fda483 100644 --- a/sklearn/feature_selection/rfe.py +++ b/sklearn/feature_selection/rfe.py @@ -6,6 +6,7 @@ """Recursive feature elimination for feature ranking""" +from functools import wraps import numpy as np from ..utils import check_arrays, safe_sqr from ..base import BaseEstimator @@ -36,6 +37,7 @@ class RFE(BaseEstimator, MetaEstimatorMixin, SelectorMixin): A supervised learning estimator with a `fit` method that updates a `coef_` attribute that holds the fitted parameters. Important features must correspond to high absolute values in the `coef_` array. + The estimator must also implement a `score` method. For instance, this is the case for most supervised learning algorithms such as Support Vector Classifiers and Generalized @@ -169,7 +171,13 @@ def fit(self, X, y): return self - def predict(self, X): + def _delegate_wrapper(self, delegate): + def wrapper(X, *args, **kwargs): + return delegate(self.transform(X), *args, **kwargs) + return wrapper + + @property + def predict(self): """Reduce X to the selected features and then predict using the underlying estimator. @@ -183,9 +191,10 @@ def predict(self, X): y : array of shape [n_samples] The predicted target values. """ - return self.estimator_.predict(self.transform(X)) + return self._delegate_wrapper(self.estimator_.predict) - def score(self, X, y): + @property + def score(self): """Reduce X to the selected features and then return the score of the underlying estimator. @@ -197,16 +206,22 @@ def score(self, X, y): y : array of shape [n_samples] The target values. """ - return self.estimator_.score(self.transform(X), y) + return self._delegate_wrapper(self.estimator_.score) def _get_support_mask(self): return self.support_ - def decision_function(self, X): - return self.estimator_.decision_function(self.transform(X)) + @property + def decision_function(self): + return self._delegate_wrapper(self.estimator_.decision_function) + + @property + def predict_proba(self): + return self._delegate_wrapper(self.estimator_.predict_proba) - def predict_proba(self, X): - return self.estimator_.predict_proba(self.transform(X)) + @property + def predict_log_proba(self): + return self._delegate_wrapper(self.estimator_.predict_log_proba) class RFECV(RFE, MetaEstimatorMixin): diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 8d217521f1269..2904030e77c4a 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -325,20 +325,51 @@ def score(self, X, y=None): @property def predict(self): + """Call predict on the best estimator""" return self.best_estimator_.predict @property def predict_proba(self): + """Call predict_proba on the best estimator""" return self.best_estimator_.predict_proba + @property + def predict_log_proba(self): + """Call predict_log_proba on the best estimator""" + return self.best_estimator_.predict_log_proba + @property def decision_function(self): + """Call decision_function on the best estimator""" return self.best_estimator_.decision_function @property def transform(self): + """Call transform on the best estimator""" return self.best_estimator_.transform + @property + def inverse_transform(self): + """Call inverse_transform on the best estimator""" + return self.best_estimator_.inverse_transform + + def _check_estimator(self): + """Check that estimator can be fitted and score can be computed.""" + if (not 
hasattr(self.estimator, 'fit') or + not (hasattr(self.estimator, 'predict') + or hasattr(self.estimator, 'score'))): + raise TypeError("estimator should a be an estimator implementing" + " 'fit' and 'predict' or 'score' methods," + " %s (type %s) was passed" % + (self.estimator, type(self.estimator))) + if (self.scoring is None and self.loss_func is None and self.score_func + is None): + if not hasattr(self.estimator, 'score'): + raise TypeError( + "If no scoring is specified, the estimator passed " + "should have a 'score' method. The estimator %s " + "does not." % self.estimator) + def _fit(self, X, y, parameter_iterable): """Actual fitting, performing the search over parameters.""" diff --git a/sklearn/tests/test_metaestimators.py b/sklearn/tests/test_metaestimators.py index c193c84d18db5..0a36125c07b2a 100644 --- a/sklearn/tests/test_metaestimators.py +++ b/sklearn/tests/test_metaestimators.py @@ -10,7 +10,7 @@ from sklearn.utils.testing import assert_true, assert_false from sklearn.pipeline import Pipeline from sklearn.grid_search import GridSearchCV, RandomizedSearchCV -from sklearn.feature_selection import RFECV +from sklearn.feature_selection import RFE, RFECV class DelegatorData(object): @@ -32,8 +32,10 @@ def __init__(self, name, construct, skip_methods=(), lambda est: RandomizedSearchCV( est, param_distributions={'param': [5]}, cv=2), skip_methods=['score']), + DelegatorData('RFE', RFE, + skip_methods=['transform', 'inverse_transform', 'score']), DelegatorData('RFECV', RFECV, - skip_methods=['transform', 'inverse_transform']), + skip_methods=['transform', 'inverse_transform', 'score']), ] @@ -43,7 +45,7 @@ def hides(method): @property def wrapper(obj): if obj.hidden_method == method.__name__: - raise AttributeError + raise AttributeError('%r is hidden' % obj.hidden_method) return functools.partial(method, obj) return wrapper @@ -86,6 +88,7 @@ def score(self, X, *args, **kwargs): methods = [k for k in iterkeys(SubEstimator.__dict__) if not k.startswith('_') and not k.startswith('fit')] + methods.sort() for delegator_data in DELEGATING_METAESTIMATORS: delegate = SubEstimator() @@ -98,6 +101,8 @@ def score(self, X, *args, **kwargs): assert_true(hasattr(delegator, method), msg="%s does not have method %r when its delegate does" % (delegator_data.name, method)) + # smoke test delegation + getattr(delegator, method)(delegator_data.fit_args[0]) for method in methods: if method in delegator_data.skip_methods: diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index aa4ab41e8582a..87bb7b813cae8 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -23,254 +23,254 @@ JUNK_FOOD_DOCS = ( -"the pizza pizza beer copyright", -"the pizza burger beer copyright", -"the the pizza beer beer copyright", -"the burger beer beer copyright", -"the coke burger coke copyright", -"the coke burger burger", + "the pizza pizza beer copyright", + "the pizza burger beer copyright", + "the the pizza beer beer copyright", + "the burger beer beer copyright", + "the coke burger coke copyright", + "the coke burger burger", ) class IncorrectT(BaseEstimator): -"""Small class to test parameter dispatching. -""" + """Small class to test parameter dispatching. 
+ """ -def __init__(self, a=None, b=None): - self.a = a - self.b = b + def __init__(self, a=None, b=None): + self.a = a + self.b = b class T(IncorrectT): -def fit(self, X, y): - return self + def fit(self, X, y): + return self class TransfT(T): -def transform(self, X, y=None): - return X + def transform(self, X, y=None): + return X class FitParamT(BaseEstimator): -"""Mock classifier -""" + """Mock classifier + """ -def __init__(self): - self.successful = False - pass + def __init__(self): + self.successful = False + pass -def fit(self, X, y, should_succeed=False): - self.successful = should_succeed + def fit(self, X, y, should_succeed=False): + self.successful = should_succeed -def predict(self, X): - return self.successful + def predict(self, X): + return self.successful def test_pipeline_init(): -""" Test the various init parameters of the pipeline. -""" -assert_raises(TypeError, Pipeline) -# Check that we can't instantiate pipelines with objects without fit -# method -pipe = assert_raises(TypeError, Pipeline, [('svc', IncorrectT)]) -# Smoke test with only an estimator -clf = T() -pipe = Pipeline([('svc', clf)]) -assert_equal(pipe.get_params(deep=True), - dict(svc__a=None, svc__b=None, svc=clf)) - -# Check that params are set -pipe.set_params(svc__a=0.1) -assert_equal(clf.a, 0.1) -# Smoke test the repr: -repr(pipe) - -# Test with two objects -clf = SVC() -filter1 = SelectKBest(f_classif) -pipe = Pipeline([('anova', filter1), ('svc', clf)]) - -# Check that we can't use the same stage name twice -assert_raises(ValueError, Pipeline, [('svc', SVC()), ('svc', SVC())]) - -# Check that params are set -pipe.set_params(svc__C=0.1) -assert_equal(clf.C, 0.1) -# Smoke test the repr: -repr(pipe) - -# Check that params are not set when naming them wrong -assert_raises(ValueError, pipe.set_params, anova__C=0.1) - -# Test clone -pipe2 = clone(pipe) -assert_false(pipe.named_steps['svc'] is pipe2.named_steps['svc']) - -# Check that apart from estimators, the parameters are the same -params = pipe.get_params() -params2 = pipe2.get_params() -# Remove estimators that where copied -params.pop('svc') -params.pop('anova') -params2.pop('svc') -params2.pop('anova') -assert_equal(params, params2) + """ Test the various init parameters of the pipeline. 
+ """ + assert_raises(TypeError, Pipeline) + # Check that we can't instantiate pipelines with objects without fit + # method + pipe = assert_raises(TypeError, Pipeline, [('svc', IncorrectT)]) + # Smoke test with only an estimator + clf = T() + pipe = Pipeline([('svc', clf)]) + assert_equal(pipe.get_params(deep=True), + dict(svc__a=None, svc__b=None, svc=clf)) + + # Check that params are set + pipe.set_params(svc__a=0.1) + assert_equal(clf.a, 0.1) + # Smoke test the repr: + repr(pipe) + + # Test with two objects + clf = SVC() + filter1 = SelectKBest(f_classif) + pipe = Pipeline([('anova', filter1), ('svc', clf)]) + + # Check that we can't use the same stage name twice + assert_raises(ValueError, Pipeline, [('svc', SVC()), ('svc', SVC())]) + + # Check that params are set + pipe.set_params(svc__C=0.1) + assert_equal(clf.C, 0.1) + # Smoke test the repr: + repr(pipe) + + # Check that params are not set when naming them wrong + assert_raises(ValueError, pipe.set_params, anova__C=0.1) + + # Test clone + pipe2 = clone(pipe) + assert_false(pipe.named_steps['svc'] is pipe2.named_steps['svc']) + + # Check that apart from estimators, the parameters are the same + params = pipe.get_params() + params2 = pipe2.get_params() + # Remove estimators that where copied + params.pop('svc') + params.pop('anova') + params2.pop('svc') + params2.pop('anova') + assert_equal(params, params2) def test_pipeline_methods_anova(): -""" Test the various methods of the pipeline (anova). -""" -iris = load_iris() -X = iris.data -y = iris.target -# Test with Anova + LogisticRegression -clf = LogisticRegression() -filter1 = SelectKBest(f_classif, k=2) -pipe = Pipeline([('anova', filter1), ('logistic', clf)]) -pipe.fit(X, y) -pipe.predict(X) -pipe.predict_proba(X) -pipe.predict_log_proba(X) -pipe.score(X, y) + """ Test the various methods of the pipeline (anova). 
+ """ + iris = load_iris() + X = iris.data + y = iris.target + # Test with Anova + LogisticRegression + clf = LogisticRegression() + filter1 = SelectKBest(f_classif, k=2) + pipe = Pipeline([('anova', filter1), ('logistic', clf)]) + pipe.fit(X, y) + pipe.predict(X) + pipe.predict_proba(X) + pipe.predict_log_proba(X) + pipe.score(X, y) def test_pipeline_fit_params(): -"""Test that the pipeline can take fit parameters -""" -pipe = Pipeline([('transf', TransfT()), ('clf', FitParamT())]) -pipe.fit(X=None, y=None, clf__should_succeed=True) -# classifier should return True -assert_true(pipe.predict(None)) -# and transformer params should not be changed -assert_true(pipe.named_steps['transf'].a is None) -assert_true(pipe.named_steps['transf'].b is None) + """Test that the pipeline can take fit parameters + """ + pipe = Pipeline([('transf', TransfT()), ('clf', FitParamT())]) + pipe.fit(X=None, y=None, clf__should_succeed=True) + # classifier should return True + assert_true(pipe.predict(None)) + # and transformer params should not be changed + assert_true(pipe.named_steps['transf'].a is None) + assert_true(pipe.named_steps['transf'].b is None) def test_pipeline_methods_pca_svm(): -"""Test the various methods of the pipeline (pca + svm).""" -iris = load_iris() -X = iris.data -y = iris.target -# Test with PCA + SVC -clf = SVC(probability=True, random_state=0) -pca = PCA(n_components='mle', whiten=True) -pipe = Pipeline([('pca', pca), ('svc', clf)]) -pipe.fit(X, y) -pipe.predict(X) -pipe.predict_proba(X) -pipe.predict_log_proba(X) -pipe.score(X, y) + """Test the various methods of the pipeline (pca + svm).""" + iris = load_iris() + X = iris.data + y = iris.target + # Test with PCA + SVC + clf = SVC(probability=True, random_state=0) + pca = PCA(n_components='mle', whiten=True) + pipe = Pipeline([('pca', pca), ('svc', clf)]) + pipe.fit(X, y) + pipe.predict(X) + pipe.predict_proba(X) + pipe.predict_log_proba(X) + pipe.score(X, y) def test_pipeline_methods_preprocessing_svm(): -"""Test the various methods of the pipeline (preprocessing + svm).""" -iris = load_iris() -X = iris.data -y = iris.target -n_samples = X.shape[0] -n_classes = len(np.unique(y)) -scaler = StandardScaler() -pca = RandomizedPCA(n_components=2, whiten=True) -clf = SVC(probability=True, random_state=0) - -for preprocessing in [scaler, pca]: - pipe = Pipeline([('preprocess', preprocessing), ('svc', clf)]) - pipe.fit(X, y) + """Test the various methods of the pipeline (preprocessing + svm).""" + iris = load_iris() + X = iris.data + y = iris.target + n_samples = X.shape[0] + n_classes = len(np.unique(y)) + scaler = StandardScaler() + pca = RandomizedPCA(n_components=2, whiten=True) + clf = SVC(probability=True, random_state=0) - # check shapes of various prediction functions - predict = pipe.predict(X) - assert_equal(predict.shape, (n_samples,)) + for preprocessing in [scaler, pca]: + pipe = Pipeline([('preprocess', preprocessing), ('svc', clf)]) + pipe.fit(X, y) - proba = pipe.predict_proba(X) - assert_equal(proba.shape, (n_samples, n_classes)) + # check shapes of various prediction functions + predict = pipe.predict(X) + assert_equal(predict.shape, (n_samples,)) - log_proba = pipe.predict_log_proba(X) - assert_equal(log_proba.shape, (n_samples, n_classes)) + proba = pipe.predict_proba(X) + assert_equal(proba.shape, (n_samples, n_classes)) - decision_function = pipe.decision_function(X) - assert_equal(decision_function.shape, (n_samples, n_classes)) + log_proba = pipe.predict_log_proba(X) + assert_equal(log_proba.shape, (n_samples, 
n_classes)) - pipe.score(X, y) + decision_function = pipe.decision_function(X) + assert_equal(decision_function.shape, (n_samples, n_classes)) + + pipe.score(X, y) def test_feature_union(): -# basic sanity check for feature union -iris = load_iris() -X = iris.data -X -= X.mean(axis=0) -y = iris.target -svd = TruncatedSVD(n_components=2, random_state=0) -select = SelectKBest(k=1) -fs = FeatureUnion([("svd", svd), ("select", select)]) -fs.fit(X, y) -X_transformed = fs.transform(X) -assert_equal(X_transformed.shape, (X.shape[0], 3)) - -# check if it does the expected thing -assert_array_almost_equal(X_transformed[:, :-1], svd.fit_transform(X)) -assert_array_equal(X_transformed[:, -1], - select.fit_transform(X, y).ravel()) - -# test if it also works for sparse input -# We use a different svd object to control the random_state stream -fs = FeatureUnion([("svd", svd), ("select", select)]) -X_sp = sparse.csr_matrix(X) -X_sp_transformed = fs.fit_transform(X_sp, y) -assert_array_almost_equal(X_transformed, X_sp_transformed.toarray()) - -# test setting parameters -fs.set_params(select__k=2) -assert_equal(fs.fit_transform(X, y).shape, (X.shape[0], 4)) - -# test it works with transformers missing fit_transform -fs = FeatureUnion([("mock", TransfT()), ("svd", svd), ("select", select)]) -X_transformed = fs.fit_transform(X, y) -assert_equal(X_transformed.shape, (X.shape[0], 8)) + # basic sanity check for feature union + iris = load_iris() + X = iris.data + X -= X.mean(axis=0) + y = iris.target + svd = TruncatedSVD(n_components=2, random_state=0) + select = SelectKBest(k=1) + fs = FeatureUnion([("svd", svd), ("select", select)]) + fs.fit(X, y) + X_transformed = fs.transform(X) + assert_equal(X_transformed.shape, (X.shape[0], 3)) + + # check if it does the expected thing + assert_array_almost_equal(X_transformed[:, :-1], svd.fit_transform(X)) + assert_array_equal(X_transformed[:, -1], + select.fit_transform(X, y).ravel()) + + # test if it also works for sparse input + # We use a different svd object to control the random_state stream + fs = FeatureUnion([("svd", svd), ("select", select)]) + X_sp = sparse.csr_matrix(X) + X_sp_transformed = fs.fit_transform(X_sp, y) + assert_array_almost_equal(X_transformed, X_sp_transformed.toarray()) + + # test setting parameters + fs.set_params(select__k=2) + assert_equal(fs.fit_transform(X, y).shape, (X.shape[0], 4)) + + # test it works with transformers missing fit_transform + fs = FeatureUnion([("mock", TransfT()), ("svd", svd), ("select", select)]) + X_transformed = fs.fit_transform(X, y) + assert_equal(X_transformed.shape, (X.shape[0], 8)) def test_make_union(): -pca = PCA() -mock = TransfT() -fu = make_union(pca, mock) -names, transformers = zip(*fu.transformer_list) -assert_equal(names, ("pca", "transft")) -assert_equal(transformers, (pca, mock)) + pca = PCA() + mock = TransfT() + fu = make_union(pca, mock) + names, transformers = zip(*fu.transformer_list) + assert_equal(names, ("pca", "transft")) + assert_equal(transformers, (pca, mock)) def test_pipeline_transform(): -# Test whether pipeline works with a transformer at the end. -# Also test pipeline.transform and pipeline.inverse_transform -iris = load_iris() -X = iris.data -pca = PCA(n_components=2) -pipeline = Pipeline([('pca', pca)]) + # Test whether pipeline works with a transformer at the end. 
+ # Also test pipeline.transform and pipeline.inverse_transform + iris = load_iris() + X = iris.data + pca = PCA(n_components=2) + pipeline = Pipeline([('pca', pca)]) -# test transform and fit_transform: -X_trans = pipeline.fit(X).transform(X) -X_trans2 = pipeline.fit_transform(X) -X_trans3 = pca.fit_transform(X) -assert_array_almost_equal(X_trans, X_trans2) -assert_array_almost_equal(X_trans, X_trans3) + # test transform and fit_transform: + X_trans = pipeline.fit(X).transform(X) + X_trans2 = pipeline.fit_transform(X) + X_trans3 = pca.fit_transform(X) + assert_array_almost_equal(X_trans, X_trans2) + assert_array_almost_equal(X_trans, X_trans3) -X_back = pipeline.inverse_transform(X_trans) -X_back2 = pca.inverse_transform(X_trans) -assert_array_almost_equal(X_back, X_back2) + X_back = pipeline.inverse_transform(X_trans) + X_back2 = pca.inverse_transform(X_trans) + assert_array_almost_equal(X_back, X_back2) def test_pipeline_fit_transform(): -# Test whether pipeline works with a transformer missing fit_transform -iris = load_iris() -X = iris.data -y = iris.target -transft = TransfT() -pipeline = Pipeline([('mock', transft)]) - -# test fit_transform: -X_trans = pipeline.fit_transform(X, y) -X_trans2 = transft.fit(X, y).transform(X) -assert_array_almost_equal(X_trans, X_trans2) + # Test whether pipeline works with a transformer missing fit_transform + iris = load_iris() + X = iris.data + y = iris.target + transft = TransfT() + pipeline = Pipeline([('mock', transft)]) + + # test fit_transform: + X_trans = pipeline.fit_transform(X, y) + X_trans2 = transft.fit(X, y).transform(X) + assert_array_almost_equal(X_trans, X_trans2) def test_make_pipeline():