scikit-learn · adrinjalali · Mar 7, 2019 · Nov 2, 2013 · Nov 2, 2013 · Nov 25, 2013
diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst
@@ -45,6 +45,9 @@ The last estimator may be any type (transformer, classifier, etc.).
 Usage
 -----
 
+Construction
+............
+
 The :class:`Pipeline` is built using a list of ``(key, value)`` pairs, where
 the ``key`` is a string containing the name you want to give this step and ``value``
 is an estimator object::
@@ -74,17 +77,41 @@ filling in the names automatically::
                                                     class_prior=None,
                                                     fit_prior=True))])
 
-The estimators of a pipeline are stored as a list in the ``steps`` attribute::
+Accessing steps
+...............
+
+The estimators of a pipeline are stored as a list in the ``steps`` attribute,
+but can be accessed by index or name by indexing (with ``[idx]``) the
+Pipeline::
 
     >>> pipe.steps[0]  # doctest: +NORMALIZE_WHITESPACE
-    ('reduce_dim', PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
-      svd_solver='auto', tol=0.0, whiten=False))
+    ('reduce_dim', PCA(copy=True, iterated_power='auto', n_components=None,
+                       random_state=None, svd_solver='auto', tol=0.0,
+                       whiten=False))
+    >>> pipe[0]  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
+    PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
+        svd_solver='auto', tol=0.0, whiten=False)
+    >>> pipe['reduce_dim']  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
+    PCA(copy=True, ...)
 
-and as a ``dict`` in ``named_steps``::
+Pipeline's ``named_steps`` attribute allows accessing steps by name with tab
+completion in interactive environments::
 
-    >>> pipe.named_steps['reduce_dim']  # doctest: +NORMALIZE_WHITESPACE
-    PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
-      svd_solver='auto', tol=0.0, whiten=False)
+    >>> pipe.named_steps.reduce_dim is pipe['reduce_dim']
+    True
+
+A sub-pipeline can also be extracted using the slicing notation commonly used
+for Python Sequences such as lists or strings (although only a step of 1 is
+permitted). This is convenient for performing only some of the transformations
+(or their inverse)::
+
+    >>> pipe[:1] # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
+    Pipeline(memory=None, steps=[('reduce_dim', PCA(copy=True, ...))])
+    >>> pipe[-1:] # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
+    Pipeline(memory=None, steps=[('clf', SVC(C=1.0, ...))])
+
+Nested parameters
+.................
 
 Parameters of the estimators in the pipeline can be accessed using the
 ``<estimator>__<parameter>`` syntax::
@@ -94,11 +121,6 @@ Parameters of the estimators in the pipeline can be accessed using the
              steps=[('reduce_dim', PCA(copy=True, iterated_power='auto',...)),
                     ('clf', SVC(C=10, cache_size=200, class_weight=None,...))])
 
-Attributes of named_steps map to keys, enabling tab completion in interactive environments::
-
-    >>> pipe.named_steps.reduce_dim is pipe.named_steps['reduce_dim']
-    True
-
 This is particularly important for doing grid searches::
 
     >>> from sklearn.model_selection import GridSearchCV
@@ -115,6 +137,16 @@ ignored by setting them to ``'passthrough'``::
     ...                   clf__C=[0.1, 10, 100])
     >>> grid_search = GridSearchCV(pipe, param_grid=param_grid)
 
+The estimators of the pipeline can be retrieved by index::
+
+    >>> pipe[0]  # doctest: +ELLIPSIS
+    PCA(copy=True, ...)
+
+or by name::
+
+    >>> pipe['reduce_dim']  # doctest: +ELLIPSIS
+    PCA(copy=True, ...)
+
 .. topic:: Examples:
 
  * :ref:`sphx_glr_auto_examples_feature_selection_plot_feature_selection_pipeline.py`

diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst
@@ -298,6 +298,12 @@ Support for Python 3.4 and below has been officially dropped.
 :mod:`sklearn.pipeline`
 .......................
 
+- |Feature| :class:`pipeline.Pipeline` can now use slicing notation (e.g.
+  ``my_pipeline[0:-1]``) to extract a subsequence of steps as another Pipeline
+  instance.  A Pipeline can also be indexed directly to extract a particular
+  step (e.g. ``my_pipeline['svc']``), rather than accessing ``named_steps``.
+  :issue:`2568` by `Joel Nothman`_.
+
 - |API| :class:`pipeline.Pipeline` now supports using ``'passthrough'`` as a
   transformer. :issue:`11144` by :user:`Thomas Fan <thomasjpfan>`.
 

diff --git a/examples/feature_selection/plot_feature_selection_pipeline.py b/examples/feature_selection/plot_feature_selection_pipeline.py
@@ -4,7 +4,10 @@
 ==================
 
 Simple usage of Pipeline that runs successively a univariate
-feature selection with anova and then a C-SVM of the selected features.
+feature selection with anova and then an SVM of the selected features.
+
+Using a sub-pipeline, the fitted coefficients can be mapped back into
+the original feature space.
 """
 from sklearn import svm
 from sklearn.datasets import samples_generator
@@ -26,9 +29,12 @@
 # 1) anova filter, take 3 best ranked features
 anova_filter = SelectKBest(f_regression, k=3)
 # 2) svm
-clf = svm.SVC(kernel='linear')
+clf = svm.LinearSVC()
 
 anova_svm = make_pipeline(anova_filter, clf)
 anova_svm.fit(X_train, y_train)
 y_pred = anova_svm.predict(X_test)
 print(classification_report(y_test, y_pred))
+
+coef = anova_svm[:-1].inverse_transform(anova_svm['linearsvc'].coef_)
+print(coef)
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
@@ -99,17 +99,28 @@ class Pipeline(_BaseComposition):
     >>> anova_svm.score(X, y)                        # doctest: +ELLIPSIS
     0.83
     >>> # getting the selected features chosen by anova_filter
-    >>> anova_svm.named_steps['anova'].get_support()
+    >>> anova_svm['anova'].get_support()
     ... # doctest: +NORMALIZE_WHITESPACE
-    array([False, False,  True,  True, False, False, True,  True, False,
-           True,  False,  True,  True, False, True,  False, True, True,
+    array([False, False,  True,  True, False, False,  True,  True, False,
+           True, False,  True,  True, False,  True, False,  True,  True,
            False, False])
     >>> # Another way to get selected features chosen by anova_filter
     >>> anova_svm.named_steps.anova.get_support()
     ... # doctest: +NORMALIZE_WHITESPACE
-    array([False, False,  True,  True, False, False, True,  True, False,
-           True,  False,  True,  True, False, True,  False, True, True,
+    array([False, False,  True,  True, False, False,  True,  True, False,
+           True, False,  True,  True, False,  True, False,  True,  True,
            False, False])
+    >>> # Indexing can also be used to extract a sub-pipeline.
+    >>> sub_pipeline = anova_svm[:1]
+    >>> sub_pipeline  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+    Pipeline(memory=None, steps=[('anova', ...)])
+    >>> coef = anova_svm[-1].coef_
+    >>> anova_svm['svc'] is anova_svm[-1]
+    True
+    >>> coef.shape
+    (1, 10)
+    >>> sub_pipeline.inverse_transform(coef).shape
+    (1, 20)
     """
 
     # BaseEstimator interface
@@ -188,6 +199,26 @@ def _iter(self, with_final=True):
             if trans is not None and trans != 'passthrough':
                 yield idx, name, trans
 
+    def __getitem__(self, ind):
+        """Returns a sub-pipeline or a single estimator in the pipeline
+
+        Indexing with an integer will return an estimator; using a slice
+        returns another Pipeline instance which copies a slice of this
+        Pipeline. This copy is shallow: modifying (or fitting) estimators in
+        the sub-pipeline will affect the larger pipeline and vice-versa.
+        However, replacing a value in `steps` will not affect a copy.
+        """
+        if isinstance(ind, slice):
+            if ind.step not in (1, None):
+                raise ValueError('Pipeline slicing only supports a step of 1')
+            return self.__class__(self.steps[ind])
+        try:
+            name, est = self.steps[ind]
+        except TypeError:
+            # Not an int, try get step by name
+            return self.named_steps[ind]
+        return est
+
     @property
     def _estimator_type(self):
         return self.steps[-1][1]._estimator_type

diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
@@ -529,6 +529,29 @@ def test_pipeline_fit_transform():
     assert_array_almost_equal(X_trans, X_trans2)
 
 
+def test_pipeline_slice():
+    pipe = Pipeline([('transf1', Transf()),
+                     ('transf2', Transf()),
+                     ('clf', FitParamT())])
+    pipe2 = pipe[:-1]
+    assert isinstance(pipe2, Pipeline)
+    assert pipe2.steps == pipe.steps[:-1]
+    assert 2 == len(pipe2.named_steps)
+    assert_raises(ValueError, lambda: pipe[::-1])
+
+
+def test_pipeline_index():
+    transf = Transf()
+    clf = FitParamT()
+    pipe = Pipeline([('transf', transf), ('clf', clf)])
+    assert pipe[0] == transf
+    assert pipe['transf'] == transf
+    assert pipe[-1] == clf
+    assert pipe['clf'] == clf
+    assert_raises(IndexError, lambda: pipe[3])
+    assert_raises(KeyError, lambda: pipe['foobar'])
+
+
 def test_set_pipeline_steps():
     transf1 = Transf()
     transf2 = Transf()