From 53ff58b2d6be6b58dc02bb948e3fac6fffec9d0a Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Sat, 2 Nov 2013 23:35:10 +1100 Subject: [PATCH 01/18] ENH Pipeline can now be sliced or indexed --- sklearn/pipeline.py | 13 ++++++++++++- sklearn/tests/test_pipeline.py | 25 +++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 2570c378b8ee9..82df2f7af4c45 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -97,6 +97,18 @@ def __init__(self, steps): "'%s' (type %s) doesn't)" % (estimator, type(estimator))) + def __getitem__(self, ind): + if isinstance(ind, slice): + if ind.step not in (1, None): + raise ValueError('Pipeline slicing only supports a step of 1') + return self.__class__(self.steps[ind]) + try: + name, est = self.steps[ind] + except TypeError: + # Not an int, try get step by name + return self.named_steps[ind] + return est + def get_params(self, deep=True): if not deep: return super(Pipeline, self).get_params(deep=False) @@ -358,4 +370,3 @@ def _update_transformer_list(self, transformers): (name, new) for ((name, old), new) in zip(self.transformer_list, transformers) ] - diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index c3b2fbc170b70..e20d79d9cf529 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -6,6 +6,8 @@ from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_equal +from sklearn.utils.testing import assert_list_equal +from sklearn.utils.testing import assert_is_instance from sklearn.utils.testing import assert_false from sklearn.utils.testing import assert_true from sklearn.utils.testing import assert_array_equal @@ -264,6 +266,29 @@ def test_pipeline_fit_transform(): assert_array_almost_equal(X_trans, X_trans2) +def test_pipeline_slice(): + pipe = Pipeline([('transf1', TransfT()), + ('transf2', TransfT()), + ('clf', FitParamT())]) + pipe2 = pipe[:-1] + 
assert_is_instance(pipe2, Pipeline) + assert_list_equal(pipe2.steps, pipe.steps[:-1]) + assert_equal(len(pipe2.named_steps), 2) + assert_raises(ValueError, lambda: pipe[::-1]) + + +def test_pipeline_index(): + transf = TransfT() + clf = FitParamT() + pipe = Pipeline([('transf', transf), ('clf', clf)]) + assert_equal(pipe[0], transf) + assert_equal(pipe['transf'], transf) + assert_equal(pipe[-1], clf) + assert_equal(pipe['clf'], clf) + assert_raises(IndexError, lambda: pipe[3]) + assert_raises(KeyError, lambda: pipe['foobar']) + + def test_feature_union_weights(): # test feature union with transformer weights iris = load_iris() From d02a64a453536f2f5123b1874796d435782662a8 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Sun, 3 Nov 2013 07:07:49 +1100 Subject: [PATCH 02/18] Additional assertion imports for testing --- sklearn/utils/testing.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py index 120d132ecc4b9..934abc3059513 100644 --- a/sklearn/utils/testing.py +++ b/sklearn/utils/testing.py @@ -32,6 +32,8 @@ from nose.tools import assert_true from nose.tools import assert_false from nose.tools import assert_raises +from nose.tools import assert_list_equal +from nose.tools import assert_is_instance from nose.tools import raises from nose import SkipTest from nose import with_setup From 7fa737dcbac8920cc907bc2d9e0a8c832976930b Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Tue, 26 Nov 2013 09:50:53 +1100 Subject: [PATCH 03/18] DOC Documentation and example for Pipeline slicing --- doc/modules/pipeline.rst | 26 ++++++++++++++++---------- examples/feature_selection_pipeline.py | 9 +++++++-- sklearn/pipeline.py | 26 +++++++++++++++++++++++--- 3 files changed, 46 insertions(+), 15 deletions(-) diff --git a/doc/modules/pipeline.rst b/doc/modules/pipeline.rst index 10949029fc2bc..cc7ab1efee85a 100644 --- a/doc/modules/pipeline.rst +++ b/doc/modules/pipeline.rst @@ -41,16 +41,6 @@ is an estimator object:: 
probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False))]) -The estimators of the pipeline are stored as a list in the ``steps`` attribute:: - - >>> clf.steps[0] - ('reduce_dim', PCA(copy=True, n_components=None, whiten=False)) - -and as a ``dict`` in ``named_steps``:: - - >>> clf.named_steps['reduce_dim'] - PCA(copy=True, n_components=None, whiten=False) - Parameters of the estimators in the pipeline can be accessed using the ``__`` syntax:: @@ -68,6 +58,22 @@ This is particularly important for doing grid searches:: ... svm__C=[0.1, 10, 100]) >>> grid_search = GridSearchCV(clf, param_grid=params) +The estimators of the pipeline can be retrieved by index: + + >>> clf[0] + PCA(copy=True, n_components=None, whiten=False) + +or by name:: + + >>> clf['reduce_dim'] + PCA(copy=True, n_components=None, whiten=False) + +A sub-pipeline can also be extracted, which is convenient for performing +only some of the transformations (or their inverse): + + >>> clf[:1] # doctest: +NORMALIZE_WHITESPACE + Pipeline(steps=[('reduce_dim', PCA(copy=True, n_components=None, + whiten=False))]) .. topic:: Examples: diff --git a/examples/feature_selection_pipeline.py b/examples/feature_selection_pipeline.py index 2747e7de89131..f10b6435ba642 100644 --- a/examples/feature_selection_pipeline.py +++ b/examples/feature_selection_pipeline.py @@ -4,7 +4,10 @@ ================== Simple usage of Pipeline that runs successively a univariate -feature selection with anova and then a C-SVM of the selected features. +feature selection with anova and then an SVM of the selected features. + +Using a sub-pipeline, the fitted coefficients can be mapped back into +the original feature space. 
""" print(__doc__) @@ -22,8 +25,10 @@ # 1) anova filter, take 3 best ranked features anova_filter = SelectKBest(f_regression, k=3) # 2) svm -clf = svm.SVC(kernel='linear') +clf = svm.LinearSVC() anova_svm = Pipeline([('anova', anova_filter), ('svm', clf)]) anova_svm.fit(X, y) anova_svm.predict(X) + +coef = anova_svm[:-1].inverse_transform(anova_svm['svm'].coef_) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 82df2f7af4c45..2d619107352a3 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -57,7 +57,7 @@ class Pipeline(BaseEstimator): >>> # ANOVA SVM-C >>> anova_filter = SelectKBest(f_regression, k=5) - >>> clf = svm.SVC(kernel='linear') + >>> clf = svm.LinearSVC() >>> anova_svm = Pipeline([('anova', anova_filter), ('svc', clf)]) >>> # You can set the parameters using the names issued @@ -68,8 +68,20 @@ class Pipeline(BaseEstimator): Pipeline(steps=[...]) >>> prediction = anova_svm.predict(X) - >>> anova_svm.score(X, y) - 0.75 + >>> anova_svm.score(X, y) # doctest: +ELLIPSIS + 0.78... + + Slicing can also be used to extract a single estimator, or a sub-pipeline. + >>> sub_pipeline = anova_svm[:1] + >>> sub_pipeline # doctest: +ELLIPSIS + Pipeline(steps=[('anova', ...)]) + >>> coef = anova_svm[-1].coef_ + (1, 5) + >>> anova_svm['clf'] is anova_svm[-1] + True + >>> coef.shape + >>> sub_pipeline.inverse_transform(coef).shape + (1, 20) """ # BaseEstimator interface @@ -98,6 +110,14 @@ def __init__(self, steps): % (estimator, type(estimator))) def __getitem__(self, ind): + """Returns a sub-pipeline or a single estimator in the pipeline + + Indexing with an integer will return an estimator; using a slice + returns another Pipeline instance which copies a slice of this + Pipeline. This copy is shallow: modifying (or fitting) estimators in + the sub-pipeline will affect the larger pipeline and vice-versa. + However, replacing a value in `steps` will not affect a copy. 
+ """ if isinstance(ind, slice): if ind.step not in (1, None): raise ValueError('Pipeline slicing only supports a step of 1') From d3052732fa452f46a3a7dc71a7d18f2b89b7353f Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Tue, 26 Nov 2013 22:29:12 +1100 Subject: [PATCH 04/18] FIX put doctest lines in correct order --- sklearn/pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 2d619107352a3..0871a42eb28ab 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -76,10 +76,10 @@ class Pipeline(BaseEstimator): >>> sub_pipeline # doctest: +ELLIPSIS Pipeline(steps=[('anova', ...)]) >>> coef = anova_svm[-1].coef_ - (1, 5) - >>> anova_svm['clf'] is anova_svm[-1] + >>> anova_svm['svc'] is anova_svm[-1] True >>> coef.shape + (1, 10) >>> sub_pipeline.inverse_transform(coef).shape (1, 20) """ From 48ee35e2ee9f720dfc61beb144d9aca1fd607c09 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Thu, 28 Feb 2019 13:30:05 +0100 Subject: [PATCH 05/18] DOC improve compose Pipeline docs --- doc/modules/compose.rst | 58 ++++++++++++++++++++++++++++++++--------- 1 file changed, 46 insertions(+), 12 deletions(-) diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst index 0145842b88e16..7bc2ac4d94120 100644 --- a/doc/modules/compose.rst +++ b/doc/modules/compose.rst @@ -45,6 +45,9 @@ The last estimator may be any type (transformer, classifier, etc.). Usage ----- +Construction +............ + The :class:`Pipeline` is built using a list of ``(key, value)`` pairs, where the ``key`` is a string containing the name you want to give this step and ``value`` is an estimator object:: @@ -74,17 +77,36 @@ filling in the names automatically:: class_prior=None, fit_prior=True))]) -The estimators of a pipeline are stored as a list in the ``steps`` attribute:: +Accessing steps +............... 
+ +The estimators of a pipeline are stored as a list in the ``steps`` attribute, +but can be accessed by index or name by indexing (with ``[idx]``) the pipe:: >>> pipe.steps[0] # doctest: +NORMALIZE_WHITESPACE - ('reduce_dim', PCA(copy=True, iterated_power='auto', n_components=None, random_state=None, - svd_solver='auto', tol=0.0, whiten=False)) + ('reduce_dim', PCA(copy=True, iterated_power='auto', n_components=None, + random_state=None, svd_solver='auto', tol=0.0, + whiten=False)) + >>> pipe[0] # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS + PCA(copy=True, iterated_power='auto', n_components=None, random_state=None, + svd_solver='auto', tol=0.0, whiten=False) + >>> pipe['reduce_dim'] # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS + PCA(copy=True, ...) -and as a ``dict`` in ``named_steps``:: +Pipeline's ``named_steps`` attribute allows accessing steps by name with tab +completion in interactive environments:: - >>> pipe.named_steps['reduce_dim'] # doctest: +NORMALIZE_WHITESPACE - PCA(copy=True, iterated_power='auto', n_components=None, random_state=None, - svd_solver='auto', tol=0.0, whiten=False) + >>> pipe.named_steps.reduce_dim is pipe['reduce_dim'] + True + +A sub-pipeline can also be extracted, which is convenient for performing +only some of the transformations (or their inverse): + + >>> clf[:1] # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS + Pipeline(steps=[('reduce_dim', PCA(copy=True, ...))]) + +Nested parameters +................. 
Parameters of the estimators in the pipeline can be accessed using the ``__`` syntax:: @@ -94,11 +116,6 @@ Parameters of the estimators in the pipeline can be accessed using the steps=[('reduce_dim', PCA(copy=True, iterated_power='auto',...)), ('clf', SVC(C=10, cache_size=200, class_weight=None,...))]) -Attributes of named_steps map to keys, enabling tab completion in interactive environments:: - - >>> pipe.named_steps.reduce_dim is pipe.named_steps['reduce_dim'] - True - This is particularly important for doing grid searches:: >>> from sklearn.model_selection import GridSearchCV @@ -115,6 +132,23 @@ ignored by setting them to ``'passthrough'``:: ... clf__C=[0.1, 10, 100]) >>> grid_search = GridSearchCV(pipe, param_grid=param_grid) +The estimators of the pipeline can be retrieved by index: + + >>> clf[0] + PCA(copy=True, n_components=None, whiten=False) + +or by name:: + + >>> clf['reduce_dim'] + PCA(copy=True, n_components=None, whiten=False) + +A sub-pipeline can also be extracted, which is convenient for performing +only some of the transformations (or their inverse): + + >>> clf[:1] # doctest: +NORMALIZE_WHITESPACE + Pipeline(steps=[('reduce_dim', PCA(copy=True, n_components=None, + whiten=False))]) + .. topic:: Examples: * :ref:`sphx_glr_auto_examples_feature_selection_plot_feature_selection_pipeline.py` From a5024ca7346a0ed4ab966e1cfcfc4afacd07d71e Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Thu, 28 Feb 2019 13:30:11 +0100 Subject: [PATCH 06/18] Fix doctest --- sklearn/pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 7fff7de6192d3..7ad41508c6f7c 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -94,7 +94,7 @@ class Pipeline(_BaseComposition): ... 
# doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE Pipeline(memory=None, steps=[('anova', SelectKBest(...)), - ('svc', SVC(...))]) + ('svc', LinearSVC(...))]) >>> prediction = anova_svm.predict(X) >>> anova_svm.score(X, y) # doctest: +ELLIPSIS 0.83 From 7b21322d57abce371a532dd298e4dc464ae65807 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Thu, 28 Feb 2019 15:00:13 +0100 Subject: [PATCH 07/18] Fix merge error --- .../plot_feature_selection_pipeline.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/examples/feature_selection/plot_feature_selection_pipeline.py b/examples/feature_selection/plot_feature_selection_pipeline.py index 8b3cbf76a2c7b..65af75b89cfbc 100644 --- a/examples/feature_selection/plot_feature_selection_pipeline.py +++ b/examples/feature_selection/plot_feature_selection_pipeline.py @@ -31,15 +31,10 @@ # 2) svm clf = svm.LinearSVC() -<<<<<<< HEAD:examples/feature_selection_pipeline.py -anova_svm = Pipeline([('anova', anova_filter), ('svm', clf)]) -anova_svm.fit(X, y) -anova_svm.predict(X) - -coef = anova_svm[:-1].inverse_transform(anova_svm['svm'].coef_) -======= anova_svm = make_pipeline(anova_filter, clf) anova_svm.fit(X_train, y_train) y_pred = anova_svm.predict(X_test) print(classification_report(y_test, y_pred)) ->>>>>>> master:examples/feature_selection/plot_feature_selection_pipeline.py + +coef = anova_svm[:-1].inverse_transform(anova_svm['svm'].coef_) +print(coef) From 86dc37f1add01dcb5fbd92d1716ba803c5d384d4 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Thu, 28 Feb 2019 15:03:25 +0100 Subject: [PATCH 08/18] DOCs improved after Alex's comments --- doc/modules/compose.rst | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst index 7bc2ac4d94120..b95909b80c651 100644 --- a/doc/modules/compose.rst +++ b/doc/modules/compose.rst @@ -99,11 +99,15 @@ completion in interactive environments:: >>> pipe.named_steps.reduce_dim is pipe['reduce_dim'] 
True -A sub-pipeline can also be extracted, which is convenient for performing -only some of the transformations (or their inverse): +A sub-pipeline can also be extracted using the slicing notation commonly used +for Python Sequences such as lists or strings (although only a step of 1 is +permitted). This is convenient for performing only some of the transformations +(or their inverse): >>> clf[:1] # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS Pipeline(steps=[('reduce_dim', PCA(copy=True, ...))]) + >>> clf[-1:] # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS + Pipeline(steps=[('svc', SVC(C=10, ...))]) Nested parameters ................. @@ -142,13 +146,6 @@ or by name:: >>> clf['reduce_dim'] PCA(copy=True, n_components=None, whiten=False) -A sub-pipeline can also be extracted, which is convenient for performing -only some of the transformations (or their inverse): - - >>> clf[:1] # doctest: +NORMALIZE_WHITESPACE - Pipeline(steps=[('reduce_dim', PCA(copy=True, n_components=None, - whiten=False))]) - .. topic:: Examples: * :ref:`sphx_glr_auto_examples_feature_selection_plot_feature_selection_pipeline.py` From 0840b84caa93da5f7fce868fa88fc2345d2326cb Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Thu, 28 Feb 2019 15:06:15 +0100 Subject: [PATCH 09/18] This is not the right place to change to LinearSVC --- sklearn/pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 7ad41508c6f7c..435c8550a8249 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -85,7 +85,7 @@ class Pipeline(_BaseComposition): ... 
n_informative=5, n_redundant=0, random_state=42) >>> # ANOVA SVM-C >>> anova_filter = SelectKBest(f_regression, k=5) - >>> clf = svm.LinearSVC() + >>> clf = svm.SVC(kernel='linear') >>> anova_svm = Pipeline([('anova', anova_filter), ('svc', clf)]) >>> # You can set the parameters using the names issued >>> # For instance, fit using a k of 10 in the SelectKBest @@ -94,7 +94,7 @@ class Pipeline(_BaseComposition): ... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE Pipeline(memory=None, steps=[('anova', SelectKBest(...)), - ('svc', LinearSVC(...))]) + ('svc', SVC(...))]) >>> prediction = anova_svm.predict(X) >>> anova_svm.score(X, y) # doctest: +ELLIPSIS 0.83 From f7d20ff6dbfaf72357066c58110b36ee75e35700 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Thu, 28 Feb 2019 18:13:40 +0100 Subject: [PATCH 10/18] missed one --- doc/modules/compose.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst index b95909b80c651..e5afc9a95cdfb 100644 --- a/doc/modules/compose.rst +++ b/doc/modules/compose.rst @@ -81,7 +81,8 @@ Accessing steps ............... The estimators of a pipeline are stored as a list in the ``steps`` attribute, -but can be accessed by index or name by indexing (with ``[idx]``) the pipe:: +but can be accessed by index or name by indexing (with ``[idx]``) the +Pipeline:: >>> pipe.steps[0] # doctest: +NORMALIZE_WHITESPACE ('reduce_dim', PCA(copy=True, iterated_power='auto', n_components=None, From 1b9215966d86ef4bd9c4471e30bb79799d6bda28 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Fri, 1 Mar 2019 07:06:24 +0100 Subject: [PATCH 11/18] DOC add what's new --- doc/whats_new/v0.21.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index 4cbb42c569e1b..56c8ac413b1df 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -298,6 +298,12 @@ Support for Python 3.4 and below has been officially dropped. 
:mod:`sklearn.pipeline` ....................... +- |Feature| :class:`pipeline.Pipeline` can now use indexing notation (e.g. + ``my_pipeline[0:-1]``) to extract a subsequence of steps as another Pipeline + instance. A Pipeline can also be indexed directly to extract a particular + step (e.g. `my_pipeline['svc']`), rather than accessing `named_steps`. + :issue:`2568` by `Joel Nothman`_. + - |API| :class:`pipeline.Pipeline` now supports using ``'passthrough'`` as a transformer. :issue:`11144` by :user:`Thomas Fan `. From d6e414616a5592bcd5fc0b6e8fd565449159a102 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Fri, 1 Mar 2019 08:12:40 +0100 Subject: [PATCH 12/18] Fix doctest --- sklearn/pipeline.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 435c8550a8249..173db2166254a 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -101,14 +101,14 @@ class Pipeline(_BaseComposition): >>> # getting the selected features chosen by anova_filter >>> anova_svm['anova'].get_support() ... # doctest: +NORMALIZE_WHITESPACE - array([False, False, True, True, False, False, True, True, False, - True, False, True, True, False, True, False, True, True, + array([False, False, True, True, False, False, True, True, False, + True, False, True, True, False, True, False, True, True, False, False]) >>> # Another way to get selected features chosen by anova_filter >>> anova_svm.named_steps.anova.get_support() ... # doctest: +NORMALIZE_WHITESPACE - array([False, False, True, True, False, False, True, True, False, - True, False, True, True, False, True, False, True, True, + array([False, False, True, True, False, False, True, True, False, + True, False, True, True, False, True, False, True, True, False, False]) Indexing can also be used to extract a sub-pipeline. 
>>> sub_pipeline = anova_svm[:1] From 6582b066ad5a92d9e79ed11203ef172b1487b8e0 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Sat, 2 Mar 2019 23:30:18 +0100 Subject: [PATCH 13/18] doctest tweaks Co-Authored-By: jnothman --- sklearn/pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 173db2166254a..48bae2eac0896 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -110,7 +110,7 @@ class Pipeline(_BaseComposition): array([False, False, True, True, False, False, True, True, False, True, False, True, True, False, True, False, True, True, False, False]) - Indexing can also be used to extract a sub-pipeline. + >>> # Indexing can also be used to extract a sub-pipeline. >>> sub_pipeline = anova_svm[:1] >>> sub_pipeline # doctest: +ELLIPSIS Pipeline(steps=[('anova', ...)]) From 96509b13e64a8b94ab20363bfffce9d34bbca737 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Sat, 2 Mar 2019 23:30:28 +0100 Subject: [PATCH 14/18] doctest tweaks Co-Authored-By: jnothman --- sklearn/pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 48bae2eac0896..8d2498148b7cc 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -113,7 +113,7 @@ class Pipeline(_BaseComposition): >>> # Indexing can also be used to extract a sub-pipeline. 
>>> sub_pipeline = anova_svm[:1] >>> sub_pipeline # doctest: +ELLIPSIS - Pipeline(steps=[('anova', ...)]) + Pipeline(memory=None, steps=[('anova', ...)]) >>> coef = anova_svm[-1].coef_ >>> anova_svm['svc'] is anova_svm[-1] True From 210b26f987dbde3b4c6acd8b16c0b928fe3d9644 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Sat, 2 Mar 2019 23:30:38 +0100 Subject: [PATCH 15/18] doctest tweaks Co-Authored-By: jnothman --- sklearn/pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 8d2498148b7cc..7eaf9a46f09e9 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -112,7 +112,7 @@ class Pipeline(_BaseComposition): False, False]) >>> # Indexing can also be used to extract a sub-pipeline. >>> sub_pipeline = anova_svm[:1] - >>> sub_pipeline # doctest: +ELLIPSIS + >>> sub_pipeline # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE Pipeline(memory=None, steps=[('anova', ...)]) >>> coef = anova_svm[-1].coef_ >>> anova_svm['svc'] is anova_svm[-1] From 37335695416eb559d0a74e96d81ab731b0e7ac0c Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 4 Mar 2019 11:06:28 -0500 Subject: [PATCH 16/18] fix doctests --- doc/modules/compose.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst index e5afc9a95cdfb..c7e4afe5f359a 100644 --- a/doc/modules/compose.rst +++ b/doc/modules/compose.rst @@ -105,10 +105,10 @@ for Python Sequences such as lists or strings (although only a step of 1 is permitted). 
This is convenient for performing only some of the transformations (or their inverse): - >>> clf[:1] # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS - Pipeline(steps=[('reduce_dim', PCA(copy=True, ...))]) - >>> clf[-1:] # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS - Pipeline(steps=[('svc', SVC(C=10, ...))]) + >>> pipe[:1] # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS + Pipeline(memory=None, steps=[('reduce_dim', PCA(copy=True, ...))]) + >>> pipe[-1:] # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS + Pipeline(memory=None, steps=[('clf', SVC(C=1.0, ...))]) Nested parameters ................. @@ -139,13 +139,13 @@ ignored by setting them to ``'passthrough'``:: The estimators of the pipeline can be retrieved by index: - >>> clf[0] - PCA(copy=True, n_components=None, whiten=False) + >>> pipe[0] # doctest: +ELLIPSIS + PCA(copy=True, ...) or by name:: - >>> clf['reduce_dim'] - PCA(copy=True, n_components=None, whiten=False) + >>> pipe['reduce_dim'] # doctest: +ELLIPSIS + PCA(copy=True, ...) .. topic:: Examples: From 86bd07564ee15ee417b0910633b160578aa2cdaf Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Wed, 6 Mar 2019 08:35:05 +0100 Subject: [PATCH 17/18] Correct step name --- examples/feature_selection/plot_feature_selection_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/feature_selection/plot_feature_selection_pipeline.py b/examples/feature_selection/plot_feature_selection_pipeline.py index 65af75b89cfbc..47d4fb82e46ee 100644 --- a/examples/feature_selection/plot_feature_selection_pipeline.py +++ b/examples/feature_selection/plot_feature_selection_pipeline.py @@ -36,5 +36,5 @@ y_pred = anova_svm.predict(X_test) print(classification_report(y_test, y_pred)) -coef = anova_svm[:-1].inverse_transform(anova_svm['svm'].coef_) +coef = anova_svm[:-1].inverse_transform(anova_svm['linearsvc'].coef_) print(coef) From 3d06b246d7045ad4e58f6c03be7e4f3de6ba831e Mon Sep 17 00:00:00 2001 From: Adrin Jalali Date: Thu, 7 Mar 2019 10:41:41 +0100 Subject: [PATCH 
18/18] Update doc/whats_new/v0.21.rst Co-Authored-By: jnothman --- doc/whats_new/v0.21.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index 56c8ac413b1df..1aa738f9b8a1c 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -301,7 +301,7 @@ Support for Python 3.4 and below has been officially dropped. - |Feature| :class:`pipeline.Pipeline` can now use indexing notation (e.g. ``my_pipeline[0:-1]``) to extract a subsequence of steps as another Pipeline instance. A Pipeline can also be indexed directly to extract a particular - step (e.g. `my_pipeline['svc']`), rather than accessing `named_steps`. + step (e.g. ``my_pipeline['svc']``), rather than accessing ``named_steps``. :issue:`2568` by `Joel Nothman`_. - |API| :class:`pipeline.Pipeline` now supports using ``'passthrough'`` as a