Commit de70e29

Author: Guillaume Lemaitre
Commit message: Fix the different tests
Parent: 3338171

File tree: 3 files changed, +39 -55 lines


doc/modules/pipeline.rst

Lines changed: 0 additions & 42 deletions
@@ -166,48 +166,6 @@ object::
     >>> # Clear the cache directory when you don't need it anymore
     >>> rmtree(cachedir)
 
-.. warning:: **Side effect of caching transfomers**
-
-   Using a :class:`Pipeline` without cache enabled, it is possible to
-   inspect the original instance such as::
-
-     >>> from sklearn.datasets import load_digits
-     >>> digits = load_digits()
-     >>> pca1 = PCA()
-     >>> svm1 = SVC()
-     >>> pipe = Pipeline([('reduce_dim', pca1), ('clf', svm1)])
-     >>> pipe.fit(digits.data, digits.target)
-     ... # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
-     Pipeline(memory=None,
-              steps=[('reduce_dim', PCA(...)), ('clf', SVC(...))])
-     >>> # The pca instance can be inspected directly
-     >>> print(pca1.components_) # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
-     [[ -1.77484909e-19 ... 4.07058917e-18]]
-
-   Enabling caching triggers a clone of the transformers before fitting.
-   Therefore, the transformer instance given to the pipeline cannot be
-   inspected directly.
-   In following example, accessing the :class:`PCA` instance ``pca2``
-   will raise an ``AttributeError`` since ``pca2`` will be an unfitted
-   transformer.
-   Instead, use the attribute ``named_steps_`` to inspect estimators within
-   the pipeline::
-
-     >>> cachedir = mkdtemp()
-     >>> pca2 = PCA()
-     >>> svm2 = SVC()
-     >>> cached_pipe = Pipeline([('reduce_dim', pca2), ('clf', svm2)],
-     ...                        memory=cachedir)
-     >>> cached_pipe.fit(digits.data, digits.target)
-     ... # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
-     Pipeline(memory=...,
-              steps=[('reduce_dim', PCA(...)), ('clf', SVC(...))])
-     >>> print(cached_pipe.named_steps_['reduce_dim'].components_)
-     ... # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
-     [[ -1.77484909e-19 ... 4.07058917e-18]]
-     >>> # Remove the cache directory
-     >>> rmtree(cachedir)
-
 .. topic:: Examples:
 
   * :ref:`sphx_glr_auto_examples_plot_compare_reduction.py`
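
The warning deleted above documented why a cached :class:`Pipeline` leaves the original transformer instance unfitted: caching memoizes the fit of a *clone*, so only the clone acquires fitted attributes. Below is a minimal standalone sketch of that mechanism, not the pipeline internals, using plain ``joblib.Memory`` (the branch reaches joblib through ``sklearn.externals.joblib``) and a made-up ``_fit_one`` helper.

# Sketch only: illustrates why caching leaves the original estimator unfitted.
# ``_fit_one`` is a hypothetical helper, not part of scikit-learn.
from shutil import rmtree
from tempfile import mkdtemp

import numpy as np
from joblib import Memory
from sklearn.base import clone
from sklearn.decomposition import PCA

cachedir = mkdtemp()
memory = Memory(location=cachedir, verbose=0)


def _fit_one(transformer, X):
    # The memoized unit of work: fit a transformer and return it.
    return transformer.fit(X)


cached_fit = memory.cache(_fit_one)

X = np.random.RandomState(0).rand(30, 5)
pca = PCA(n_components=2)
fitted_pca = cached_fit(clone(pca), X)    # the clone gets fitted, not ``pca``

print(hasattr(pca, 'components_'))        # False: the original stays untouched
print(hasattr(fitted_pca, 'components_')) # True: inspect the fitted copy instead

rmtree(cachedir)                          # clear the cache directory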

sklearn/pipeline.py

Lines changed: 5 additions & 1 deletion
@@ -381,7 +381,11 @@ def fit_transform(self, X, y=None, **fit_params):
         """
         last_step = self._final_estimator
         Xt, fit_params = self._fit(X, y, **fit_params)
-        if last_step is None:
+        if hasattr(last_step, 'fit_transform'):
+            Xt = last_step.fit_transform(Xt, y, **fit_params)
+            self.steps_[-1] = (self.steps_[-1][0], last_step)
+            return Xt
+        elif last_step is None:
             return Xt
         else:
             fitted_transformer = last_step.fit(Xt, y, **fit_params)
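
The new first branch lets ``fit_transform`` use the final estimator's own ``fit_transform`` when it has one, while a ``None`` last step still passes the data through and everything else falls back to ``fit``. A simplified, hypothetical sketch of that dispatch (not the pipeline method itself, which also records the fitted step in ``steps_``):

# Hypothetical helper mirroring the three branches added above; it omits the
# bookkeeping the real method does on ``self.steps_``.
def fit_transform_last_step(last_step, Xt, y=None, **fit_params):
    if hasattr(last_step, 'fit_transform'):
        # Prefer the estimator's own fit_transform when available.
        return last_step.fit_transform(Xt, y, **fit_params)
    elif last_step is None:
        # A None step is a no-op: return the data unchanged.
        return Xt
    else:
        # Otherwise fit first, then transform with the fitted estimator.
        return last_step.fit(Xt, y, **fit_params).transform(Xt)


if __name__ == '__main__':
    import numpy as np
    from sklearn.preprocessing import StandardScaler

    X = np.arange(6, dtype=float).reshape(3, 2)
    print(fit_transform_last_step(StandardScaler(), X))  # uses fit_transform
    print(fit_transform_last_step(None, X))               # passthrough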

sklearn/tests/test_pipeline.py

Lines changed: 34 additions & 12 deletions
@@ -18,7 +18,6 @@
 from sklearn.utils.testing import assert_true
 from sklearn.utils.testing import assert_array_equal
 from sklearn.utils.testing import assert_array_almost_equal
-from sklearn.utils.testing import assert_dict_equal
 
 from sklearn.base import clone, BaseEstimator
 from sklearn.pipeline import Pipeline, FeatureUnion, make_pipeline, make_union
@@ -32,6 +31,8 @@
 from sklearn.preprocessing import StandardScaler
 from sklearn.feature_extraction.text import CountVectorizer
 from sklearn.externals.joblib import Memory
+from sklearn.externals.joblib import hash
+from sklearn.utils.validation import check_is_fitted
 
 
 JUNK_FOOD_DOCS = (
@@ -537,17 +538,22 @@ def make():
     assert_array_equal([[exp]], pipeline.fit_transform(X, y))
     assert_array_equal([exp], pipeline.fit(X).predict(X))
     assert_array_equal(X, pipeline.inverse_transform([[exp]]))
-    print(pipeline.get_params(deep=True))
-    assert_dict_equal(pipeline.get_params(deep=True),
-                      {'last': Mult(mult=5),
-                       'memory': None,
-                       'last__mult': 5,
-                       'steps': [('m2', Mult(mult=2)),
-                                 ('m3', None),
-                                 ('last', Mult(mult=5))],
-                       'm2__mult': 2,
-                       'm3': None,
-                       'm2': Mult(mult=2)})
+
+    pipeline_params = pipeline.get_params(deep=True)
+    pipeline_params2 = {'steps': pipeline.steps,
+                        'm2': mult2,
+                        'm3': None,
+                        'last': mult5,
+                        'memory': None,
+                        'm2__mult': 2,
+                        'last__mult': 5}
+    # check if the keys are the same
+    assert_equal(sorted(pipeline_params.keys()),
+                 sorted(pipeline_params2.keys()))
+    # check if the arrays are the same using joblib.hash
+    for k in pipeline_params.keys():
+        assert_equal(hash(pipeline_params[k]),
+                     hash(pipeline_params2[k]))
 
     pipeline.set_params(m2=None)
     exp = 5
@@ -621,6 +627,22 @@ def test_pipeline_ducktyping():
     assert_false(hasattr(pipeline, 'inverse_transform'))
 
 
+def test_pipeline_steps():
+    iris = load_iris()
+    X = iris.data
+    y = iris.target
+    clf = SVC(probability=True, random_state=0)
+    pca = PCA(svd_solver='full', n_components='mle', whiten=True)
+    pipe = Pipeline([('pca', pca), ('svc', clf)])
+    pipe.fit(X, y)
+
+    # check that _steps was not change after fitting
+    assert_equal(pca, pipe._steps[0][1])
+    assert_equal(clf, pipe._steps[1][1])
+    # check that the estimators have been fitted in steps_
+    check_is_fitted(pipe.named_steps_['pca'], 'n_components_')
+    check_is_fitted(pipe.named_steps_['svc'], 'support_vectors_')
+
 def test_make_pipeline():
     t1 = Transf()
     t2 = Transf()
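
The rewritten assertions compare ``get_params(deep=True)`` key by key with ``joblib.hash`` instead of ``assert_dict_equal``; per the test's own comment this is to cope with array-like values, for which ``==`` is element-wise and a plain dict comparison fails. A small standalone sketch of the pattern, using made-up dictionaries and plain ``joblib`` (the test reaches the same helper through ``sklearn.externals.joblib``):

import numpy as np
from joblib import hash  # deliberately shadows the builtin, as in the test

params_a = {'alpha': 0.5, 'coef': np.array([1.0, 2.0, 3.0])}
params_b = {'alpha': 0.5, 'coef': np.array([1.0, 2.0, 3.0])}

# ``params_a == params_b`` would raise: comparing the arrays with ``==`` is
# element-wise and its truth value is ambiguous. Comparing sorted keys plus a
# per-value joblib hash works for arrays and estimators alike.
assert sorted(params_a) == sorted(params_b)
for key in params_a:
    assert hash(params_a[key]) == hash(params_b[key])

In the new ``test_pipeline_steps``, ``check_is_fitted`` is the ``sklearn.utils.validation`` helper that raises ``NotFittedError`` when the named fitted attribute (here ``n_components_`` or ``support_vectors_``) is absent.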

0 commit comments
