Skip to content

[MRG+1] Deprecate Y parameter on transform() #8403

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 13 commits into from
5 changes: 5 additions & 0 deletions doc/whats_new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,11 @@ API changes summary
:func:`sklearn.model_selection.cross_val_predict`.
:issue:`2879` by :user:`Stephen Hoover <stephen-hoover>`.

- Deprecate the ``y`` parameter in :meth:`transform`.
The method should not accept a ``y`` parameter, as it is used at prediction time.
:issue:`8174` by :user:`Tahar Zanouda <tzano>`.


.. _changes_0_18_1:

Version 0.18.1
Expand Down
11 changes: 10 additions & 1 deletion sklearn/cluster/birch.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from ..metrics.pairwise import euclidean_distances
from ..base import TransformerMixin, ClusterMixin, BaseEstimator
from ..externals.six.moves import xrange
from ..externals.six import string_types
from ..utils import check_array
from ..utils.extmath import row_norms, safe_sparse_dot
from ..utils.validation import check_is_fitted
Expand Down Expand Up @@ -569,7 +570,7 @@ def predict(self, X):
reduced_distance += self._subcluster_norms
return self.subcluster_labels_[np.argmin(reduced_distance, axis=1)]

def transform(self, X, y=None):
def transform(self, X, y='deprecated'):
"""
Transform X into subcluster centroids dimension.

Expand All @@ -580,12 +581,20 @@ def transform(self, X, y=None):
----------
X : {array-like, sparse matrix}, shape (n_samples, n_features)
Input data.
y : (ignored)
.. deprecated:: 0.19
This parameter will be removed in 0.21.

Returns
-------
X_trans : {array-like, sparse matrix}, shape (n_samples, n_clusters)
Transformed data.
"""
if not isinstance(y, string_types) or y != 'deprecated':
warnings.warn("The parameter y on transform() is "
"deprecated since 0.19 and will be removed in 0.21",
DeprecationWarning)

check_is_fitted(self, 'subcluster_centers_')
return euclidean_distances(X, self.subcluster_centers_)

Expand Down
10 changes: 9 additions & 1 deletion sklearn/cluster/k_means_.py
Original file line number Diff line number Diff line change
Expand Up @@ -911,7 +911,7 @@ def fit_transform(self, X, y=None):
X = self._check_fit_data(X)
return self.fit(X)._transform(X)

def transform(self, X, y=None):
def transform(self, X, y='deprecated'):
"""Transform X to a cluster-distance space.

In the new space, each dimension is the distance to the cluster
Expand All @@ -922,12 +922,20 @@ def transform(self, X, y=None):
----------
X : {array-like, sparse matrix}, shape = [n_samples, n_features]
New data to transform.
y : (ignored)
.. deprecated:: 0.19
This parameter will be removed in 0.21.

Returns
-------
X_new : array, shape [n_samples, k]
X transformed in the new space.
"""
if not isinstance(y, string_types) or y != 'deprecated':
warnings.warn("The parameter y on transform() is "
"deprecated since 0.19 and will be removed in 0.21",
DeprecationWarning)

check_is_fitted(self, 'cluster_centers_')

X = self._check_test_data(X)
Expand Down
13 changes: 11 additions & 2 deletions sklearn/decomposition/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#
# License: BSD 3 clause

import warnings
import numpy as np
from scipy import linalg

Expand All @@ -16,6 +17,7 @@
from ..utils.extmath import fast_dot
from ..utils.validation import check_is_fitted
from ..externals import six
from ..externals.six import string_types
from abc import ABCMeta, abstractmethod


Expand Down Expand Up @@ -97,8 +99,7 @@ def fit(X, y=None):
Returns the instance itself.
"""


def transform(self, X, y=None):
def transform(self, X, y='deprecated'):
"""Apply dimensionality reduction to X.

X is projected on the first principal components previously extracted
Expand All @@ -109,6 +110,9 @@ def transform(self, X, y=None):
X : array-like, shape (n_samples, n_features)
New data, where n_samples is the number of samples
and n_features is the number of features.
y : (ignored)
.. deprecated:: 0.19
This parameter will be removed in 0.21.

Returns
-------
Expand All @@ -125,6 +129,11 @@ def transform(self, X, y=None):
IncrementalPCA(batch_size=3, copy=True, n_components=2, whiten=False)
>>> ipca.transform(X) # doctest: +SKIP
"""
if not isinstance(y, string_types) or y != 'deprecated':
warnings.warn("The parameter y on transform() is "
"deprecated since 0.19 and will be removed in 0.21",
DeprecationWarning)

check_is_fitted(self, ['mean_', 'components_'], all_or_any=all)

X = check_array(X)
Expand Down
12 changes: 11 additions & 1 deletion sklearn/decomposition/dict_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import time
import sys
import itertools
import warnings

from math import sqrt, ceil

Expand All @@ -17,6 +18,7 @@
from ..base import BaseEstimator, TransformerMixin
from ..externals.joblib import Parallel, delayed, cpu_count
from ..externals.six.moves import zip
from ..externals.six import string_types
from ..utils import (check_array, check_random_state, gen_even_slices,
gen_batches, _get_n_jobs)
from ..utils.extmath import randomized_svd, row_norms
Expand Down Expand Up @@ -791,7 +793,7 @@ def _set_sparse_coding_params(self, n_components,
self.split_sign = split_sign
self.n_jobs = n_jobs

def transform(self, X, y=None):
def transform(self, X, y='deprecated'):
"""Encode the data as a sparse combination of the dictionary atoms.

Coding method is determined by the object parameter
Expand All @@ -802,13 +804,21 @@ def transform(self, X, y=None):
X : array of shape (n_samples, n_features)
Test data to be transformed, must have the same number of
features as the data used to train the model.
y : (ignored)
.. deprecated:: 0.19
This parameter will be removed in 0.21.

Returns
-------
X_new : array, shape (n_samples, n_components)
Transformed data

"""
if not isinstance(y, string_types) or y != 'deprecated':
warnings.warn("The parameter y on transform() is "
"deprecated since 0.19 and will be removed in 0.21",
DeprecationWarning)

check_is_fitted(self, 'components_')

# XXX : kwargs is not documented
Expand Down
14 changes: 12 additions & 2 deletions sklearn/decomposition/fastica_.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,16 @@
# Authors: Pierre Lafaye de Micheaux, Stefan van der Walt, Gael Varoquaux,
# Bertrand Thirion, Alexandre Gramfort, Denis A. Engemann
# License: BSD 3 clause

import warnings

import numpy as np
from scipy import linalg

from ..base import BaseEstimator, TransformerMixin
from ..externals import six
from ..externals.six import moves
from ..externals.six import string_types
from ..utils import check_array, as_float_array, check_random_state
from ..utils.extmath import fast_dot
from ..utils.validation import check_is_fitted
Expand Down Expand Up @@ -523,22 +526,29 @@ def fit(self, X, y=None):
self._fit(X, compute_sources=False)
return self

def transform(self, X, y=None, copy=True):
def transform(self, X, y='deprecated', copy=True):
"""Recover the sources from X (apply the unmixing matrix).

Parameters
----------
X : array-like, shape (n_samples, n_features)
Data to transform, where n_samples is the number of samples
and n_features is the number of features.

copy : bool (optional)
If False, data passed to fit are overwritten. Defaults to True.
y : (ignored)
.. deprecated:: 0.19
This parameter will be removed in 0.21.

Returns
-------
X_new : array-like, shape (n_samples, n_components)
"""
if not isinstance(y, string_types) or y != 'deprecated':
warnings.warn("The parameter y on transform() is "
"deprecated since 0.19 and will be removed in 0.21",
DeprecationWarning)

check_is_fitted(self, 'mixing_')

X = check_array(X, copy=copy, dtype=FLOAT_DTYPES)
Expand Down
13 changes: 12 additions & 1 deletion sklearn/decomposition/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

from math import log, sqrt

import warnings

import numpy as np
from scipy import linalg
from scipy.special import gammaln
Expand All @@ -28,6 +30,7 @@
from ..utils.extmath import stable_cumsum
from ..utils.validation import check_is_fitted
from ..utils.arpack import svds
from ..externals.six import string_types


def _assess_dimension_(spectrum, rank, n_samples, n_features):
Expand Down Expand Up @@ -719,7 +722,7 @@ def _fit(self, X):

return X

def transform(self, X, y=None):
def transform(self, X, y='deprecated'):
"""Apply dimensionality reduction on X.

X is projected on the first principal components previous extracted
Expand All @@ -730,12 +733,20 @@ def transform(self, X, y=None):
X : array-like, shape (n_samples, n_features)
New data, where n_samples in the number of samples
and n_features is the number of features.
y : (ignored)
.. deprecated:: 0.19
This parameter will be removed in 0.21.

Returns
-------
X_new : array-like, shape (n_samples, n_components)

"""
if not isinstance(y, string_types) or y != 'deprecated':
warnings.warn("The parameter y on transform() is "
"deprecated since 0.19 and will be removed in 0.21",
DeprecationWarning)

check_is_fitted(self, 'mean_')

X = check_array(X)
Expand Down
18 changes: 18 additions & 0 deletions sklearn/decomposition/tests/test_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,24 @@ def fit_deprecated(X):
assert_array_almost_equal(Y, Y_pca)


def test_deprecation_transform():
    """Check that passing ``y`` to ``transform`` emits a DeprecationWarning.

    Every explicitly supplied value of ``y`` -- a scalar, a list, and even
    ``None`` -- is expected to trigger the warning.
    """
    expected_msg = "The parameter y on transform() is deprecated"

    # PCA on iris data; keep the object returned by fit(), whose
    # transform() method is exercised below.
    data = iris.data
    estimator = PCA(n_components=2)
    fitted = estimator.fit(data)

    # Each explicit ``y`` value must raise the deprecation warning.
    for y_value in (1, [1], None):
        assert_warns_message(DeprecationWarning, expected_msg,
                             fitted.transform, data, y=y_value)


def test_pca_spase_input():

X = np.random.RandomState(0).rand(5, 4)
Expand Down
11 changes: 10 additions & 1 deletion sklearn/feature_extraction/dict_vectorizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@
from collections import Mapping
from operator import itemgetter

import warnings

import numpy as np
import scipy.sparse as sp

from ..base import BaseEstimator, TransformerMixin
from ..externals import six
from ..externals.six.moves import xrange
from ..externals.six import string_types
from ..utils import check_array, tosequence
from ..utils.fixes import frombuffer_empty

Expand Down Expand Up @@ -271,7 +274,7 @@ def inverse_transform(self, X, dict_type=dict):

return dicts

def transform(self, X, y=None):
def transform(self, X, y='deprecated'):
"""Transform feature->value dicts to array or sparse matrix.

Named features not encountered during fit or fit_transform will be
Expand All @@ -283,12 +286,18 @@ def transform(self, X, y=None):
Dict(s) or Mapping(s) from feature names (arbitrary Python
objects) to feature values (strings or convertible to dtype).
y : (ignored)
.. deprecated:: 0.19
This parameter will be removed in 0.21.

Returns
-------
Xa : {array, sparse matrix}
Feature vectors; always 2-d.
"""
if not isinstance(y, string_types) or y != 'deprecated':
warnings.warn("The parameter y on transform() is "
"deprecated since 0.19 and will be removed in 0.21",
DeprecationWarning)
if self.sparse:
return self._transform(X, fitting=False)

Expand Down
13 changes: 11 additions & 2 deletions sklearn/feature_extraction/hashing.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
# Author: Lars Buitinck
# License: BSD 3 clause

import warnings

import numbers

import numpy as np
import scipy.sparse as sp

from . import _hashing
from ..base import BaseEstimator, TransformerMixin
from ..externals.six import string_types


def _iteritems(d):
Expand Down Expand Up @@ -114,7 +117,7 @@ def fit(self, X=None, y=None):
self._validate_params(self.n_features, self.input_type)
return self

def transform(self, raw_X, y=None):
def transform(self, raw_X, y='deprecated'):
"""Transform a sequence of instances to a scipy.sparse matrix.

Parameters
Expand All @@ -126,13 +129,19 @@ def transform(self, raw_X, y=None):
raw_X need not support the len function, so it can be the result
of a generator; n_samples is determined on the fly.
y : (ignored)

.. deprecated:: 0.19
This parameter will be removed in 0.21.
Returns
-------
X : scipy.sparse matrix, shape = (n_samples, self.n_features)
Feature matrix, for use with estimators or further transformers.

"""
if not isinstance(y, string_types) or y != 'deprecated':
warnings.warn("The parameter y on transform() is "
"deprecated since 0.19 and will be removed in 0.21",
DeprecationWarning)

raw_X = iter(raw_X)
if self.input_type == "dict":
raw_X = (_iteritems(d) for d in raw_X)
Expand Down
Loading