
MNT move PolynomialFeatures from _data.py to _polynomial.py #19611


Merged: 9 commits, Mar 18, 2021
2 changes: 1 addition & 1 deletion sklearn/preprocessing/__init__.py
@@ -23,7 +23,6 @@
from ._data import quantile_transform
from ._data import power_transform
from ._data import PowerTransformer
from ._data import PolynomialFeatures

from ._encoders import OneHotEncoder
from ._encoders import OrdinalEncoder
@@ -35,6 +34,7 @@

from ._discretization import KBinsDiscretizer

from ._polynomial import PolynomialFeatures
from ._polynomial import SplineTransformer


290 changes: 0 additions & 290 deletions sklearn/preprocessing/_data.py
@@ -8,9 +8,7 @@
# License: BSD 3 clause


from itertools import chain, combinations
import warnings
from itertools import combinations_with_replacement as combinations_w_r

import numpy as np
from scipy import sparse
@@ -31,7 +29,6 @@
from ..utils.validation import (check_is_fitted, check_random_state,
_check_sample_weight,
FLOAT_DTYPES, _deprecate_positional_args)
from ._csr_polynomial_expansion import _csr_polynomial_expansion

from ._encoders import OneHotEncoder

@@ -1570,293 +1567,6 @@ def robust_scale(X, *, axis=0, with_centering=True, with_scaling=True,
return X


class PolynomialFeatures(TransformerMixin, BaseEstimator):
"""Generate polynomial and interaction features.

Generate a new feature matrix consisting of all polynomial combinations
of the features with degree less than or equal to the specified degree.
For example, if an input sample is two-dimensional and of the form
[a, b], the degree-2 polynomial features are [1, a, b, a^2, ab, b^2].

Parameters
----------
degree : int, default=2
The degree of the polynomial features.

interaction_only : bool, default=False
If true, only interaction features are produced: features that are
products of at most ``degree`` *distinct* input features (so not
``x[1] ** 2``, ``x[0] * x[2] ** 3``, etc.).

include_bias : bool, default=True
If True (default), then include a bias column, the feature in which
all polynomial powers are zero (i.e. a column of ones), which acts
as an intercept term in a linear model.

order : {'C', 'F'}, default='C'
Order of output array in the dense case. 'F' order is faster to
compute, but may slow down subsequent estimators.

.. versionadded:: 0.21

Examples
--------
>>> import numpy as np
>>> from sklearn.preprocessing import PolynomialFeatures
>>> X = np.arange(6).reshape(3, 2)
>>> X
array([[0, 1],
[2, 3],
[4, 5]])
>>> poly = PolynomialFeatures(2)
>>> poly.fit_transform(X)
array([[ 1., 0., 1., 0., 0., 1.],
[ 1., 2., 3., 4., 6., 9.],
[ 1., 4., 5., 16., 20., 25.]])
>>> poly = PolynomialFeatures(interaction_only=True)
>>> poly.fit_transform(X)
array([[ 1., 0., 1., 0.],
[ 1., 2., 3., 6.],
[ 1., 4., 5., 20.]])

Attributes
----------
powers_ : ndarray of shape (n_output_features, n_input_features)
powers_[i, j] is the exponent of the jth input in the ith output.

n_input_features_ : int
The total number of input features.

n_output_features_ : int
The total number of polynomial output features. The number of output
features is computed by iterating over all suitably sized combinations
of input features.

See Also
--------
SplineTransformer : Transformer that generates univariate B-spline bases
for features.

Notes
-----
Be aware that the number of features in the output array scales
polynomially in the number of features of the input array, and
exponentially in the degree. High degrees can cause overfitting.

See :ref:`examples/linear_model/plot_polynomial_interpolation.py
<sphx_glr_auto_examples_linear_model_plot_polynomial_interpolation.py>`
"""
@_deprecate_positional_args
def __init__(self, degree=2, *, interaction_only=False, include_bias=True,
order='C'):
self.degree = degree
self.interaction_only = interaction_only
self.include_bias = include_bias
self.order = order

@staticmethod
def _combinations(n_features, degree, interaction_only, include_bias):
comb = (combinations if interaction_only else combinations_w_r)
start = int(not include_bias)
return chain.from_iterable(comb(range(n_features), i)
for i in range(start, degree + 1))
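
# Editor's sketch (not in the original source): for n_features=2, degree=2,
# interaction_only=False, include_bias=True, the helper above yields the
# index tuples (), (0,), (1,), (0, 0), (0, 1), (1, 1), which map to the
# output columns [1, x0, x1, x0^2, x0*x1, x1^2].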

@property
def powers_(self):
check_is_fitted(self)

combinations = self._combinations(self.n_input_features_, self.degree,
self.interaction_only,
self.include_bias)
return np.vstack([np.bincount(c, minlength=self.n_input_features_)
for c in combinations])
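
# Editor's sketch: for the 2-feature, degree-2 case above, ``powers_`` is
# the stacked bincounts
#   [[0, 0], [1, 0], [0, 1], [2, 0], [1, 1], [0, 2]]
# with one row per output column and one exponent per input feature.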

def get_feature_names(self, input_features=None):
"""
Return feature names for output features.

Parameters
----------
input_features : list of str of shape (n_features,), default=None
String names for input features if available. By default,
"x0", "x1", ..., "x(n_features - 1)" are used.

Returns
-------
output_feature_names : list of str of shape (n_output_features,)
"""
powers = self.powers_
if input_features is None:
input_features = ['x%d' % i for i in range(powers.shape[1])]
feature_names = []
for row in powers:
inds = np.where(row)[0]
if len(inds):
name = " ".join("%s^%d" % (input_features[ind], exp)
if exp != 1 else input_features[ind]
for ind, exp in zip(inds, row[inds]))
else:
name = "1"
feature_names.append(name)
return feature_names
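
# Editor's usage sketch (assumed, consistent with the class docstring):
#   >>> poly = PolynomialFeatures(degree=2).fit(np.arange(6).reshape(3, 2))
#   >>> poly.get_feature_names()
#   ['1', 'x0', 'x1', 'x0^2', 'x0 x1', 'x1^2']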

def fit(self, X, y=None):
"""
Compute number of output features.

Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
The data.

y : None
Ignored.

Returns
-------
self : object
Fitted transformer.
"""
n_samples, n_features = self._validate_data(
X, accept_sparse=True).shape
combinations = self._combinations(n_features, self.degree,
self.interaction_only,
self.include_bias)
self.n_input_features_ = n_features
self.n_output_features_ = sum(1 for _ in combinations)
return self
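
# Editor's note (sketch, not in the original source): with
# interaction_only=False, the count computed above has the closed form
#   n_output_features_ = comb(n_features + degree, degree) - (0 if include_bias else 1)
# e.g. comb(2 + 2, 2) == 6 for the docstring example. The code iterates
# instead because interaction_only changes the formula.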

def transform(self, X):
"""Transform data to polynomial features.

Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
The data to transform, row by row.

Prefer CSR over CSC for sparse input (for speed), but CSC is
required if the degree is 4 or higher. If the degree is less than
4 and the input format is CSC, it will be converted to CSR, have
its polynomial features generated, then converted back to CSC.

If the degree is 2 or 3, the method described in "Leveraging
Sparsity to Speed Up Polynomial Feature Expansions of CSR Matrices
Using K-Simplex Numbers" by Andrew Nystrom and John Hughes is
used, which is much faster than the method used on CSC input. For
this reason, a CSC input will be converted to CSR, and the output
will be converted back to CSC prior to being returned, hence the
preference of CSR.

Returns
-------
XP : {ndarray, sparse matrix} of shape (n_samples, NP)
The matrix of features, where NP is the number of polynomial
features generated from the combination of inputs. If a sparse
matrix is provided, it will be converted into a sparse
``csr_matrix``.
"""
check_is_fitted(self)

X = self._validate_data(X, order='F', dtype=FLOAT_DTYPES, reset=False,
accept_sparse=('csr', 'csc'))

n_samples, n_features = X.shape

if n_features != self.n_input_features_:
raise ValueError("X shape does not match training shape")

if sparse.isspmatrix_csr(X):
if self.degree > 3:
return self.transform(X.tocsc()).tocsr()
to_stack = []
if self.include_bias:
to_stack.append(np.ones(shape=(n_samples, 1), dtype=X.dtype))
to_stack.append(X)
for deg in range(2, self.degree+1):
Xp_next = _csr_polynomial_expansion(X.data, X.indices,
X.indptr, X.shape[1],
self.interaction_only,
deg)
if Xp_next is None:
break
to_stack.append(Xp_next)
XP = sparse.hstack(to_stack, format='csr')
elif sparse.isspmatrix_csc(X) and self.degree < 4:
return self.transform(X.tocsr()).tocsc()
else:
if sparse.isspmatrix(X):
combinations = self._combinations(n_features, self.degree,
self.interaction_only,
self.include_bias)
columns = []
for comb in combinations:
if comb:
out_col = 1
for col_idx in comb:
out_col = X[:, col_idx].multiply(out_col)
columns.append(out_col)
else:
bias = sparse.csc_matrix(np.ones((X.shape[0], 1)))
columns.append(bias)
XP = sparse.hstack(columns, dtype=X.dtype).tocsc()
else:
XP = np.empty((n_samples, self.n_output_features_),
dtype=X.dtype, order=self.order)

# What follows is a faster implementation of:
# for i, comb in enumerate(combinations):
# XP[:, i] = X[:, comb].prod(1)
# This implementation uses two optimisations.
# First one is broadcasting,
# multiply ([X1, ..., Xn], X1) -> [X1 X1, ..., Xn X1]
# multiply ([X2, ..., Xn], X2) -> [X2 X2, ..., Xn X2]
# ...
# multiply (X[:, start:end], X[:, start]) -> ...
# Second optimisation happens for degrees >= 3.
# Xi^3 is computed reusing previous computation:
# Xi^3 = Xi^2 * Xi.
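#
# Editor's worked example (sketch): with n_features=2, degree=2,
# include_bias=True, the loop below fills columns as
#   col 0:    1 (bias)
#   cols 1-2: x0, x1          (d = 1 block, copied from X)
#   cols 3-4: x0*x0, x0*x1    (XP[:, 1:3] broadcast with x0)
#   col 5:    x1*x1           (XP[:, 2:3] broadcast with x1)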

if self.include_bias:
XP[:, 0] = 1
current_col = 1
else:
current_col = 0

# d = 0
XP[:, current_col:current_col + n_features] = X
index = list(range(current_col,
current_col + n_features))
current_col += n_features
index.append(current_col)

# d >= 1
for _ in range(1, self.degree):
new_index = []
end = index[-1]
for feature_idx in range(n_features):
start = index[feature_idx]
new_index.append(current_col)
if self.interaction_only:
start += (index[feature_idx + 1] -
index[feature_idx])
next_col = current_col + end - start
if next_col <= current_col:
break
# XP[:, start:end] are terms of degree d - 1
# that exclude feature #feature_idx.
np.multiply(XP[:, start:end],
X[:, feature_idx:feature_idx + 1],
out=XP[:, current_col:next_col],
casting='no')
current_col = next_col

new_index.append(current_col)
index = new_index

return XP
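
# Editor's usage sketch for the sparse fast path (assumed, mirroring the
# docstring; requires scipy):
#   >>> from scipy import sparse
#   >>> Xs = sparse.csr_matrix(np.arange(6).reshape(3, 2))
#   >>> XP = PolynomialFeatures(degree=2).fit_transform(Xs)
#   >>> XP.format, XP.shape
#   ('csr', (3, 6))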


@_deprecate_positional_args
def normalize(X, norm='l2', *, axis=1, copy=True, return_norm=False):
"""Scale input vectors individually to unit norm (vector length).
Expand Down