From 5645b2487cdaf7eefb530c2898a93d2248c99c1f Mon Sep 17 00:00:00 2001
From: avigupta2612 <avigupta2612@gmail.com>
Date: Thu, 4 Mar 2021 04:40:09 +0530
Subject: [PATCH 1/6] moved Polynomialfeatures to polynomial

---
 sklearn/preprocessing/__init__.py             |   2 +-
 sklearn/preprocessing/_data.py                | 289 -----------------
 sklearn/preprocessing/_polynomial.py          | 294 +++++++++++++++++-
 sklearn/preprocessing/tests/test_data.py      | 199 ------------
 .../preprocessing/tests/test_polynomial.py    | 202 ++++++++++++
 5 files changed, 496 insertions(+), 490 deletions(-)

diff --git a/sklearn/preprocessing/__init__.py b/sklearn/preprocessing/__init__.py
index 076b9e85e1150..a24346dc8d045 100644
--- a/sklearn/preprocessing/__init__.py
+++ b/sklearn/preprocessing/__init__.py
@@ -23,7 +23,6 @@
 from ._data import quantile_transform
 from ._data import power_transform
 from ._data import PowerTransformer
-from ._data import PolynomialFeatures
 
 from ._encoders import OneHotEncoder
 from ._encoders import OrdinalEncoder
@@ -36,6 +35,7 @@
 from ._discretization import KBinsDiscretizer
 
 from ._polynomial import SplineTransformer
+from ._polynomial import PolynomialFeatures
 
 
 __all__ = [
diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py
index 29190dd6e2b67..e45f6e869f45a 100644
--- a/sklearn/preprocessing/_data.py
+++ b/sklearn/preprocessing/_data.py
@@ -8,9 +8,7 @@
 # License: BSD 3 clause
 
 
-from itertools import chain, combinations
 import warnings
-from itertools import combinations_with_replacement as combinations_w_r
 
 import numpy as np
 from scipy import sparse
@@ -1570,293 +1568,6 @@ def robust_scale(X, *, axis=0, with_centering=True, with_scaling=True,
     return X
 
 
-class PolynomialFeatures(TransformerMixin, BaseEstimator):
-    """Generate polynomial and interaction features.
-
-    Generate a new feature matrix consisting of all polynomial combinations
-    of the features with degree less than or equal to the specified degree.
-    For example, if an input sample is two dimensional and of the form
-    [a, b], the degree-2 polynomial features are [1, a, b, a^2, ab, b^2].
-
-    Parameters
-    ----------
-    degree : int, default=2
-        The degree of the polynomial features.
-
-    interaction_only : bool, default=False
-        If true, only interaction features are produced: features that are
-        products of at most ``degree`` *distinct* input features (so not
-        ``x[1] ** 2``, ``x[0] * x[2] ** 3``, etc.).
-
-    include_bias : bool, default=True
-        If True (default), then include a bias column, the feature in which
-        all polynomial powers are zero (i.e. a column of ones - acts as an
-        intercept term in a linear model).
-
-    order : {'C', 'F'}, default='C'
-        Order of output array in the dense case. 'F' order is faster to
-        compute, but may slow down subsequent estimators.
-
-        .. versionadded:: 0.21
-
-    Examples
-    --------
-    >>> import numpy as np
-    >>> from sklearn.preprocessing import PolynomialFeatures
-    >>> X = np.arange(6).reshape(3, 2)
-    >>> X
-    array([[0, 1],
-           [2, 3],
-           [4, 5]])
-    >>> poly = PolynomialFeatures(2)
-    >>> poly.fit_transform(X)
-    array([[ 1.,  0.,  1.,  0.,  0.,  1.],
-           [ 1.,  2.,  3.,  4.,  6.,  9.],
-           [ 1.,  4.,  5., 16., 20., 25.]])
-    >>> poly = PolynomialFeatures(interaction_only=True)
-    >>> poly.fit_transform(X)
-    array([[ 1.,  0.,  1.,  0.],
-           [ 1.,  2.,  3.,  6.],
-           [ 1.,  4.,  5., 20.]])
-
-    Attributes
-    ----------
-    powers_ : ndarray of shape (n_output_features, n_input_features)
-        powers_[i, j] is the exponent of the jth input in the ith output.
-
-    n_input_features_ : int
-        The total number of input features.
-
-    n_output_features_ : int
-        The total number of polynomial output features. The number of output
-        features is computed by iterating over all suitably sized combinations
-        of input features.
-
-    See Also
-    --------
-    SplineTransformer : Transformer that generates univariate B-spline bases
-        for features
-
-    Notes
-    -----
-    Be aware that the number of features in the output array scales
-    polynomially in the number of features of the input array, and
-    exponentially in the degree. High degrees can cause overfitting.
-
-    See :ref:`examples/linear_model/plot_polynomial_interpolation.py
-    <sphx_glr_auto_examples_linear_model_plot_polynomial_interpolation.py>`
-    """
-    @_deprecate_positional_args
-    def __init__(self, degree=2, *, interaction_only=False, include_bias=True,
-                 order='C'):
-        self.degree = degree
-        self.interaction_only = interaction_only
-        self.include_bias = include_bias
-        self.order = order
-
-    @staticmethod
-    def _combinations(n_features, degree, interaction_only, include_bias):
-        comb = (combinations if interaction_only else combinations_w_r)
-        start = int(not include_bias)
-        return chain.from_iterable(comb(range(n_features), i)
-                                   for i in range(start, degree + 1))
-
-    @property
-    def powers_(self):
-        check_is_fitted(self)
-
-        combinations = self._combinations(self.n_input_features_, self.degree,
-                                          self.interaction_only,
-                                          self.include_bias)
-        return np.vstack([np.bincount(c, minlength=self.n_input_features_)
-                          for c in combinations])
-
-    def get_feature_names(self, input_features=None):
-        """
-        Return feature names for output features
-
-        Parameters
-        ----------
-        input_features : list of str of shape (n_features,), default=None
-            String names for input features if available. By default,
-            "x0", "x1", ... "xn_features" is used.
-
-        Returns
-        -------
-        output_feature_names : list of str of shape (n_output_features,)
-        """
-        powers = self.powers_
-        if input_features is None:
-            input_features = ['x%d' % i for i in range(powers.shape[1])]
-        feature_names = []
-        for row in powers:
-            inds = np.where(row)[0]
-            if len(inds):
-                name = " ".join("%s^%d" % (input_features[ind], exp)
-                                if exp != 1 else input_features[ind]
-                                for ind, exp in zip(inds, row[inds]))
-            else:
-                name = "1"
-            feature_names.append(name)
-        return feature_names
-
-    def fit(self, X, y=None):
-        """
-        Compute number of output features.
-
-
-        Parameters
-        ----------
-        X : {array-like, sparse matrix} of shape (n_samples, n_features)
-            The data.
-
-        y : None
-            Ignored.
-
-        Returns
-        -------
-        self : object
-            Fitted transformer.
-        """
-        n_samples, n_features = self._validate_data(
-            X, accept_sparse=True).shape
-        combinations = self._combinations(n_features, self.degree,
-                                          self.interaction_only,
-                                          self.include_bias)
-        self.n_input_features_ = n_features
-        self.n_output_features_ = sum(1 for _ in combinations)
-        return self
-
-    def transform(self, X):
-        """Transform data to polynomial features.
-
-        Parameters
-        ----------
-        X : {array-like, sparse matrix} of shape (n_samples, n_features)
-            The data to transform, row by row.
-
-            Prefer CSR over CSC for sparse input (for speed), but CSC is
-            required if the degree is 4 or higher. If the degree is less than
-            4 and the input format is CSC, it will be converted to CSR, have
-            its polynomial features generated, then converted back to CSC.
-
-            If the degree is 2 or 3, the method described in "Leveraging
-            Sparsity to Speed Up Polynomial Feature Expansions of CSR Matrices
-            Using K-Simplex Numbers" by Andrew Nystrom and John Hughes is
-            used, which is much faster than the method used on CSC input. For
-            this reason, a CSC input will be converted to CSR, and the output
-            will be converted back to CSC prior to being returned, hence the
-            preference of CSR.
-
-        Returns
-        -------
-        XP : {ndarray, sparse matrix} of shape (n_samples, NP)
-            The matrix of features, where NP is the number of polynomial
-            features generated from the combination of inputs. If a sparse
-            matrix is provided, it will be converted into a sparse
-            ``csr_matrix``.
-        """
-        check_is_fitted(self)
-
-        X = self._validate_data(X, order='F', dtype=FLOAT_DTYPES, reset=False,
-                                accept_sparse=('csr', 'csc'))
-
-        n_samples, n_features = X.shape
-
-        if n_features != self.n_input_features_:
-            raise ValueError("X shape does not match training shape")
-
-        if sparse.isspmatrix_csr(X):
-            if self.degree > 3:
-                return self.transform(X.tocsc()).tocsr()
-            to_stack = []
-            if self.include_bias:
-                to_stack.append(np.ones(shape=(n_samples, 1), dtype=X.dtype))
-            to_stack.append(X)
-            for deg in range(2, self.degree+1):
-                Xp_next = _csr_polynomial_expansion(X.data, X.indices,
-                                                    X.indptr, X.shape[1],
-                                                    self.interaction_only,
-                                                    deg)
-                if Xp_next is None:
-                    break
-                to_stack.append(Xp_next)
-            XP = sparse.hstack(to_stack, format='csr')
-        elif sparse.isspmatrix_csc(X) and self.degree < 4:
-            return self.transform(X.tocsr()).tocsc()
-        else:
-            if sparse.isspmatrix(X):
-                combinations = self._combinations(n_features, self.degree,
-                                                  self.interaction_only,
-                                                  self.include_bias)
-                columns = []
-                for comb in combinations:
-                    if comb:
-                        out_col = 1
-                        for col_idx in comb:
-                            out_col = X[:, col_idx].multiply(out_col)
-                        columns.append(out_col)
-                    else:
-                        bias = sparse.csc_matrix(np.ones((X.shape[0], 1)))
-                        columns.append(bias)
-                XP = sparse.hstack(columns, dtype=X.dtype).tocsc()
-            else:
-                XP = np.empty((n_samples, self.n_output_features_),
-                              dtype=X.dtype, order=self.order)
-
-                # What follows is a faster implementation of:
-                # for i, comb in enumerate(combinations):
-                #     XP[:, i] = X[:, comb].prod(1)
-                # This implementation uses two optimisations.
-                # First one is broadcasting,
-                # multiply ([X1, ..., Xn], X1) -> [X1 X1, ..., Xn X1]
-                # multiply ([X2, ..., Xn], X2) -> [X2 X2, ..., Xn X2]
-                # ...
-                # multiply ([X[:, start:end], X[:, start]) -> ...
-                # Second optimisation happens for degrees >= 3.
-                # Xi^3 is computed reusing previous computation:
-                # Xi^3 = Xi^2 * Xi.
-
-                if self.include_bias:
-                    XP[:, 0] = 1
-                    current_col = 1
-                else:
-                    current_col = 0
-
-                # d = 0
-                XP[:, current_col:current_col + n_features] = X
-                index = list(range(current_col,
-                                   current_col + n_features))
-                current_col += n_features
-                index.append(current_col)
-
-                # d >= 1
-                for _ in range(1, self.degree):
-                    new_index = []
-                    end = index[-1]
-                    for feature_idx in range(n_features):
-                        start = index[feature_idx]
-                        new_index.append(current_col)
-                        if self.interaction_only:
-                            start += (index[feature_idx + 1] -
-                                      index[feature_idx])
-                        next_col = current_col + end - start
-                        if next_col <= current_col:
-                            break
-                        # XP[:, start:end] are terms of degree d - 1
-                        # that exclude feature #feature_idx.
-                        np.multiply(XP[:, start:end],
-                                    X[:, feature_idx:feature_idx + 1],
-                                    out=XP[:, current_col:next_col],
-                                    casting='no')
-                        current_col = next_col
-
-                    new_index.append(current_col)
-                    index = new_index
-
-        return XP
-
-
 @_deprecate_positional_args
 def normalize(X, norm='l2', *, axis=1, copy=True, return_norm=False):
     """Scale input vectors individually to unit norm (vector length).
diff --git a/sklearn/preprocessing/_polynomial.py b/sklearn/preprocessing/_polynomial.py
index 26587e7f05823..73a0af963ad33 100644
--- a/sklearn/preprocessing/_polynomial.py
+++ b/sklearn/preprocessing/_polynomial.py
@@ -2,14 +2,19 @@
 This file contains preprocessing tools based on polynomials.
 """
 import numbers
+from itertools import chain, combinations
+from itertools import combinations_with_replacement as combinations_w_r
 
 import numpy as np
+from scipy import sparse
 from scipy.interpolate import BSpline
 
 from ..base import BaseEstimator, TransformerMixin
 from ..utils import check_array
 from ..utils.fixes import linspace
-from ..utils.validation import check_is_fitted, FLOAT_DTYPES
+from ..utils.validation import (check_is_fitted, FLOAT_DTYPES,
+                                _deprecate_positional_args)
+from ._csr_polynomial_expansion import _csr_polynomial_expansion
 
 
 __all__ = [
@@ -427,3 +432,290 @@ def transform(self, X):
                 j for j in range(XBS.shape[1]) if (j + 1) % n_splines != 0
             ]
             return XBS[:, indices]
+
+
+class PolynomialFeatures(TransformerMixin, BaseEstimator):
+    """Generate polynomial and interaction features.
+
+    Generate a new feature matrix consisting of all polynomial combinations
+    of the features with degree less than or equal to the specified degree.
+    For example, if an input sample is two dimensional and of the form
+    [a, b], the degree-2 polynomial features are [1, a, b, a^2, ab, b^2].
+
+    Parameters
+    ----------
+    degree : int, default=2
+        The degree of the polynomial features.
+
+    interaction_only : bool, default=False
+        If true, only interaction features are produced: features that are
+        products of at most ``degree`` *distinct* input features (so not
+        ``x[1] ** 2``, ``x[0] * x[2] ** 3``, etc.).
+
+    include_bias : bool, default=True
+        If True (default), then include a bias column, the feature in which
+        all polynomial powers are zero (i.e. a column of ones - acts as an
+        intercept term in a linear model).
+
+    order : {'C', 'F'}, default='C'
+        Order of output array in the dense case. 'F' order is faster to
+        compute, but may slow down subsequent estimators.
+
+        .. versionadded:: 0.21
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from sklearn.preprocessing import PolynomialFeatures
+    >>> X = np.arange(6).reshape(3, 2)
+    >>> X
+    array([[0, 1],
+           [2, 3],
+           [4, 5]])
+    >>> poly = PolynomialFeatures(2)
+    >>> poly.fit_transform(X)
+    array([[ 1.,  0.,  1.,  0.,  0.,  1.],
+           [ 1.,  2.,  3.,  4.,  6.,  9.],
+           [ 1.,  4.,  5., 16., 20., 25.]])
+    >>> poly = PolynomialFeatures(interaction_only=True)
+    >>> poly.fit_transform(X)
+    array([[ 1.,  0.,  1.,  0.],
+           [ 1.,  2.,  3.,  6.],
+           [ 1.,  4.,  5., 20.]])
+
+    Attributes
+    ----------
+    powers_ : ndarray of shape (n_output_features, n_input_features)
+        powers_[i, j] is the exponent of the jth input in the ith output.
+
+    n_input_features_ : int
+        The total number of input features.
+
+    n_output_features_ : int
+        The total number of polynomial output features. The number of output
+        features is computed by iterating over all suitably sized combinations
+        of input features.
+
+    See Also
+    --------
+    SplineTransformer : Transformer that generates univariate B-spline bases
+        for features
+
+    Notes
+    -----
+    Be aware that the number of features in the output array scales
+    polynomially in the number of features of the input array, and
+    exponentially in the degree. High degrees can cause overfitting.
+
+    See :ref:`examples/linear_model/plot_polynomial_interpolation.py
+    <sphx_glr_auto_examples_linear_model_plot_polynomial_interpolation.py>`
+    """
+    @_deprecate_positional_args
+    def __init__(self, degree=2, *, interaction_only=False, include_bias=True,
+                 order='C'):
+        self.degree = degree
+        self.interaction_only = interaction_only
+        self.include_bias = include_bias
+        self.order = order
+
+    @staticmethod
+    def _combinations(n_features, degree, interaction_only, include_bias):
+        comb = (combinations if interaction_only else combinations_w_r)
+        start = int(not include_bias)
+        return chain.from_iterable(comb(range(n_features), i)
+                                   for i in range(start, degree + 1))
+
+    @property
+    def powers_(self):
+        check_is_fitted(self)
+
+        combinations = self._combinations(self.n_input_features_, self.degree,
+                                          self.interaction_only,
+                                          self.include_bias)
+        return np.vstack([np.bincount(c, minlength=self.n_input_features_)
+                          for c in combinations])
+
+    def get_feature_names(self, input_features=None):
+        """
+        Return feature names for output features
+
+        Parameters
+        ----------
+        input_features : list of str of shape (n_features,), default=None
+            String names for input features if available. By default,
+            "x0", "x1", ... "xn_features" is used.
+
+        Returns
+        -------
+        output_feature_names : list of str of shape (n_output_features,)
+        """
+        powers = self.powers_
+        if input_features is None:
+            input_features = ['x%d' % i for i in range(powers.shape[1])]
+        feature_names = []
+        for row in powers:
+            inds = np.where(row)[0]
+            if len(inds):
+                name = " ".join("%s^%d" % (input_features[ind], exp)
+                                if exp != 1 else input_features[ind]
+                                for ind, exp in zip(inds, row[inds]))
+            else:
+                name = "1"
+            feature_names.append(name)
+        return feature_names
+
+    def fit(self, X, y=None):
+        """
+        Compute number of output features.
+
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            The data.
+
+        y : None
+            Ignored.
+
+        Returns
+        -------
+        self : object
+            Fitted transformer.
+        """
+        n_samples, n_features = self._validate_data(
+            X, accept_sparse=True).shape
+        combinations = self._combinations(n_features, self.degree,
+                                          self.interaction_only,
+                                          self.include_bias)
+        self.n_input_features_ = n_features
+        self.n_output_features_ = sum(1 for _ in combinations)
+        return self
+
+    def transform(self, X):
+        """Transform data to polynomial features.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            The data to transform, row by row.
+
+            Prefer CSR over CSC for sparse input (for speed), but CSC is
+            required if the degree is 4 or higher. If the degree is less than
+            4 and the input format is CSC, it will be converted to CSR, have
+            its polynomial features generated, then converted back to CSC.
+
+            If the degree is 2 or 3, the method described in "Leveraging
+            Sparsity to Speed Up Polynomial Feature Expansions of CSR Matrices
+            Using K-Simplex Numbers" by Andrew Nystrom and John Hughes is
+            used, which is much faster than the method used on CSC input. For
+            this reason, a CSC input will be converted to CSR, and the output
+            will be converted back to CSC prior to being returned, hence the
+            preference of CSR.
+
+        Returns
+        -------
+        XP : {ndarray, sparse matrix} of shape (n_samples, NP)
+            The matrix of features, where NP is the number of polynomial
+            features generated from the combination of inputs. If a sparse
+            matrix is provided, it will be converted into a sparse
+            ``csr_matrix``.
+        """
+        check_is_fitted(self)
+
+        X = self._validate_data(X, order='F', dtype=FLOAT_DTYPES, reset=False,
+                                accept_sparse=('csr', 'csc'))
+
+        n_samples, n_features = X.shape
+
+        if n_features != self.n_input_features_:
+            raise ValueError("X shape does not match training shape")
+
+        if sparse.isspmatrix_csr(X):
+            if self.degree > 3:
+                return self.transform(X.tocsc()).tocsr()
+            to_stack = []
+            if self.include_bias:
+                to_stack.append(np.ones(shape=(n_samples, 1), dtype=X.dtype))
+            to_stack.append(X)
+            for deg in range(2, self.degree+1):
+                Xp_next = _csr_polynomial_expansion(X.data, X.indices,
+                                                    X.indptr, X.shape[1],
+                                                    self.interaction_only,
+                                                    deg)
+                if Xp_next is None:
+                    break
+                to_stack.append(Xp_next)
+            XP = sparse.hstack(to_stack, format='csr')
+        elif sparse.isspmatrix_csc(X) and self.degree < 4:
+            return self.transform(X.tocsr()).tocsc()
+        else:
+            if sparse.isspmatrix(X):
+                combinations = self._combinations(n_features, self.degree,
+                                                  self.interaction_only,
+                                                  self.include_bias)
+                columns = []
+                for comb in combinations:
+                    if comb:
+                        out_col = 1
+                        for col_idx in comb:
+                            out_col = X[:, col_idx].multiply(out_col)
+                        columns.append(out_col)
+                    else:
+                        bias = sparse.csc_matrix(np.ones((X.shape[0], 1)))
+                        columns.append(bias)
+                XP = sparse.hstack(columns, dtype=X.dtype).tocsc()
+            else:
+                XP = np.empty((n_samples, self.n_output_features_),
+                              dtype=X.dtype, order=self.order)
+
+                # What follows is a faster implementation of:
+                # for i, comb in enumerate(combinations):
+                #     XP[:, i] = X[:, comb].prod(1)
+                # This implementation uses two optimisations.
+                # First one is broadcasting,
+                # multiply ([X1, ..., Xn], X1) -> [X1 X1, ..., Xn X1]
+                # multiply ([X2, ..., Xn], X2) -> [X2 X2, ..., Xn X2]
+                # ...
+                # multiply ([X[:, start:end], X[:, start]) -> ...
+                # Second optimisation happens for degrees >= 3.
+                # Xi^3 is computed reusing previous computation:
+                # Xi^3 = Xi^2 * Xi.
+
+                if self.include_bias:
+                    XP[:, 0] = 1
+                    current_col = 1
+                else:
+                    current_col = 0
+
+                # d = 0
+                XP[:, current_col:current_col + n_features] = X
+                index = list(range(current_col,
+                                   current_col + n_features))
+                current_col += n_features
+                index.append(current_col)
+
+                # d >= 1
+                for _ in range(1, self.degree):
+                    new_index = []
+                    end = index[-1]
+                    for feature_idx in range(n_features):
+                        start = index[feature_idx]
+                        new_index.append(current_col)
+                        if self.interaction_only:
+                            start += (index[feature_idx + 1] -
+                                      index[feature_idx])
+                        next_col = current_col + end - start
+                        if next_col <= current_col:
+                            break
+                        # XP[:, start:end] are terms of degree d - 1
+                        # that exclude feature #feature_idx.
+                        np.multiply(XP[:, start:end],
+                                    X[:, feature_idx:feature_idx + 1],
+                                    out=XP[:, current_col:next_col],
+                                    casting='no')
+                        current_col = next_col
+
+                    new_index.append(current_col)
+                    index = new_index
+
+        return XP
diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py
index fdd88be0ccff4..196060388ddd2 100644
--- a/sklearn/preprocessing/tests/test_data.py
+++ b/sklearn/preprocessing/tests/test_data.py
@@ -10,7 +10,6 @@
 import numpy as np
 import numpy.linalg as la
 from scipy import sparse, stats
-from scipy.sparse import random as sparse_random
 
 import pytest
 
@@ -43,7 +42,6 @@
 from sklearn.preprocessing import RobustScaler
 from sklearn.preprocessing import robust_scale
 from sklearn.preprocessing import add_dummy_feature
-from sklearn.preprocessing import PolynomialFeatures
 from sklearn.preprocessing import PowerTransformer
 from sklearn.preprocessing import power_transform
 from sklearn.preprocessing._data import _handle_zeros_in_scale
@@ -94,203 +92,6 @@ def assert_correct_incr(i, batch_start, batch_stop, n, chunk_size,
                 n_samples_seen)
 
 
-def test_polynomial_features():
-    # Test Polynomial Features
-    X1 = np.arange(6)[:, np.newaxis]
-    P1 = np.hstack([np.ones_like(X1),
-                    X1, X1 ** 2, X1 ** 3])
-    deg1 = 3
-
-    X2 = np.arange(6).reshape((3, 2))
-    x1 = X2[:, :1]
-    x2 = X2[:, 1:]
-    P2 = np.hstack([x1 ** 0 * x2 ** 0,
-                    x1 ** 1 * x2 ** 0,
-                    x1 ** 0 * x2 ** 1,
-                    x1 ** 2 * x2 ** 0,
-                    x1 ** 1 * x2 ** 1,
-                    x1 ** 0 * x2 ** 2])
-    deg2 = 2
-
-    for (deg, X, P) in [(deg1, X1, P1), (deg2, X2, P2)]:
-        P_test = PolynomialFeatures(deg, include_bias=True).fit_transform(X)
-        assert_array_almost_equal(P_test, P)
-
-        P_test = PolynomialFeatures(deg, include_bias=False).fit_transform(X)
-        assert_array_almost_equal(P_test, P[:, 1:])
-
-    interact = PolynomialFeatures(2, interaction_only=True, include_bias=True)
-    X_poly = interact.fit_transform(X)
-    assert_array_almost_equal(X_poly, P2[:, [0, 1, 2, 4]])
-
-    assert interact.powers_.shape == (interact.n_output_features_,
-                                      interact.n_input_features_)
-
-
-def test_polynomial_feature_names():
-    X = np.arange(30).reshape(10, 3)
-    poly = PolynomialFeatures(degree=2, include_bias=True).fit(X)
-    feature_names = poly.get_feature_names()
-    assert_array_equal(['1', 'x0', 'x1', 'x2', 'x0^2', 'x0 x1',
-                        'x0 x2', 'x1^2', 'x1 x2', 'x2^2'],
-                       feature_names)
-
-    poly = PolynomialFeatures(degree=3, include_bias=False).fit(X)
-    feature_names = poly.get_feature_names(["a", "b", "c"])
-    assert_array_equal(['a', 'b', 'c', 'a^2', 'a b', 'a c', 'b^2',
-                        'b c', 'c^2', 'a^3', 'a^2 b', 'a^2 c',
-                        'a b^2', 'a b c', 'a c^2', 'b^3', 'b^2 c',
-                        'b c^2', 'c^3'], feature_names)
-    # test some unicode
-    poly = PolynomialFeatures(degree=1, include_bias=True).fit(X)
-    feature_names = poly.get_feature_names(
-        ["\u0001F40D", "\u262E", "\u05D0"])
-    assert_array_equal(["1", "\u0001F40D", "\u262E", "\u05D0"],
-                       feature_names)
-
-
-def test_polynomial_feature_array_order():
-    """Test that output array has the given order."""
-    X = np.arange(10).reshape(5, 2)
-
-    def is_c_contiguous(a):
-        return np.isfortran(a.T)
-
-    assert is_c_contiguous(PolynomialFeatures().fit_transform(X))
-    assert is_c_contiguous(PolynomialFeatures(order='C').fit_transform(X))
-    assert np.isfortran(PolynomialFeatures(order='F').fit_transform(X))
-
-
-@pytest.mark.parametrize(['deg', 'include_bias', 'interaction_only', 'dtype'],
-                         [(1, True, False, int),
-                          (2, True, False, int),
-                          (2, True, False, np.float32),
-                          (2, True, False, np.float64),
-                          (3, False, False, np.float64),
-                          (3, False, True, np.float64),
-                          (4, False, False, np.float64),
-                          (4, False, True, np.float64)])
-def test_polynomial_features_csc_X(deg, include_bias, interaction_only, dtype):
-    rng = np.random.RandomState(0)
-    X = rng.randint(0, 2, (100, 2))
-    X_csc = sparse.csc_matrix(X)
-
-    est = PolynomialFeatures(deg, include_bias=include_bias,
-                             interaction_only=interaction_only)
-    Xt_csc = est.fit_transform(X_csc.astype(dtype))
-    Xt_dense = est.fit_transform(X.astype(dtype))
-
-    assert isinstance(Xt_csc, sparse.csc_matrix)
-    assert Xt_csc.dtype == Xt_dense.dtype
-    assert_array_almost_equal(Xt_csc.A, Xt_dense)
-
-
-@pytest.mark.parametrize(['deg', 'include_bias', 'interaction_only', 'dtype'],
-                         [(1, True, False, int),
-                          (2, True, False, int),
-                          (2, True, False, np.float32),
-                          (2, True, False, np.float64),
-                          (3, False, False, np.float64),
-                          (3, False, True, np.float64)])
-def test_polynomial_features_csr_X(deg, include_bias, interaction_only, dtype):
-    rng = np.random.RandomState(0)
-    X = rng.randint(0, 2, (100, 2))
-    X_csr = sparse.csr_matrix(X)
-
-    est = PolynomialFeatures(deg, include_bias=include_bias,
-                             interaction_only=interaction_only)
-    Xt_csr = est.fit_transform(X_csr.astype(dtype))
-    Xt_dense = est.fit_transform(X.astype(dtype, copy=False))
-
-    assert isinstance(Xt_csr, sparse.csr_matrix)
-    assert Xt_csr.dtype == Xt_dense.dtype
-    assert_array_almost_equal(Xt_csr.A, Xt_dense)
-
-
-@pytest.mark.parametrize(['deg', 'include_bias', 'interaction_only', 'dtype'],
-                         [(2, True, False, np.float32),
-                          (2, True, False, np.float64),
-                          (3, False, False, np.float64),
-                          (3, False, True, np.float64)])
-def test_polynomial_features_csr_X_floats(deg, include_bias,
-                                          interaction_only, dtype):
-    X_csr = sparse_random(1000, 10, 0.5, random_state=0).tocsr()
-    X = X_csr.toarray()
-
-    est = PolynomialFeatures(deg, include_bias=include_bias,
-                             interaction_only=interaction_only)
-    Xt_csr = est.fit_transform(X_csr.astype(dtype))
-    Xt_dense = est.fit_transform(X.astype(dtype))
-
-    assert isinstance(Xt_csr, sparse.csr_matrix)
-    assert Xt_csr.dtype == Xt_dense.dtype
-    assert_array_almost_equal(Xt_csr.A, Xt_dense)
-
-
-@pytest.mark.parametrize(['zero_row_index', 'deg', 'interaction_only'],
-                         [(0, 2, True), (1, 2, True), (2, 2, True),
-                          (0, 3, True), (1, 3, True), (2, 3, True),
-                          (0, 2, False), (1, 2, False), (2, 2, False),
-                          (0, 3, False), (1, 3, False), (2, 3, False)])
-def test_polynomial_features_csr_X_zero_row(zero_row_index, deg,
-                                            interaction_only):
-    X_csr = sparse_random(3, 10, 1.0, random_state=0).tocsr()
-    X_csr[zero_row_index, :] = 0.0
-    X = X_csr.toarray()
-
-    est = PolynomialFeatures(deg, include_bias=False,
-                             interaction_only=interaction_only)
-    Xt_csr = est.fit_transform(X_csr)
-    Xt_dense = est.fit_transform(X)
-
-    assert isinstance(Xt_csr, sparse.csr_matrix)
-    assert Xt_csr.dtype == Xt_dense.dtype
-    assert_array_almost_equal(Xt_csr.A, Xt_dense)
-
-
-# This degree should always be one more than the highest degree supported by
-# _csr_expansion.
-@pytest.mark.parametrize(['include_bias', 'interaction_only'],
-                         [(True, True), (True, False),
-                          (False, True), (False, False)])
-def test_polynomial_features_csr_X_degree_4(include_bias, interaction_only):
-    X_csr = sparse_random(1000, 10, 0.5, random_state=0).tocsr()
-    X = X_csr.toarray()
-
-    est = PolynomialFeatures(4, include_bias=include_bias,
-                             interaction_only=interaction_only)
-    Xt_csr = est.fit_transform(X_csr)
-    Xt_dense = est.fit_transform(X)
-
-    assert isinstance(Xt_csr, sparse.csr_matrix)
-    assert Xt_csr.dtype == Xt_dense.dtype
-    assert_array_almost_equal(Xt_csr.A, Xt_dense)
-
-
-@pytest.mark.parametrize(['deg', 'dim', 'interaction_only'],
-                         [(2, 1, True),
-                          (2, 2, True),
-                          (3, 1, True),
-                          (3, 2, True),
-                          (3, 3, True),
-                          (2, 1, False),
-                          (2, 2, False),
-                          (3, 1, False),
-                          (3, 2, False),
-                          (3, 3, False)])
-def test_polynomial_features_csr_X_dim_edges(deg, dim, interaction_only):
-    X_csr = sparse_random(1000, dim, 0.5, random_state=0).tocsr()
-    X = X_csr.toarray()
-
-    est = PolynomialFeatures(deg, interaction_only=interaction_only)
-    Xt_csr = est.fit_transform(X_csr)
-    Xt_dense = est.fit_transform(X)
-
-    assert isinstance(Xt_csr, sparse.csr_matrix)
-    assert Xt_csr.dtype == Xt_dense.dtype
-    assert_array_almost_equal(Xt_csr.A, Xt_dense)
-
-
 def test_raises_value_error_if_sample_weights_greater_than_1d():
     # Sample weights must be either scalar or 1D
 
diff --git a/sklearn/preprocessing/tests/test_polynomial.py b/sklearn/preprocessing/tests/test_polynomial.py
index 2ca3260f7c05e..53d4c74005f07 100644
--- a/sklearn/preprocessing/tests/test_polynomial.py
+++ b/sklearn/preprocessing/tests/test_polynomial.py
@@ -1,10 +1,15 @@
 import numpy as np
 from numpy.testing import assert_allclose, assert_array_equal
 import pytest
+from scipy import sparse
+from scipy.sparse import random as sparse_random
+
+from sklearn.utils._testing import assert_array_almost_equal
 
 from sklearn.linear_model import LinearRegression
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import KBinsDiscretizer, SplineTransformer
+from sklearn.preprocessing import PolynomialFeatures
 
 
 # TODO: add PolynomialFeatures if it moves to _polynomial.py
@@ -259,3 +264,200 @@ def test_spline_transformer_n_features_out(n_knots, include_bias, degree):
     splt.fit(X)
 
     assert splt.transform(X).shape[1] == splt.n_features_out_
+
+
+def test_polynomial_features():
+    # Test Polynomial Features
+    X1 = np.arange(6)[:, np.newaxis]
+    P1 = np.hstack([np.ones_like(X1),
+                    X1, X1 ** 2, X1 ** 3])
+    deg1 = 3
+
+    X2 = np.arange(6).reshape((3, 2))
+    x1 = X2[:, :1]
+    x2 = X2[:, 1:]
+    P2 = np.hstack([x1 ** 0 * x2 ** 0,
+                    x1 ** 1 * x2 ** 0,
+                    x1 ** 0 * x2 ** 1,
+                    x1 ** 2 * x2 ** 0,
+                    x1 ** 1 * x2 ** 1,
+                    x1 ** 0 * x2 ** 2])
+    deg2 = 2
+
+    for (deg, X, P) in [(deg1, X1, P1), (deg2, X2, P2)]:
+        P_test = PolynomialFeatures(deg, include_bias=True).fit_transform(X)
+        assert_array_almost_equal(P_test, P)
+
+        P_test = PolynomialFeatures(deg, include_bias=False).fit_transform(X)
+        assert_array_almost_equal(P_test, P[:, 1:])
+
+    interact = PolynomialFeatures(2, interaction_only=True, include_bias=True)
+    X_poly = interact.fit_transform(X)
+    assert_array_almost_equal(X_poly, P2[:, [0, 1, 2, 4]])
+
+    assert interact.powers_.shape == (interact.n_output_features_,
+                                      interact.n_input_features_)
+
+
+def test_polynomial_feature_names():
+    X = np.arange(30).reshape(10, 3)
+    poly = PolynomialFeatures(degree=2, include_bias=True).fit(X)
+    feature_names = poly.get_feature_names()
+    assert_array_equal(['1', 'x0', 'x1', 'x2', 'x0^2', 'x0 x1',
+                        'x0 x2', 'x1^2', 'x1 x2', 'x2^2'],
+                       feature_names)
+
+    poly = PolynomialFeatures(degree=3, include_bias=False).fit(X)
+    feature_names = poly.get_feature_names(["a", "b", "c"])
+    assert_array_equal(['a', 'b', 'c', 'a^2', 'a b', 'a c', 'b^2',
+                        'b c', 'c^2', 'a^3', 'a^2 b', 'a^2 c',
+                        'a b^2', 'a b c', 'a c^2', 'b^3', 'b^2 c',
+                        'b c^2', 'c^3'], feature_names)
+    # test some unicode
+    poly = PolynomialFeatures(degree=1, include_bias=True).fit(X)
+    feature_names = poly.get_feature_names(
+        ["\u0001F40D", "\u262E", "\u05D0"])
+    assert_array_equal(["1", "\u0001F40D", "\u262E", "\u05D0"],
+                       feature_names)
+
+
+def test_polynomial_feature_array_order():
+    """Test that output array has the given order."""
+    X = np.arange(10).reshape(5, 2)
+
+    def is_c_contiguous(a):
+        return np.isfortran(a.T)
+
+    assert is_c_contiguous(PolynomialFeatures().fit_transform(X))
+    assert is_c_contiguous(PolynomialFeatures(order='C').fit_transform(X))
+    assert np.isfortran(PolynomialFeatures(order='F').fit_transform(X))
+
+
+@pytest.mark.parametrize(['deg', 'include_bias', 'interaction_only', 'dtype'],
+                         [(1, True, False, int),
+                          (2, True, False, int),
+                          (2, True, False, np.float32),
+                          (2, True, False, np.float64),
+                          (3, False, False, np.float64),
+                          (3, False, True, np.float64),
+                          (4, False, False, np.float64),
+                          (4, False, True, np.float64)])
+def test_polynomial_features_csc_X(deg, include_bias, interaction_only, dtype):
+    rng = np.random.RandomState(0)
+    X = rng.randint(0, 2, (100, 2))
+    X_csc = sparse.csc_matrix(X)
+
+    est = PolynomialFeatures(deg, include_bias=include_bias,
+                             interaction_only=interaction_only)
+    Xt_csc = est.fit_transform(X_csc.astype(dtype))
+    Xt_dense = est.fit_transform(X.astype(dtype))
+
+    assert isinstance(Xt_csc, sparse.csc_matrix)
+    assert Xt_csc.dtype == Xt_dense.dtype
+    assert_array_almost_equal(Xt_csc.A, Xt_dense)
+
+
+@pytest.mark.parametrize(['deg', 'include_bias', 'interaction_only', 'dtype'],
+                         [(1, True, False, int),
+                          (2, True, False, int),
+                          (2, True, False, np.float32),
+                          (2, True, False, np.float64),
+                          (3, False, False, np.float64),
+                          (3, False, True, np.float64)])
+def test_polynomial_features_csr_X(deg, include_bias, interaction_only, dtype):
+    rng = np.random.RandomState(0)
+    X = rng.randint(0, 2, (100, 2))
+    X_csr = sparse.csr_matrix(X)
+
+    est = PolynomialFeatures(deg, include_bias=include_bias,
+                             interaction_only=interaction_only)
+    Xt_csr = est.fit_transform(X_csr.astype(dtype))
+    Xt_dense = est.fit_transform(X.astype(dtype, copy=False))
+
+    assert isinstance(Xt_csr, sparse.csr_matrix)
+    assert Xt_csr.dtype == Xt_dense.dtype
+    assert_array_almost_equal(Xt_csr.A, Xt_dense)
+
+
+@pytest.mark.parametrize(['deg', 'include_bias', 'interaction_only', 'dtype'],
+                         [(2, True, False, np.float32),
+                          (2, True, False, np.float64),
+                          (3, False, False, np.float64),
+                          (3, False, True, np.float64)])
+def test_polynomial_features_csr_X_floats(deg, include_bias,
+                                          interaction_only, dtype):
+    X_csr = sparse_random(1000, 10, 0.5, random_state=0).tocsr()
+    X = X_csr.toarray()
+
+    est = PolynomialFeatures(deg, include_bias=include_bias,
+                             interaction_only=interaction_only)
+    Xt_csr = est.fit_transform(X_csr.astype(dtype))
+    Xt_dense = est.fit_transform(X.astype(dtype))
+
+    assert isinstance(Xt_csr, sparse.csr_matrix)
+    assert Xt_csr.dtype == Xt_dense.dtype
+    assert_array_almost_equal(Xt_csr.A, Xt_dense)
+
+
+@pytest.mark.parametrize(['zero_row_index', 'deg', 'interaction_only'],
+                         [(0, 2, True), (1, 2, True), (2, 2, True),
+                          (0, 3, True), (1, 3, True), (2, 3, True),
+                          (0, 2, False), (1, 2, False), (2, 2, False),
+                          (0, 3, False), (1, 3, False), (2, 3, False)])
+def test_polynomial_features_csr_X_zero_row(zero_row_index, deg,
+                                            interaction_only):
+    X_csr = sparse_random(3, 10, 1.0, random_state=0).tocsr()
+    X_csr[zero_row_index, :] = 0.0
+    X = X_csr.toarray()
+
+    est = PolynomialFeatures(deg, include_bias=False,
+                             interaction_only=interaction_only)
+    Xt_csr = est.fit_transform(X_csr)
+    Xt_dense = est.fit_transform(X)
+
+    assert isinstance(Xt_csr, sparse.csr_matrix)
+    assert Xt_csr.dtype == Xt_dense.dtype
+    assert_array_almost_equal(Xt_csr.A, Xt_dense)
+
+
+# This degree should always be one more than the highest degree supported by
+# _csr_expansion.
+@pytest.mark.parametrize(['include_bias', 'interaction_only'],
+                         [(True, True), (True, False),
+                          (False, True), (False, False)])
+def test_polynomial_features_csr_X_degree_4(include_bias, interaction_only):
+    X_csr = sparse_random(1000, 10, 0.5, random_state=0).tocsr()
+    X = X_csr.toarray()
+
+    est = PolynomialFeatures(4, include_bias=include_bias,
+                             interaction_only=interaction_only)
+    Xt_csr = est.fit_transform(X_csr)
+    Xt_dense = est.fit_transform(X)
+
+    assert isinstance(Xt_csr, sparse.csr_matrix)
+    assert Xt_csr.dtype == Xt_dense.dtype
+    assert_array_almost_equal(Xt_csr.A, Xt_dense)
+
+
+@pytest.mark.parametrize(['deg', 'dim', 'interaction_only'],
+                         [(2, 1, True),
+                          (2, 2, True),
+                          (3, 1, True),
+                          (3, 2, True),
+                          (3, 3, True),
+                          (2, 1, False),
+                          (2, 2, False),
+                          (3, 1, False),
+                          (3, 2, False),
+                          (3, 3, False)])
+def test_polynomial_features_csr_X_dim_edges(deg, dim, interaction_only):
+    X_csr = sparse_random(1000, dim, 0.5, random_state=0).tocsr()
+    X = X_csr.toarray()
+
+    est = PolynomialFeatures(deg, interaction_only=interaction_only)
+    Xt_csr = est.fit_transform(X_csr)
+    Xt_dense = est.fit_transform(X)
+
+    assert isinstance(Xt_csr, sparse.csr_matrix)
+    assert Xt_csr.dtype == Xt_dense.dtype
+    assert_array_almost_equal(Xt_csr.A, Xt_dense)

From 300a3fd6b587194cd96bb5ce9eadb5c815ceb31f Mon Sep 17 00:00:00 2001
From: avigupta2612 <avigupta2612@gmail.com>
Date: Thu, 4 Mar 2021 05:21:01 +0530
Subject: [PATCH 2/6] lint fix

---
 sklearn/preprocessing/_data.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py
index e45f6e869f45a..5e85b932a1e39 100644
--- a/sklearn/preprocessing/_data.py
+++ b/sklearn/preprocessing/_data.py
@@ -29,7 +29,6 @@
 from ..utils.validation import (check_is_fitted, check_random_state,
                                 _check_sample_weight,
                                 FLOAT_DTYPES, _deprecate_positional_args)
-from ._csr_polynomial_expansion import _csr_polynomial_expansion
 
 from ._encoders import OneHotEncoder
 

From 070e2fd71c314a27a6e83a6d409b183eb79a7734 Mon Sep 17 00:00:00 2001
From: avigupta2612 <avigupta2612@gmail.com>
Date: Sat, 13 Mar 2021 02:38:47 +0530
Subject: [PATCH 3/6] order changes

---
 sklearn/preprocessing/__init__.py             |    2 +-
 sklearn/preprocessing/_polynomial.py          | 1070 ++++++++---------
 .../preprocessing/tests/test_polynomial.py    |    6 +-
 3 files changed, 539 insertions(+), 539 deletions(-)

diff --git a/sklearn/preprocessing/__init__.py b/sklearn/preprocessing/__init__.py
index a24346dc8d045..6653088ba85a7 100644
--- a/sklearn/preprocessing/__init__.py
+++ b/sklearn/preprocessing/__init__.py
@@ -34,8 +34,8 @@
 
 from ._discretization import KBinsDiscretizer
 
-from ._polynomial import SplineTransformer
 from ._polynomial import PolynomialFeatures
+from ._polynomial import SplineTransformer
 
 
 __all__ = [
diff --git a/sklearn/preprocessing/_polynomial.py b/sklearn/preprocessing/_polynomial.py
index 73a0af963ad33..1cf2135903a62 100644
--- a/sklearn/preprocessing/_polynomial.py
+++ b/sklearn/preprocessing/_polynomial.py
@@ -22,161 +22,110 @@
 ]
 
 
-# TODO:
-# - sparse support (either scipy or own cython solution)?
-# - extrapolation (cyclic)
-class SplineTransformer(TransformerMixin, BaseEstimator):
-    """Generate univariate B-spline bases for features.
-
-    Generate a new feature matrix consisting of
-    `n_splines=n_knots + degree - 1` spline basis functions (B-splines) of
-    polynomial order=`degree` for each feature.
-
-    Read more in the :ref:`User Guide <spline_transformer>`.
+class PolynomialFeatures(TransformerMixin, BaseEstimator):
+    """Generate polynomial and interaction features.
 
-    .. versionadded:: 1.0
+    Generate a new feature matrix consisting of all polynomial combinations
+    of the features with degree less than or equal to the specified degree.
+    For example, if an input sample is two dimensional and of the form
+    [a, b], the degree-2 polynomial features are [1, a, b, a^2, ab, b^2].
 
     Parameters
     ----------
-    n_knots : int, default=5
-        Number of knots of the splines if `knots` equals one of
-        {'uniform', 'quantile'}. Must be larger or equal 2.
-
-    degree : int, default=3
-        The polynomial degree of the spline basis. Must be a non-negative
-        integer.
-
-    knots : {'uniform', 'quantile'} or array-like of shape \
-        (n_knots, n_features), default='uniform'
-        Set knot positions such that first knot <= features <= last knot.
-
-        - If 'uniform', `n_knots` number of knots are distributed uniformly
-          from min to max values of the features.
-        - If 'quantile', they are distributed uniformly along the quantiles of
-          the features.
-        - If an array-like is given, it directly specifies the sorted knot
-          positions including the boundary knots. Note that, internally,
-          `degree` number of knots are added before the first knot, the same
-          after the last knot.
+    degree : int, default=2
+        The degree of the polynomial features.
 
-    extrapolation : {'error', 'constant', 'linear', 'continue'}, \
-        default='constant'
-        If 'error', values outside the min and max values of the training
-        features raises a `ValueError`. If 'constant', the value of the
-        splines at minimum and maximum value of the features is used as
-        constant extrapolation. If 'linear', a linear extrapolation is used.
-        If 'continue', the splines are extrapolated as is, i.e. option
-        `extrapolate=True` in :class:`scipy.interpolate.BSpline`.
+    interaction_only : bool, default=False
+        If true, only interaction features are produced: features that are
+        products of at most ``degree`` *distinct* input features (so not
+        ``x[1] ** 2``, ``x[0] * x[2] ** 3``, etc.).
 
     include_bias : bool, default=True
-        If True (default), then the last spline element inside the data range
-        of a feature is dropped. As B-splines sum to one over the spline basis
-        functions for each data point, they implicitly include a bias term,
-        i.e. a column of ones. It acts as an intercept term in a linear models.
+        If True (default), then include a bias column, the feature in which
+        all polynomial powers are zero (i.e. a column of ones - acts as an
+        intercept term in a linear model).
 
     order : {'C', 'F'}, default='C'
-        Order of output array. 'F' order is faster to compute, but may slow
-        down subsequent estimators.
+        Order of output array in the dense case. 'F' order is faster to
+        compute, but may slow down subsequent estimators.
+
+        .. versionadded:: 0.21
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from sklearn.preprocessing import PolynomialFeatures
+    >>> X = np.arange(6).reshape(3, 2)
+    >>> X
+    array([[0, 1],
+           [2, 3],
+           [4, 5]])
+    >>> poly = PolynomialFeatures(2)
+    >>> poly.fit_transform(X)
+    array([[ 1.,  0.,  1.,  0.,  0.,  1.],
+           [ 1.,  2.,  3.,  4.,  6.,  9.],
+           [ 1.,  4.,  5., 16., 20., 25.]])
+    >>> poly = PolynomialFeatures(interaction_only=True)
+    >>> poly.fit_transform(X)
+    array([[ 1.,  0.,  1.,  0.],
+           [ 1.,  2.,  3.,  6.],
+           [ 1.,  4.,  5., 20.]])
 
     Attributes
     ----------
-    bsplines_ : list of shape (n_features,)
-        List of BSplines objects, one for each feature.
+    powers_ : ndarray of shape (n_output_features, n_input_features)
+        powers_[i, j] is the exponent of the jth input in the ith output.
 
-    n_features_in_ : int
+    n_input_features_ : int
         The total number of input features.
 
-    n_features_out_ : int
-        The total number of output features, which is computed as
-        `n_features * n_splines`, where `n_splines` is
-        the number of bases elements of the B-splines, `n_knots + degree - 1`.
-        If `include_bias=False`, then it is only
-        `n_features * (n_splines - 1)`.
+    n_output_features_ : int
+        The total number of polynomial output features. The number of output
+        features is computed by iterating over all suitably sized combinations
+        of input features.
 
     See Also
     --------
-    KBinsDiscretizer : Transformer that bins continuous data into intervals.
-
-    PolynomialFeatures : Transformer that generates polynomial and interaction
-        features.
+    SplineTransformer : Transformer that generates univariate B-spline bases
+        for features
 
     Notes
     -----
-    High degrees and a high number of knots can cause overfitting.
+    Be aware that the number of features in the output array scales
+    polynomially in the number of features of the input array, and
+    exponentially in the degree. High degrees can cause overfitting.
 
     See :ref:`examples/linear_model/plot_polynomial_interpolation.py
-    <sphx_glr_auto_examples_linear_model_plot_polynomial_interpolation.py>`.
-
-    Examples
-    --------
-    >>> import numpy as np
-    >>> from sklearn.preprocessing import SplineTransformer
-    >>> X = np.arange(6).reshape(6, 1)
-    >>> spline = SplineTransformer(degree=2, n_knots=3)
-    >>> spline.fit_transform(X)
-    array([[0.5 , 0.5 , 0.  , 0.  ],
-           [0.18, 0.74, 0.08, 0.  ],
-           [0.02, 0.66, 0.32, 0.  ],
-           [0.  , 0.32, 0.66, 0.02],
-           [0.  , 0.08, 0.74, 0.18],
-           [0.  , 0.  , 0.5 , 0.5 ]])
+    <sphx_glr_auto_examples_linear_model_plot_polynomial_interpolation.py>`
     """
-
-    def __init__(
-        self,
-        n_knots=5,
-        degree=3,
-        *,
-        knots="uniform",
-        extrapolation="constant",
-        include_bias=True,
-        order="C",
-    ):
-        self.n_knots = n_knots
+    @_deprecate_positional_args
+    def __init__(self, degree=2, *, interaction_only=False, include_bias=True,
+                 order='C'):
         self.degree = degree
-        self.knots = knots
-        self.extrapolation = extrapolation
+        self.interaction_only = interaction_only
         self.include_bias = include_bias
         self.order = order
 
     @staticmethod
-    def _get_base_knot_positions(X, n_knots=10, knots="uniform"):
-        """Calculate base knot positions.
-
-        Base knots such that first knot <= feature <= last knot. For the
-        B-spline construction with scipy.interpolate.BSpline, 2*degree knots
-        beyond the base interval are added.
+    def _combinations(n_features, degree, interaction_only, include_bias):
+        comb = (combinations if interaction_only else combinations_w_r)
+        start = int(not include_bias)
+        return chain.from_iterable(comb(range(n_features), i)
+                                   for i in range(start, degree + 1))
 
-        Returns
-        -------
-        knots : ndarray of shape (n_knots, n_features), dtype=np.float64
-            Knot positions (points) of base interval.
-        """
-        if knots == "quantile":
-            knots = np.percentile(
-                X,
-                100
-                * np.linspace(start=0, stop=1, num=n_knots, dtype=np.float64),
-                axis=0,
-            )
-        else:
-            # knots == 'uniform':
-            # Note that the variable `knots` has already been validated and
-            # `else` is therefore safe.
-            x_min = np.amin(X, axis=0)
-            x_max = np.amax(X, axis=0)
-            knots = linspace(
-                start=x_min,
-                stop=x_max,
-                num=n_knots,
-                endpoint=True,
-                dtype=np.float64,
-            )
+    @property
+    def powers_(self):
+        check_is_fitted(self)
 
-        return knots
+        combinations = self._combinations(self.n_input_features_, self.degree,
+                                          self.interaction_only,
+                                          self.include_bias)
+        return np.vstack([np.bincount(c, minlength=self.n_input_features_)
+                          for c in combinations])
 
     def get_feature_names(self, input_features=None):
-        """Return feature names for output features.
+        """
+        Return feature names for output features
 
         Parameters
         ----------
@@ -188,21 +137,29 @@ def get_feature_names(self, input_features=None):
         -------
         output_feature_names : list of str of shape (n_output_features,)
         """
-        n_splines = self.bsplines_[0].c.shape[0]
+        powers = self.powers_
         if input_features is None:
-            input_features = ["x%d" % i for i in range(self.n_features_in_)]
+            input_features = ['x%d' % i for i in range(powers.shape[1])]
         feature_names = []
-        for i in range(self.n_features_in_):
-            for j in range(n_splines - 1 + self.include_bias):
-                feature_names.append(f"{input_features[i]}_sp_{j}")
+        for row in powers:
+            inds = np.where(row)[0]
+            if len(inds):
+                name = " ".join("%s^%d" % (input_features[ind], exp)
+                                if exp != 1 else input_features[ind]
+                                for ind, exp in zip(inds, row[inds]))
+            else:
+                name = "1"
+            feature_names.append(name)
         return feature_names
 
     def fit(self, X, y=None):
-        """Compute knot positions of splines.
+        """
+        Compute number of output features.
+
 
         Parameters
         ----------
-        X : array-like of shape (n_samples, n_features)
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
             The data.
 
         y : None
@@ -213,331 +170,300 @@ def fit(self, X, y=None):
         self : object
             Fitted transformer.
         """
-        X = self._validate_data(
-            X,
-            reset=True,
-            accept_sparse=False,
-            ensure_min_samples=2,
-            ensure_2d=True,
-        )
-        n_samples, n_features = X.shape
+        n_samples, n_features = self._validate_data(
+            X, accept_sparse=True).shape
+        combinations = self._combinations(n_features, self.degree,
+                                          self.interaction_only,
+                                          self.include_bias)
+        self.n_input_features_ = n_features
+        self.n_output_features_ = sum(1 for _ in combinations)
+        return self
 
-        if not (
-            isinstance(self.degree, numbers.Integral) and self.degree >= 0
-        ):
-            raise ValueError("degree must be a non-negative integer.")
+    def transform(self, X):
+        """Transform data to polynomial features.
 
-        if not (
-            isinstance(self.n_knots, numbers.Integral) and self.n_knots >= 2
-        ):
-            raise ValueError("n_knots must be a positive integer >= 2.")
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            The data to transform, row by row.
 
-        if isinstance(self.knots, str) and self.knots in [
-            "uniform",
-            "quantile",
-        ]:
-            base_knots = self._get_base_knot_positions(
-                X, n_knots=self.n_knots, knots=self.knots
-            )
-        else:
-            base_knots = check_array(self.knots)
-            if base_knots.shape[0] < 2:
-                raise ValueError(
-                    "Number of knots, knots.shape[0], must be >= " "2."
-                )
-            elif base_knots.shape[1] != n_features:
-                raise ValueError("knots.shape[1] == n_features is violated.")
-            elif not np.all(np.diff(base_knots, axis=0) > 0):
-                raise ValueError("knots must be sorted without duplicates.")
-
-        if self.extrapolation not in (
-            "error",
-            "constant",
-            "linear",
-            "continue",
-        ):
-            raise ValueError(
-                "extrapolation must be one of 'error', "
-                "'constant', 'linear' or 'continue'."
-            )
-
-        if not isinstance(self.include_bias, (bool, np.bool_)):
-            raise ValueError("include_bias must be bool.")
-
-        # number of knots for base interval
-        n_knots = base_knots.shape[0]
-        # number of splines basis functions
-        n_splines = n_knots + self.degree - 1
-        degree = self.degree
-        n_out = n_features * n_splines
-        # We have to add degree number of knots below, and degree number knots
-        # above the base knots in order to make the spline basis complete.
-        # Eilers & Marx in "Flexible smoothing with B-splines and  penalties"
-        # https://doi.org/10.1214/ss/1038425655 advice against repeating first
-        # and last knot several times, which would have inferior behaviour at
-        # boundaries if combined with a penalty (hence P-Spline). We follow
-        # this advice even if our splines are unpenalized.
-        # Meaning we do not:
-        # knots = np.r_[np.tile(base_knots.min(axis=0), reps=[degree, 1]),
-        #              base_knots,
-        #              np.tile(base_knots.max(axis=0), reps=[degree, 1])
-        #              ]
-        # Instead, we reuse the distance of the 2 fist/last knots.
-        dist_min = base_knots[1] - base_knots[0]
-        dist_max = base_knots[-1] - base_knots[-2]
-        knots = np.r_[
-            linspace(
-                base_knots[0] - degree * dist_min,
-                base_knots[0] - dist_min,
-                num=degree,
-            ),
-            base_knots,
-            linspace(
-                base_knots[-1] + dist_max,
-                base_knots[-1] + degree * dist_max,
-                num=degree,
-            ),
-        ]
-
-        # With a diagonal coefficient matrix, we get back the spline basis
-        # elements, i.e. the design matrix of the spline.
-        # Note, BSpline appreciates C-contiguous float64 arrays as c=coef.
-        coef = np.eye(n_knots + self.degree - 1, dtype=np.float64)
-        extrapolate = self.extrapolation == "continue"
-        bsplines = [
-            BSpline.construct_fast(
-                knots[:, i], coef, self.degree, extrapolate=extrapolate
-            )
-            for i in range(n_features)
-        ]
-        self.bsplines_ = bsplines
-
-        self.n_features_out_ = n_out - n_features * (1 - self.include_bias)
-        return self
-
-    def transform(self, X):
-        """Transform each feature data to B-splines.
+            Prefer CSR over CSC for sparse input (for speed), but CSC is
+            required if the degree is 4 or higher. If the degree is less than
+            4 and the input format is CSC, it will be converted to CSR, have
+            its polynomial features generated, then converted back to CSC.
 
-        Parameters
-        ----------
-        X : array-like of shape (n_samples, n_features)
-            The data to transform.
+            If the degree is 2 or 3, the method described in "Leveraging
+            Sparsity to Speed Up Polynomial Feature Expansions of CSR Matrices
+            Using K-Simplex Numbers" by Andrew Nystrom and John Hughes is
+            used, which is much faster than the method used on CSC input. For
+            this reason, a CSC input will be converted to CSR, and the output
+            will be converted back to CSC prior to being returned, hence the
+            preference of CSR.
 
         Returns
         -------
-        XBS : ndarray of shape (n_samples, n_features * n_splines)
-            The matrix of features, where n_splines is the number of bases
-            elements of the B-splines, n_knots + degree - 1.
+        XP : {ndarray, sparse matrix} of shape (n_samples, NP)
+            The matrix of features, where NP is the number of polynomial
+            features generated from the combination of inputs. If a sparse
+            matrix is provided, it will be converted into a sparse
+            ``csr_matrix``.
         """
         check_is_fitted(self)
 
-        X = self._validate_data(
-            X, reset=False, accept_sparse=False, ensure_2d=True
-        )
+        X = self._validate_data(X, order='F', dtype=FLOAT_DTYPES, reset=False,
+                                accept_sparse=('csr', 'csc'))
 
         n_samples, n_features = X.shape
-        n_splines = self.bsplines_[0].c.shape[0]
-        degree = self.degree
 
-        # Note that scipy BSpline returns float64 arrays and converts input
-        # x=X[:, i] to c-contiguous float64.
-        n_out = self.n_features_out_ + n_features * (1 - self.include_bias)
-        if X.dtype in FLOAT_DTYPES:
-            dtype = X.dtype
+        if n_features != self.n_input_features_:
+            raise ValueError("X shape does not match training shape")
+
+        if sparse.isspmatrix_csr(X):
+            if self.degree > 3:
+                return self.transform(X.tocsc()).tocsr()
+            to_stack = []
+            if self.include_bias:
+                to_stack.append(np.ones(shape=(n_samples, 1), dtype=X.dtype))
+            to_stack.append(X)
+            for deg in range(2, self.degree+1):
+                Xp_next = _csr_polynomial_expansion(X.data, X.indices,
+                                                    X.indptr, X.shape[1],
+                                                    self.interaction_only,
+                                                    deg)
+                if Xp_next is None:
+                    break
+                to_stack.append(Xp_next)
+            XP = sparse.hstack(to_stack, format='csr')
+        elif sparse.isspmatrix_csc(X) and self.degree < 4:
+            return self.transform(X.tocsr()).tocsc()
         else:
-            dtype = np.float64
-        XBS = np.zeros((n_samples, n_out), dtype=dtype, order=self.order)
+            if sparse.isspmatrix(X):
+                combinations = self._combinations(n_features, self.degree,
+                                                  self.interaction_only,
+                                                  self.include_bias)
+                columns = []
+                for comb in combinations:
+                    if comb:
+                        out_col = 1
+                        for col_idx in comb:
+                            out_col = X[:, col_idx].multiply(out_col)
+                        columns.append(out_col)
+                    else:
+                        bias = sparse.csc_matrix(np.ones((X.shape[0], 1)))
+                        columns.append(bias)
+                XP = sparse.hstack(columns, dtype=X.dtype).tocsc()
+            else:
+                XP = np.empty((n_samples, self.n_output_features_),
+                              dtype=X.dtype, order=self.order)
 
-        for i in range(n_features):
-            spl = self.bsplines_[i]
+                # What follows is a faster implementation of:
+                # for i, comb in enumerate(combinations):
+                #     XP[:, i] = X[:, comb].prod(1)
+                # This implementation uses two optimisations.
+                # First one is broadcasting,
+                # multiply ([X1, ..., Xn], X1) -> [X1 X1, ..., Xn X1]
+                # multiply ([X2, ..., Xn], X2) -> [X2 X2, ..., Xn X2]
+                # ...
+                # multiply ([X[:, start:end], X[:, start]) -> ...
+                # Second optimisation happens for degrees >= 3.
+                # Xi^3 is computed reusing previous computation:
+                # Xi^3 = Xi^2 * Xi.
 
-            if self.extrapolation in ("continue", "error"):
-                XBS[:, (i * n_splines):((i + 1) * n_splines)] = spl(X[:, i])
-            else:
-                xmin = spl.t[degree]
-                xmax = spl.t[-degree - 1]
-                mask = (xmin <= X[:, i]) & (X[:, i] <= xmax)
-                XBS[mask, (i * n_splines):((i + 1) * n_splines)] = spl(
-                    X[mask, i]
-                )
+                if self.include_bias:
+                    XP[:, 0] = 1
+                    current_col = 1
+                else:
+                    current_col = 0
 
-            # Note for extrapolation:
-            # 'continue' is already returned as is by scipy BSplines
-            if self.extrapolation == "error":
-                # BSpline with extrapolate=False does not raise an error, but
-                # output np.nan.
-                if np.any(
-                    np.isnan(XBS[:, (i * n_splines):((i + 1) * n_splines)])
-                ):
-                    raise ValueError(
-                        "X contains values beyond the limits of the knots."
-                    )
-            elif self.extrapolation == "constant":
-                # Set all values beyond xmin and xmax to the value of the
-                # spline basis functions at those two positions.
-                # Only the first degree and last degree number of splines
-                # have non-zero values at the boundaries.
+                # d = 0
+                XP[:, current_col:current_col + n_features] = X
+                index = list(range(current_col,
+                                   current_col + n_features))
+                current_col += n_features
+                index.append(current_col)
 
-                # spline values at boundaries
-                f_min = spl(xmin)
-                f_max = spl(xmax)
-                mask = X[:, i] < xmin
-                if np.any(mask):
-                    XBS[
-                        mask, (i * n_splines):(i * n_splines + degree)
-                    ] = f_min[:degree]
+                # d >= 1
+                for _ in range(1, self.degree):
+                    new_index = []
+                    end = index[-1]
+                    for feature_idx in range(n_features):
+                        start = index[feature_idx]
+                        new_index.append(current_col)
+                        if self.interaction_only:
+                            start += (index[feature_idx + 1] -
+                                      index[feature_idx])
+                        next_col = current_col + end - start
+                        if next_col <= current_col:
+                            break
+                        # XP[:, start:end] are terms of degree d - 1
+                        # that exclude feature #feature_idx.
+                        np.multiply(XP[:, start:end],
+                                    X[:, feature_idx:feature_idx + 1],
+                                    out=XP[:, current_col:next_col],
+                                    casting='no')
+                        current_col = next_col
 
-                mask = X[:, i] > xmax
-                if np.any(mask):
-                    XBS[
-                        mask,
-                        ((i + 1) * n_splines - degree):((i + 1) * n_splines),
-                    ] = f_max[-degree:]
-            elif self.extrapolation == "linear":
-                # Continue the degree first and degree last spline bases
-                # linearly beyond the boundaries, with slope = derivative at
-                # the boundary.
-                # Note that all others have derivative = value = 0 at the
-                # boundaries.
+                    new_index.append(current_col)
+                    index = new_index
 
-                # spline values at boundaries
-                f_min, f_max = spl(xmin), spl(xmax)
-                # spline derivatives = slopes at boundaries
-                fp_min, fp_max = spl(xmin, nu=1), spl(xmax, nu=1)
-                # Compute the linear continuation.
-                if degree <= 1:
-                    # For degree=1, the derivative of 2nd spline is not zero at
-                    # boundary. For degree=0 it is the same as 'constant'.
-                    degree += 1
-                for j in range(degree):
-                    mask = X[:, i] < xmin
-                    if np.any(mask):
-                        XBS[mask, i * n_splines + j] = (
-                            f_min[j] + (X[mask, i] - xmin) * fp_min[j]
-                        )
+        return XP
 
-                    mask = X[:, i] > xmax
-                    if np.any(mask):
-                        k = n_splines - 1 - j
-                        XBS[mask, i * n_splines + k] = (
-                            f_max[k] + (X[mask, i] - xmax) * fp_max[k]
-                        )
 
-        if self.include_bias:
-            return XBS
-        else:
-            # We throw away one spline basis per feature.
-            # We chose the last one.
-            indices = [
-                j for j in range(XBS.shape[1]) if (j + 1) % n_splines != 0
-            ]
-            return XBS[:, indices]
+# TODO:
+# - sparse support (either scipy or own cython solution)?
+# - extrapolation (cyclic)
+class SplineTransformer(TransformerMixin, BaseEstimator):
+    """Generate univariate B-spline bases for features.
 
+    Generate a new feature matrix consisting of
+    `n_splines=n_knots + degree - 1` spline basis functions (B-splines) of
+    polynomial order=`degree` for each feature.
 
-class PolynomialFeatures(TransformerMixin, BaseEstimator):
-    """Generate polynomial and interaction features.
+    Read more in the :ref:`User Guide <spline_transformer>`.
 
-    Generate a new feature matrix consisting of all polynomial combinations
-    of the features with degree less than or equal to the specified degree.
-    For example, if an input sample is two dimensional and of the form
-    [a, b], the degree-2 polynomial features are [1, a, b, a^2, ab, b^2].
+    .. versionadded:: 1.0
+
+    Parameters
+    ----------
+    n_knots : int, default=5
+        Number of knots of the splines if `knots` equals one of
+        {'uniform', 'quantile'}. Must be larger or equal 2.
+
+    degree : int, default=3
+        The polynomial degree of the spline basis. Must be a non-negative
+        integer.
+
+    knots : {'uniform', 'quantile'} or array-like of shape \
+        (n_knots, n_features), default='uniform'
+        Set knot positions such that first knot <= features <= last knot.
 
-    Parameters
-    ----------
-    degree : int, default=2
-        The degree of the polynomial features.
+        - If 'uniform', `n_knots` number of knots are distributed uniformly
+          from min to max values of the features.
+        - If 'quantile', they are distributed uniformly along the quantiles of
+          the features.
+        - If an array-like is given, it directly specifies the sorted knot
+          positions including the boundary knots. Note that, internally,
+          `degree` number of knots are added before the first knot, the same
+          after the last knot.
 
-    interaction_only : bool, default=False
-        If true, only interaction features are produced: features that are
-        products of at most ``degree`` *distinct* input features (so not
-        ``x[1] ** 2``, ``x[0] * x[2] ** 3``, etc.).
+    extrapolation : {'error', 'constant', 'linear', 'continue'}, \
+        default='constant'
+        If 'error', values outside the min and max values of the training
+        features raises a `ValueError`. If 'constant', the value of the
+        splines at minimum and maximum value of the features is used as
+        constant extrapolation. If 'linear', a linear extrapolation is used.
+        If 'continue', the splines are extrapolated as is, i.e. option
+        `extrapolate=True` in :class:`scipy.interpolate.BSpline`.
 
     include_bias : bool, default=True
-        If True (default), then include a bias column, the feature in which
-        all polynomial powers are zero (i.e. a column of ones - acts as an
-        intercept term in a linear model).
+        If True (default), then the last spline element inside the data range
+        of a feature is dropped. As B-splines sum to one over the spline basis
+        functions for each data point, they implicitly include a bias term,
+        i.e. a column of ones. It acts as an intercept term in a linear models.
 
     order : {'C', 'F'}, default='C'
-        Order of output array in the dense case. 'F' order is faster to
-        compute, but may slow down subsequent estimators.
-
-        .. versionadded:: 0.21
-
-    Examples
-    --------
-    >>> import numpy as np
-    >>> from sklearn.preprocessing import PolynomialFeatures
-    >>> X = np.arange(6).reshape(3, 2)
-    >>> X
-    array([[0, 1],
-           [2, 3],
-           [4, 5]])
-    >>> poly = PolynomialFeatures(2)
-    >>> poly.fit_transform(X)
-    array([[ 1.,  0.,  1.,  0.,  0.,  1.],
-           [ 1.,  2.,  3.,  4.,  6.,  9.],
-           [ 1.,  4.,  5., 16., 20., 25.]])
-    >>> poly = PolynomialFeatures(interaction_only=True)
-    >>> poly.fit_transform(X)
-    array([[ 1.,  0.,  1.,  0.],
-           [ 1.,  2.,  3.,  6.],
-           [ 1.,  4.,  5., 20.]])
+        Order of output array. 'F' order is faster to compute, but may slow
+        down subsequent estimators.
 
     Attributes
     ----------
-    powers_ : ndarray of shape (n_output_features, n_input_features)
-        powers_[i, j] is the exponent of the jth input in the ith output.
+    bsplines_ : list of shape (n_features,)
+        List of BSplines objects, one for each feature.
 
-    n_input_features_ : int
+    n_features_in_ : int
         The total number of input features.
 
-    n_output_features_ : int
-        The total number of polynomial output features. The number of output
-        features is computed by iterating over all suitably sized combinations
-        of input features.
+    n_features_out_ : int
+        The total number of output features, which is computed as
+        `n_features * n_splines`, where `n_splines` is
+        the number of bases elements of the B-splines, `n_knots + degree - 1`.
+        If `include_bias=False`, then it is only
+        `n_features * (n_splines - 1)`.
 
     See Also
     --------
-    SplineTransformer : Transformer that generates univariate B-spline bases
-        for features
+    KBinsDiscretizer : Transformer that bins continuous data into intervals.
+
+    PolynomialFeatures : Transformer that generates polynomial and interaction
+        features.
 
     Notes
     -----
-    Be aware that the number of features in the output array scales
-    polynomially in the number of features of the input array, and
-    exponentially in the degree. High degrees can cause overfitting.
+    High degrees and a high number of knots can cause overfitting.
 
     See :ref:`examples/linear_model/plot_polynomial_interpolation.py
-    <sphx_glr_auto_examples_linear_model_plot_polynomial_interpolation.py>`
+    <sphx_glr_auto_examples_linear_model_plot_polynomial_interpolation.py>`.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from sklearn.preprocessing import SplineTransformer
+    >>> X = np.arange(6).reshape(6, 1)
+    >>> spline = SplineTransformer(degree=2, n_knots=3)
+    >>> spline.fit_transform(X)
+    array([[0.5 , 0.5 , 0.  , 0.  ],
+           [0.18, 0.74, 0.08, 0.  ],
+           [0.02, 0.66, 0.32, 0.  ],
+           [0.  , 0.32, 0.66, 0.02],
+           [0.  , 0.08, 0.74, 0.18],
+           [0.  , 0.  , 0.5 , 0.5 ]])
     """
-    @_deprecate_positional_args
-    def __init__(self, degree=2, *, interaction_only=False, include_bias=True,
-                 order='C'):
+
+    def __init__(
+        self,
+        n_knots=5,
+        degree=3,
+        *,
+        knots="uniform",
+        extrapolation="constant",
+        include_bias=True,
+        order="C",
+    ):
+        self.n_knots = n_knots
         self.degree = degree
-        self.interaction_only = interaction_only
+        self.knots = knots
+        self.extrapolation = extrapolation
         self.include_bias = include_bias
         self.order = order
 
     @staticmethod
-    def _combinations(n_features, degree, interaction_only, include_bias):
-        comb = (combinations if interaction_only else combinations_w_r)
-        start = int(not include_bias)
-        return chain.from_iterable(comb(range(n_features), i)
-                                   for i in range(start, degree + 1))
+    def _get_base_knot_positions(X, n_knots=10, knots="uniform"):
+        """Calculate base knot positions.
 
-    @property
-    def powers_(self):
-        check_is_fitted(self)
+        Base knots such that first knot <= feature <= last knot. For the
+        B-spline construction with scipy.interpolate.BSpline, 2*degree knots
+        beyond the base interval are added.
 
-        combinations = self._combinations(self.n_input_features_, self.degree,
-                                          self.interaction_only,
-                                          self.include_bias)
-        return np.vstack([np.bincount(c, minlength=self.n_input_features_)
-                          for c in combinations])
+        Returns
+        -------
+        knots : ndarray of shape (n_knots, n_features), dtype=np.float64
+            Knot positions (points) of base interval.
+        """
+        if knots == "quantile":
+            knots = np.percentile(
+                X,
+                100
+                * np.linspace(start=0, stop=1, num=n_knots, dtype=np.float64),
+                axis=0,
+            )
+        else:
+            # knots == 'uniform':
+            # Note that the variable `knots` has already been validated and
+            # `else` is therefore safe.
+            x_min = np.amin(X, axis=0)
+            x_max = np.amax(X, axis=0)
+            knots = linspace(
+                start=x_min,
+                stop=x_max,
+                num=n_knots,
+                endpoint=True,
+                dtype=np.float64,
+            )
+
+        return knots
 
     def get_feature_names(self, input_features=None):
-        """
-        Return feature names for output features
+        """Return feature names for output features.
 
         Parameters
         ----------
@@ -549,173 +475,247 @@ def get_feature_names(self, input_features=None):
         -------
         output_feature_names : list of str of shape (n_output_features,)
         """
-        powers = self.powers_
+        n_splines = self.bsplines_[0].c.shape[0]
         if input_features is None:
-            input_features = ['x%d' % i for i in range(powers.shape[1])]
+            input_features = ["x%d" % i for i in range(self.n_features_in_)]
         feature_names = []
-        for row in powers:
-            inds = np.where(row)[0]
-            if len(inds):
-                name = " ".join("%s^%d" % (input_features[ind], exp)
-                                if exp != 1 else input_features[ind]
-                                for ind, exp in zip(inds, row[inds]))
-            else:
-                name = "1"
-            feature_names.append(name)
+        for i in range(self.n_features_in_):
+            for j in range(n_splines - 1 + self.include_bias):
+                feature_names.append(f"{input_features[i]}_sp_{j}")
         return feature_names
 
     def fit(self, X, y=None):
-        """
-        Compute number of output features.
-
+        """Compute knot positions of splines.
 
         Parameters
         ----------
-        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+        X : array-like of shape (n_samples, n_features)
             The data.
 
-        y : None
-            Ignored.
+        y : None
+            Ignored.
+
+        Returns
+        -------
+        self : object
+            Fitted transformer.
+        """
+        X = self._validate_data(
+            X,
+            reset=True,
+            accept_sparse=False,
+            ensure_min_samples=2,
+            ensure_2d=True,
+        )
+        n_samples, n_features = X.shape
+
+        if not (
+            isinstance(self.degree, numbers.Integral) and self.degree >= 0
+        ):
+            raise ValueError("degree must be a non-negative integer.")
+
+        if not (
+            isinstance(self.n_knots, numbers.Integral) and self.n_knots >= 2
+        ):
+            raise ValueError("n_knots must be a positive integer >= 2.")
+
+        if isinstance(self.knots, str) and self.knots in [
+            "uniform",
+            "quantile",
+        ]:
+            base_knots = self._get_base_knot_positions(
+                X, n_knots=self.n_knots, knots=self.knots
+            )
+        else:
+            base_knots = check_array(self.knots)
+            if base_knots.shape[0] < 2:
+                raise ValueError(
+                    "Number of knots, knots.shape[0], must be >= " "2."
+                )
+            elif base_knots.shape[1] != n_features:
+                raise ValueError("knots.shape[1] == n_features is violated.")
+            elif not np.all(np.diff(base_knots, axis=0) > 0):
+                raise ValueError("knots must be sorted without duplicates.")
+
+        if self.extrapolation not in (
+            "error",
+            "constant",
+            "linear",
+            "continue",
+        ):
+            raise ValueError(
+                "extrapolation must be one of 'error', "
+                "'constant', 'linear' or 'continue'."
+            )
+
+        if not isinstance(self.include_bias, (bool, np.bool_)):
+            raise ValueError("include_bias must be bool.")
+
+        # number of knots for base interval
+        n_knots = base_knots.shape[0]
+        # number of splines basis functions
+        n_splines = n_knots + self.degree - 1
+        degree = self.degree
+        n_out = n_features * n_splines
+        # We have to add degree number of knots below, and degree number knots
+        # above the base knots in order to make the spline basis complete.
+        # Eilers & Marx in "Flexible smoothing with B-splines and  penalties"
+        # https://doi.org/10.1214/ss/1038425655 advice against repeating first
+        # and last knot several times, which would have inferior behaviour at
+        # boundaries if combined with a penalty (hence P-Spline). We follow
+        # this advice even if our splines are unpenalized.
+        # Meaning we do not:
+        # knots = np.r_[np.tile(base_knots.min(axis=0), reps=[degree, 1]),
+        #              base_knots,
+        #              np.tile(base_knots.max(axis=0), reps=[degree, 1])
+        #              ]
+        # Instead, we reuse the distance of the 2 fist/last knots.
+        dist_min = base_knots[1] - base_knots[0]
+        dist_max = base_knots[-1] - base_knots[-2]
+        knots = np.r_[
+            linspace(
+                base_knots[0] - degree * dist_min,
+                base_knots[0] - dist_min,
+                num=degree,
+            ),
+            base_knots,
+            linspace(
+                base_knots[-1] + dist_max,
+                base_knots[-1] + degree * dist_max,
+                num=degree,
+            ),
+        ]
+
+        # With a diagonal coefficient matrix, we get back the spline basis
+        # elements, i.e. the design matrix of the spline.
+        # Note, BSpline appreciates C-contiguous float64 arrays as c=coef.
+        coef = np.eye(n_knots + self.degree - 1, dtype=np.float64)
+        extrapolate = self.extrapolation == "continue"
+        bsplines = [
+            BSpline.construct_fast(
+                knots[:, i], coef, self.degree, extrapolate=extrapolate
+            )
+            for i in range(n_features)
+        ]
+        self.bsplines_ = bsplines
 
-        Returns
-        -------
-        self : object
-            Fitted transformer.
-        """
-        n_samples, n_features = self._validate_data(
-            X, accept_sparse=True).shape
-        combinations = self._combinations(n_features, self.degree,
-                                          self.interaction_only,
-                                          self.include_bias)
-        self.n_input_features_ = n_features
-        self.n_output_features_ = sum(1 for _ in combinations)
+        self.n_features_out_ = n_out - n_features * (1 - self.include_bias)
         return self
 
     def transform(self, X):
-        """Transform data to polynomial features.
+        """Transform each feature data to B-splines.
 
         Parameters
         ----------
-        X : {array-like, sparse matrix} of shape (n_samples, n_features)
-            The data to transform, row by row.
-
-            Prefer CSR over CSC for sparse input (for speed), but CSC is
-            required if the degree is 4 or higher. If the degree is less than
-            4 and the input format is CSC, it will be converted to CSR, have
-            its polynomial features generated, then converted back to CSC.
-
-            If the degree is 2 or 3, the method described in "Leveraging
-            Sparsity to Speed Up Polynomial Feature Expansions of CSR Matrices
-            Using K-Simplex Numbers" by Andrew Nystrom and John Hughes is
-            used, which is much faster than the method used on CSC input. For
-            this reason, a CSC input will be converted to CSR, and the output
-            will be converted back to CSC prior to being returned, hence the
-            preference of CSR.
+        X : array-like of shape (n_samples, n_features)
+            The data to transform.
 
         Returns
         -------
-        XP : {ndarray, sparse matrix} of shape (n_samples, NP)
-            The matrix of features, where NP is the number of polynomial
-            features generated from the combination of inputs. If a sparse
-            matrix is provided, it will be converted into a sparse
-            ``csr_matrix``.
+        XBS : ndarray of shape (n_samples, n_features * n_splines)
+            The matrix of features, where n_splines is the number of bases
+            elements of the B-splines, n_knots + degree - 1.
         """
         check_is_fitted(self)
 
-        X = self._validate_data(X, order='F', dtype=FLOAT_DTYPES, reset=False,
-                                accept_sparse=('csr', 'csc'))
+        X = self._validate_data(
+            X, reset=False, accept_sparse=False, ensure_2d=True
+        )
 
         n_samples, n_features = X.shape
+        n_splines = self.bsplines_[0].c.shape[0]
+        degree = self.degree
 
-        if n_features != self.n_input_features_:
-            raise ValueError("X shape does not match training shape")
-
-        if sparse.isspmatrix_csr(X):
-            if self.degree > 3:
-                return self.transform(X.tocsc()).tocsr()
-            to_stack = []
-            if self.include_bias:
-                to_stack.append(np.ones(shape=(n_samples, 1), dtype=X.dtype))
-            to_stack.append(X)
-            for deg in range(2, self.degree+1):
-                Xp_next = _csr_polynomial_expansion(X.data, X.indices,
-                                                    X.indptr, X.shape[1],
-                                                    self.interaction_only,
-                                                    deg)
-                if Xp_next is None:
-                    break
-                to_stack.append(Xp_next)
-            XP = sparse.hstack(to_stack, format='csr')
-        elif sparse.isspmatrix_csc(X) and self.degree < 4:
-            return self.transform(X.tocsr()).tocsc()
+        # Note that scipy BSpline returns float64 arrays and converts input
+        # x=X[:, i] to c-contiguous float64.
+        n_out = self.n_features_out_ + n_features * (1 - self.include_bias)
+        if X.dtype in FLOAT_DTYPES:
+            dtype = X.dtype
         else:
-            if sparse.isspmatrix(X):
-                combinations = self._combinations(n_features, self.degree,
-                                                  self.interaction_only,
-                                                  self.include_bias)
-                columns = []
-                for comb in combinations:
-                    if comb:
-                        out_col = 1
-                        for col_idx in comb:
-                            out_col = X[:, col_idx].multiply(out_col)
-                        columns.append(out_col)
-                    else:
-                        bias = sparse.csc_matrix(np.ones((X.shape[0], 1)))
-                        columns.append(bias)
-                XP = sparse.hstack(columns, dtype=X.dtype).tocsc()
+            dtype = np.float64
+        XBS = np.zeros((n_samples, n_out), dtype=dtype, order=self.order)
+
+        for i in range(n_features):
+            spl = self.bsplines_[i]
+
+            if self.extrapolation in ("continue", "error"):
+                XBS[:, (i * n_splines):((i + 1) * n_splines)] = spl(X[:, i])
             else:
-                XP = np.empty((n_samples, self.n_output_features_),
-                              dtype=X.dtype, order=self.order)
+                xmin = spl.t[degree]
+                xmax = spl.t[-degree - 1]
+                mask = (xmin <= X[:, i]) & (X[:, i] <= xmax)
+                XBS[mask, (i * n_splines):((i + 1) * n_splines)] = spl(
+                    X[mask, i]
+                )
 
-                # What follows is a faster implementation of:
-                # for i, comb in enumerate(combinations):
-                #     XP[:, i] = X[:, comb].prod(1)
-                # This implementation uses two optimisations.
-                # First one is broadcasting,
-                # multiply ([X1, ..., Xn], X1) -> [X1 X1, ..., Xn X1]
-                # multiply ([X2, ..., Xn], X2) -> [X2 X2, ..., Xn X2]
-                # ...
-                # multiply ([X[:, start:end], X[:, start]) -> ...
-                # Second optimisation happens for degrees >= 3.
-                # Xi^3 is computed reusing previous computation:
-                # Xi^3 = Xi^2 * Xi.
+            # Note for extrapolation:
+            # 'continue' is already returned as is by scipy BSplines
+            if self.extrapolation == "error":
+                # BSpline with extrapolate=False does not raise an error, but
+                # output np.nan.
+                if np.any(
+                    np.isnan(XBS[:, (i * n_splines):((i + 1) * n_splines)])
+                ):
+                    raise ValueError(
+                        "X contains values beyond the limits of the knots."
+                    )
+            elif self.extrapolation == "constant":
+                # Set all values beyond xmin and xmax to the value of the
+                # spline basis functions at those two positions.
+                # Only the first degree and last degree number of splines
+                # have non-zero values at the boundaries.
 
-                if self.include_bias:
-                    XP[:, 0] = 1
-                    current_col = 1
-                else:
-                    current_col = 0
+                # spline values at boundaries
+                f_min = spl(xmin)
+                f_max = spl(xmax)
+                mask = X[:, i] < xmin
+                if np.any(mask):
+                    XBS[
+                        mask, (i * n_splines):(i * n_splines + degree)
+                    ] = f_min[:degree]
 
-                # d = 0
-                XP[:, current_col:current_col + n_features] = X
-                index = list(range(current_col,
-                                   current_col + n_features))
-                current_col += n_features
-                index.append(current_col)
+                mask = X[:, i] > xmax
+                if np.any(mask):
+                    XBS[
+                        mask,
+                        ((i + 1) * n_splines - degree):((i + 1) * n_splines),
+                    ] = f_max[-degree:]
+            elif self.extrapolation == "linear":
+                # Continue the degree first and degree last spline bases
+                # linearly beyond the boundaries, with slope = derivative at
+                # the boundary.
+                # Note that all others have derivative = value = 0 at the
+                # boundaries.
 
-                # d >= 1
-                for _ in range(1, self.degree):
-                    new_index = []
-                    end = index[-1]
-                    for feature_idx in range(n_features):
-                        start = index[feature_idx]
-                        new_index.append(current_col)
-                        if self.interaction_only:
-                            start += (index[feature_idx + 1] -
-                                      index[feature_idx])
-                        next_col = current_col + end - start
-                        if next_col <= current_col:
-                            break
-                        # XP[:, start:end] are terms of degree d - 1
-                        # that exclude feature #feature_idx.
-                        np.multiply(XP[:, start:end],
-                                    X[:, feature_idx:feature_idx + 1],
-                                    out=XP[:, current_col:next_col],
-                                    casting='no')
-                        current_col = next_col
+                # spline values at boundaries
+                f_min, f_max = spl(xmin), spl(xmax)
+                # spline derivatives = slopes at boundaries
+                fp_min, fp_max = spl(xmin, nu=1), spl(xmax, nu=1)
+                # Compute the linear continuation.
+                if degree <= 1:
+                    # For degree=1, the derivative of 2nd spline is not zero at
+                    # boundary. For degree=0 it is the same as 'constant'.
+                    degree += 1
+                for j in range(degree):
+                    mask = X[:, i] < xmin
+                    if np.any(mask):
+                        XBS[mask, i * n_splines + j] = (
+                            f_min[j] + (X[mask, i] - xmin) * fp_min[j]
+                        )
 
-                    new_index.append(current_col)
-                    index = new_index
+                    mask = X[:, i] > xmax
+                    if np.any(mask):
+                        k = n_splines - 1 - j
+                        XBS[mask, i * n_splines + k] = (
+                            f_max[k] + (X[mask, i] - xmax) * fp_max[k]
+                        )
 
-        return XP
+        if self.include_bias:
+            return XBS
+        else:
+            # We throw away one spline basis per feature.
+            # We chose the last one.
+            indices = [
+                j for j in range(XBS.shape[1]) if (j + 1) % n_splines != 0
+            ]
+            return XBS[:, indices]
\ No newline at end of file
diff --git a/sklearn/preprocessing/tests/test_polynomial.py b/sklearn/preprocessing/tests/test_polynomial.py
index 53d4c74005f07..af45542bcbad8 100644
--- a/sklearn/preprocessing/tests/test_polynomial.py
+++ b/sklearn/preprocessing/tests/test_polynomial.py
@@ -8,11 +8,11 @@
 
 from sklearn.linear_model import LinearRegression
 from sklearn.pipeline import Pipeline
-from sklearn.preprocessing import KBinsDiscretizer, SplineTransformer
-from sklearn.preprocessing import PolynomialFeatures
+from sklearn.preprocessing import (
+    KBinsDiscretizer, PolynomialFeatures, SplineTransformer
+)                           
 
 
-# TODO: add PolynomialFeatures if it moves to _polynomial.py
 @pytest.mark.parametrize("est", (SplineTransformer,))
 def test_polynomial_and_spline_array_order(est):
     """Test that output array has the given order."""

From a5b0c7fcf351afdeb49a0ad06bc35ba5970acbf2 Mon Sep 17 00:00:00 2001
From: avigupta2612 <avigupta2612@gmail.com>
Date: Sat, 13 Mar 2021 02:43:55 +0530
Subject: [PATCH 4/6] order changes

---
 sklearn/preprocessing/_polynomial.py           | 2 +-
 sklearn/preprocessing/tests/test_polynomial.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/preprocessing/_polynomial.py b/sklearn/preprocessing/_polynomial.py
index 1cf2135903a62..5e1a0fad4abe0 100644
--- a/sklearn/preprocessing/_polynomial.py
+++ b/sklearn/preprocessing/_polynomial.py
@@ -718,4 +718,4 @@ def transform(self, X):
             indices = [
                 j for j in range(XBS.shape[1]) if (j + 1) % n_splines != 0
             ]
-            return XBS[:, indices]
\ No newline at end of file
+            return XBS[:, indices]
diff --git a/sklearn/preprocessing/tests/test_polynomial.py b/sklearn/preprocessing/tests/test_polynomial.py
index af45542bcbad8..32edc664b36d5 100644
--- a/sklearn/preprocessing/tests/test_polynomial.py
+++ b/sklearn/preprocessing/tests/test_polynomial.py
@@ -10,7 +10,7 @@
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import (
     KBinsDiscretizer, PolynomialFeatures, SplineTransformer
-)                           
+)
 
 
 @pytest.mark.parametrize("est", (SplineTransformer,))

From 9fc135d7191970800dbe251edabad26b25ed48c0 Mon Sep 17 00:00:00 2001
From: avigupta2612 <avigupta2612@gmail.com>
Date: Tue, 16 Mar 2021 18:35:32 +0530
Subject: [PATCH 5/6] removed
 test_polynomial_test_polynomial_feature_array_order

---
 sklearn/preprocessing/tests/test_polynomial.py | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/sklearn/preprocessing/tests/test_polynomial.py b/sklearn/preprocessing/tests/test_polynomial.py
index 32edc664b36d5..31d97c021bc72 100644
--- a/sklearn/preprocessing/tests/test_polynomial.py
+++ b/sklearn/preprocessing/tests/test_polynomial.py
@@ -13,7 +13,7 @@
 )
 
 
-@pytest.mark.parametrize("est", (SplineTransformer,))
+@pytest.mark.parametrize("est", (PolynomialFeatures, SplineTransformer))
 def test_polynomial_and_spline_array_order(est):
     """Test that output array has the given order."""
     X = np.arange(10).reshape(5, 2)
@@ -321,18 +321,6 @@ def test_polynomial_feature_names():
                        feature_names)
 
 
-def test_polynomial_feature_array_order():
-    """Test that output array has the given order."""
-    X = np.arange(10).reshape(5, 2)
-
-    def is_c_contiguous(a):
-        return np.isfortran(a.T)
-
-    assert is_c_contiguous(PolynomialFeatures().fit_transform(X))
-    assert is_c_contiguous(PolynomialFeatures(order='C').fit_transform(X))
-    assert np.isfortran(PolynomialFeatures(order='F').fit_transform(X))
-
-
 @pytest.mark.parametrize(['deg', 'include_bias', 'interaction_only', 'dtype'],
                          [(1, True, False, int),
                           (2, True, False, int),

From bd544d2d32180607f384f74f22ec9434222f8921 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@gmail.com>
Date: Thu, 18 Mar 2021 14:11:15 +0100
Subject: [PATCH 6/6] Fix imports following merge conflict

---
 sklearn/preprocessing/tests/test_polynomial.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sklearn/preprocessing/tests/test_polynomial.py b/sklearn/preprocessing/tests/test_polynomial.py
index 4545589f81b2c..5068a8c7d8bdd 100644
--- a/sklearn/preprocessing/tests/test_polynomial.py
+++ b/sklearn/preprocessing/tests/test_polynomial.py
@@ -1,5 +1,8 @@
 import numpy as np
 import pytest
+from scipy import sparse
+from scipy.sparse import random as sparse_random
+from sklearn.utils._testing import assert_array_almost_equal
 
 from numpy.testing import assert_allclose, assert_array_equal
 from scipy.interpolate import BSpline