diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py index 8c8524ef6505c..639e4234f500d 100644 --- a/sklearn/preprocessing/data.py +++ b/sklearn/preprocessing/data.py @@ -1475,17 +1475,21 @@ def transform(self, X): Parameters ---------- - X : array-like or sparse matrix, shape [n_samples, n_features] + X : array-like or CSR/CSC sparse matrix, shape [n_samples, n_features] The data to transform, row by row. - Sparse input should preferably be in CSR format (for speed), - but must be in CSC format if the degree is 4 or higher. - - If the input matrix is in CSR format and the expansion is of - degree 2 or 3, the method described in the work "Leveraging - Sparsity to Speed Up Polynomial Feature Expansions of CSR - Matrices Using K-Simplex Numbers" by Andrew Nystrom and - John Hughes is used, which is much faster than the method - used on CSC input. + + Prefer CSR over CSC for sparse input (for speed), but CSC is + required if the degree is 4 or higher. If the degree is less than + 4 and the input format is CSC, it will be converted to CSR, have + its polynomial features generated, then converted back to CSC. + + If the degree is 2 or 3, the method described in "Leveraging + Sparsity to Speed Up Polynomial Feature Expansions of CSR Matrices + Using K-Simplex Numbers" by Andrew Nystrom and John Hughes is + used, which is much faster than the method used on CSC input. For + this reason, a CSC input will be converted to CSR, and the output + will be converted back to CSC prior to being returned, hence the + preference for CSR. Returns -------