From bb6914936dd441bbfafc28adea0ef85e5c4342c2 Mon Sep 17 00:00:00 2001 From: Andrew Nystrom Date: Tue, 30 Apr 2019 22:16:49 -0700 Subject: [PATCH 1/3] Updating PolynomialFeatures.Transform docstring Updating the docstring of PolynomialFeatures.Transform to be correct regarding sparse input and more up front about what happens to it. --- sklearn/preprocessing/data.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py index 8c8524ef6505c..77ce6bc706c02 100644 --- a/sklearn/preprocessing/data.py +++ b/sklearn/preprocessing/data.py @@ -1475,17 +1475,20 @@ def transform(self, X): Parameters ---------- - X : array-like or sparse matrix, shape [n_samples, n_features] + X : array-like or CSR/CSC sparse matrix, shape [n_samples, n_features] The data to transform, row by row. - Sparse input should preferably be in CSR format (for speed), - but must be in CSC format if the degree is 4 or higher. - - If the input matrix is in CSR format and the expansion is of - degree 2 or 3, the method described in the work "Leveraging - Sparsity to Speed Up Polynomial Feature Expansions of CSR - Matrices Using K-Simplex Numbers" by Andrew Nystrom and - John Hughes is used, which is much faster than the method - used on CSC input. + + Prefer CSR over CSC for sparse input (for speed), but CSC is + required if the degree is 4 or higher. If the degree is less than 4 + and the input format is CSC, it will be converted to CSR, have its + polynomial features generated, then converted back to CSC. + + If the degree is 2 or 3, the method described in "Leveraging Sparsity + to Speed Up Polynomial Feature Expansions of CSR Matrices Using + K-Simplex Numbers" by Andrew Nystrom and John Hughes is used, which + is much faster than the method used on CSC input. 
For this reason, a + CSC input will be converted to CSR, and the output will be converted + back to CSC prior to being returned, hence the preference of CSR. Returns ------- From 6438f09d3d66fc8bff2076d921ea1343d2d1981a Mon Sep 17 00:00:00 2001 From: Andrew Nystrom Date: Tue, 30 Apr 2019 22:24:43 -0700 Subject: [PATCH 2/3] Max of 79 columns --- sklearn/preprocessing/data.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py index 77ce6bc706c02..5cf8ddec30275 100644 --- a/sklearn/preprocessing/data.py +++ b/sklearn/preprocessing/data.py @@ -1479,16 +1479,17 @@ def transform(self, X): The data to transform, row by row. Prefer CSR over CSC for sparse input (for speed), but CSC is - required if the degree is 4 or higher. If the degree is less than 4 - and the input format is CSC, it will be converted to CSR, have its - polynomial features generated, then converted back to CSC. - - If the degree is 2 or 3, the method described in "Leveraging Sparsity - to Speed Up Polynomial Feature Expansions of CSR Matrices Using - K-Simplex Numbers" by Andrew Nystrom and John Hughes is used, which - is much faster than the method used on CSC input. For this reason, a - CSC input will be converted to CSR, and the output will be converted - back to CSC prior to being returned, hence the preference of CSR. + required if the degree is 4 or higher. If the degree is less than + 4 and the input format is CSC, it will be converted to CSR, have + its polynomial features generated, then converted back to CSC. + + If the degree is 2 or 3, the method described in "Leveraging + Sparsity to Speed Up Polynomial Feature Expansions of CSR Matrices + Using K-Simplex Numbers" by Andrew Nystrom and John Hughes is + used, which is much faster than the method used on CSC input. 
For + this reason, a CSC input will be converted to CSR, and the output + will be converted back to CSC prior to being returned, hence the + preference of CSR. Returns ------- From 73effc14c4e2b8ca5ce3ff98bf29ba9c69f3fdb1 Mon Sep 17 00:00:00 2001 From: Andrew Nystrom Date: Tue, 30 Apr 2019 22:33:07 -0700 Subject: [PATCH 3/3] Removing whitespace on blank line --- sklearn/preprocessing/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py index 5cf8ddec30275..639e4234f500d 100644 --- a/sklearn/preprocessing/data.py +++ b/sklearn/preprocessing/data.py @@ -1477,7 +1477,7 @@ def transform(self, X): ---------- X : array-like or CSR/CSC sparse matrix, shape [n_samples, n_features] The data to transform, row by row. - + Prefer CSR over CSC for sparse input (for speed), but CSC is required if the degree is 4 or higher. If the degree is less than 4 and the input format is CSC, it will be converted to CSR, have