-
-
Notifications
You must be signed in to change notification settings - Fork 26.2k
[MRG+1] Fix check array sparse param #7937
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
0a58700
b71b4fa
5d60b4d
9f749cc
932021f
9d8bc15
7fec795
f886ce2
5dc78bd
71340b3
3b84206
e93fda0
32df3b6
cceff86
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -200,40 +200,55 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, | |
spmatrix : scipy sparse matrix | ||
Input to validate and convert. | ||
|
||
accept_sparse : string, list of string or None (default=None) | ||
accept_sparse : string, boolean or list/tuple of strings | ||
String[s] representing allowed sparse matrix formats ('csc', | ||
'csr', 'coo', 'dok', 'bsr', 'lil', 'dia'). None means that sparse | ||
matrix input will raise an error. If the input is sparse but not in | ||
the allowed format, it will be converted to the first listed format. | ||
'csr', 'coo', 'dok', 'bsr', 'lil', 'dia'). If the input is sparse but | ||
not in the allowed format, it will be converted to the first listed | ||
format. True allows the input to be any format. False means | ||
that a sparse matrix input will raise an error. | ||
|
||
dtype : string, type or None (default=none) | ||
dtype : string, type or None | ||
Data type of result. If None, the dtype of the input is preserved. | ||
|
||
copy : boolean (default=False) | ||
copy : boolean | ||
Whether a forced copy will be triggered. If copy=False, a copy might | ||
be triggered by a conversion. | ||
|
||
force_all_finite : boolean (default=True) | ||
force_all_finite : boolean | ||
Whether to raise an error on np.inf and np.nan in X. | ||
|
||
Returns | ||
------- | ||
spmatrix_converted : scipy sparse matrix. | ||
Matrix that is ensured to have an allowed type. | ||
""" | ||
if accept_sparse in [None, False]: | ||
raise TypeError('A sparse matrix was passed, but dense ' | ||
'data is required. Use X.toarray() to ' | ||
'convert to a dense numpy array.') | ||
if dtype is None: | ||
dtype = spmatrix.dtype | ||
|
||
changed_format = False | ||
if (isinstance(accept_sparse, (list, tuple)) | ||
and spmatrix.format not in accept_sparse): | ||
# create new with correct sparse | ||
spmatrix = spmatrix.asformat(accept_sparse[0]) | ||
changed_format = True | ||
|
||
if isinstance(accept_sparse, six.string_types): | ||
accept_sparse = [accept_sparse] | ||
|
||
if accept_sparse is False: | ||
raise TypeError('A sparse matrix was passed, but dense ' | ||
'data is required. Use X.toarray() to ' | ||
'convert to a dense numpy array.') | ||
elif isinstance(accept_sparse, (list, tuple)): | ||
if len(accept_sparse) == 0: | ||
raise ValueError("When providing 'accept_sparse' " | ||
"as a tuple or list, it must contain at " | ||
"least one string value.") | ||
# ensure correct sparse format | ||
if spmatrix.format not in accept_sparse: | ||
# create new with correct sparse | ||
spmatrix = spmatrix.asformat(accept_sparse[0]) | ||
changed_format = True | ||
elif accept_sparse is not True: | ||
# any other type | ||
raise ValueError("Parameter 'accept_sparse' should be a string, " | ||
"boolean or list of strings. You provided " | ||
"'accept_sparse={}'.".format(accept_sparse)) | ||
|
||
if dtype != spmatrix.dtype: | ||
# convert dtype | ||
|
@@ -251,7 +266,7 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, | |
return spmatrix | ||
|
||
|
||
def check_array(array, accept_sparse=None, dtype="numeric", order=None, | ||
def check_array(array, accept_sparse=False, dtype="numeric", order=None, | ||
copy=False, force_all_finite=True, ensure_2d=True, | ||
allow_nd=False, ensure_min_samples=1, ensure_min_features=1, | ||
warn_on_dtype=False, estimator=None): | ||
|
@@ -266,11 +281,12 @@ def check_array(array, accept_sparse=None, dtype="numeric", order=None, | |
array : object | ||
Input object to check / convert. | ||
|
||
accept_sparse : string, list of string or None (default=None) | ||
accept_sparse : string, boolean or list/tuple of strings (default=False) | ||
String[s] representing allowed sparse matrix formats, such as 'csc', | ||
'csr', etc. None means that sparse matrix input will raise an error. | ||
If the input is sparse but not in the allowed format, it will be | ||
converted to the first listed format. | ||
'csr', etc. If the input is sparse but not in the allowed format, | ||
it will be converted to the first listed format. True allows the input | ||
to be any format. False means that a sparse matrix input will | ||
raise an error. | ||
|
||
dtype : string, type, list of types or None (default="numeric") | ||
Data type of result. If None, the dtype of the input is preserved. | ||
|
@@ -321,8 +337,14 @@ def check_array(array, accept_sparse=None, dtype="numeric", order=None, | |
X_converted : object | ||
The converted and validated X. | ||
""" | ||
if isinstance(accept_sparse, str): | ||
accept_sparse = [accept_sparse] | ||
# accept_sparse 'None' deprecation check | ||
if accept_sparse is None: | ||
warnings.warn( | ||
"Passing 'None' to parameter 'accept_sparse' in methods " | ||
"check_array and check_X_y is deprecated in version 0.19 " | ||
"and will be removed in 0.21. Use 'accept_sparse=False' " | ||
" instead.", DeprecationWarning) | ||
accept_sparse = False | ||
|
||
# store whether originally we wanted numeric dtype | ||
dtype_numeric = dtype == "numeric" | ||
|
@@ -406,7 +428,7 @@ def check_array(array, accept_sparse=None, dtype="numeric", order=None, | |
return array | ||
|
||
|
||
def check_X_y(X, y, accept_sparse=None, dtype="numeric", order=None, | ||
def check_X_y(X, y, accept_sparse=False, dtype="numeric", order=None, | ||
copy=False, force_all_finite=True, ensure_2d=True, | ||
allow_nd=False, multi_output=False, ensure_min_samples=1, | ||
ensure_min_features=1, y_numeric=False, | ||
|
@@ -427,11 +449,12 @@ def check_X_y(X, y, accept_sparse=None, dtype="numeric", order=None, | |
y : nd-array, list or sparse matrix | ||
Labels. | ||
|
||
accept_sparse : string, list of string or None (default=None) | ||
accept_sparse : string, boolean or list of string (default=False) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think you need to deprecate […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You mean raise a warning? But […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah sorry, I missed that; maybe the deprecation message in […] |
||
String[s] representing allowed sparse matrix formats, such as 'csc', | ||
'csr', etc. None means that sparse matrix input will raise an error. | ||
If the input is sparse but not in the allowed format, it will be | ||
converted to the first listed format. | ||
'csr', etc. If the input is sparse but not in the allowed format, | ||
it will be converted to the first listed format. True allows the input | ||
to be any format. False means that a sparse matrix input will | ||
raise an error. | ||
|
||
dtype : string, type, list of types or None (default="numeric") | ||
Data type of result. If None, the dtype of the input is preserved. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use `elif` and remove the newline, mostly for consistency.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That won't work, right? We want to end up in the case below where `isinstance(accept_sparse, (list, tuple))`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh yeah, sorry I missed that. Ignore my comment then.