From 0a58700eeba1c6985610f4e7267827d1d97d6430 Mon Sep 17 00:00:00 2001 From: Josh Karnofsky Date: Wed, 23 Nov 2016 14:24:26 -0500 Subject: [PATCH 01/13] add check_array accept_sparse type exceptions and test cases --- sklearn/utils/tests/test_validation.py | 16 +++++++++ sklearn/utils/validation.py | 48 +++++++++++++++----------- 2 files changed, 44 insertions(+), 20 deletions(-) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 9c57ab4e96de8..9f90bf5e2cb16 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -324,6 +324,22 @@ def test_check_array_dtype_warning(): assert_equal(X_checked.format, 'csr') +def test_check_array_accept_sparse_type_exception(): + X = [[1, 2], [3, 4]] + X_csr = sp.csr_matrix(X) + garbage_param = SVR() + + assert_raises(TypeError, check_array, X_csr, accept_sparse=False) + assert_raises(TypeError, check_array, X_csr, accept_sparse=None) + assert_raises(ValueError, check_array, X_csr, accept_sparse=garbage_param) + + # don't raise errors + check_array(X_csr, accept_sparse=True) + check_array(X_csr, accept_sparse='csr') + check_array(X_csr, accept_sparse=['csr']) + check_array(X_csr, accept_sparse=('csr')) + + def test_check_array_min_samples_and_features_messages(): # empty list is considered 2D by default: msg = "0 feature(s) (shape=(1, 0)) while a minimum of 1 is required." diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index deb98eef85039..3fdc4cec6f363 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -218,11 +218,12 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, spmatrix : scipy sparse matrix Input to validate and convert. - accept_sparse : string, list of string or None (default=None) + accept_sparse : string, list of string, boolean or None (default=None) String[s] representing allowed sparse matrix formats ('csc', - 'csr', 'coo', 'dok', 'bsr', 'lil', 'dia'). 
None means that sparse - matrix input will raise an error. If the input is sparse but not in - the allowed format, it will be converted to the first listed format. + 'csr', 'coo', 'dok', 'bsr', 'lil', 'dia'). If the input is sparse but + not in the allowed format, it will be converted to the first listed + format. True allows the input to be any format. False or None means + that a sparse matrix input will raise an error. dtype : string, type or None (default=none) Data type of result. If None, the dtype of the input is preserved. @@ -239,19 +240,28 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, spmatrix_converted : scipy sparse matrix. Matrix that is ensured to have an allowed type. """ - if accept_sparse in [None, False]: - raise TypeError('A sparse matrix was passed, but dense ' - 'data is required. Use X.toarray() to ' - 'convert to a dense numpy array.') if dtype is None: dtype = spmatrix.dtype changed_format = False - if (isinstance(accept_sparse, (list, tuple)) - and spmatrix.format not in accept_sparse): - # create new with correct sparse - spmatrix = spmatrix.asformat(accept_sparse[0]) - changed_format = True + + if isinstance(accept_sparse, str): + accept_sparse = [accept_sparse] + + if accept_sparse in [None, False]: + raise TypeError('A sparse matrix was passed, but dense ' + 'data is required. Use X.toarray() to ' + 'convert to a dense numpy array.') + elif isinstance(accept_sparse, (list, tuple)): + # ensure correct sparse format + if spmatrix.format not in accept_sparse: + # create new with correct sparse + spmatrix = spmatrix.asformat(accept_sparse[0]) + changed_format = True + elif accept_sparse is not True: + # any other type + raise ValueError('The parameter \'accept_sparse\' was ' + 'not a correct type.') if dtype != spmatrix.dtype: # convert dtype @@ -284,11 +294,12 @@ def check_array(array, accept_sparse=None, dtype="numeric", order=None, array : object Input object to check / convert. 
- accept_sparse : string, list of string or None (default=None) + accept_sparse : string, list of string, boolean or None (default=None) String[s] representing allowed sparse matrix formats, such as 'csc', - 'csr', etc. None means that sparse matrix input will raise an error. - If the input is sparse but not in the allowed format, it will be - converted to the first listed format. + 'csr', etc. If the input is sparse but not in the allowed format, + it will be converted to the first listed format. True allows the input + to be any format. False or None means that a sparse matrix input will + raise an error. dtype : string, type, list of types or None (default="numeric") Data type of result. If None, the dtype of the input is preserved. @@ -339,9 +350,6 @@ def check_array(array, accept_sparse=None, dtype="numeric", order=None, X_converted : object The converted and validated X. """ - if isinstance(accept_sparse, str): - accept_sparse = [accept_sparse] - # store whether originally we wanted numeric dtype dtype_numeric = dtype == "numeric" From b71b4fabb799f0a0413f1d8384392b78ef21e8ae Mon Sep 17 00:00:00 2001 From: Josh Karnofsky Date: Thu, 24 Nov 2016 16:29:47 -0500 Subject: [PATCH 02/13] add accept_sparse condition for list --- sklearn/utils/tests/test_validation.py | 19 ++++++++++++++----- sklearn/utils/validation.py | 3 ++- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 9f90bf5e2cb16..f55ecbf69f040 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -327,11 +327,20 @@ def test_check_array_dtype_warning(): def test_check_array_accept_sparse_type_exception(): X = [[1, 2], [3, 4]] X_csr = sp.csr_matrix(X) - garbage_param = SVR() - - assert_raises(TypeError, check_array, X_csr, accept_sparse=False) - assert_raises(TypeError, check_array, X_csr, accept_sparse=None) - assert_raises(ValueError, check_array, X_csr, 
accept_sparse=garbage_param) + invalid_type = SVR() + + msg = "A sparse matrix was passed, but dense data is required. " \ + "Use X.toarray() to convert to a dense numpy array." + assert_raise_message(TypeError, msg, + check_array, X_csr, accept_sparse=False) + assert_raise_message(TypeError, msg, + check_array, X_csr, accept_sparse=None) + + msg = "The parameter \'accept_sparse\' was not a correct type." + assert_raise_message(ValueError, msg, + check_array, X_csr, accept_sparse=invalid_type) + assert_raise_message(ValueError, msg, + check_array, X_csr, accept_sparse=[invalid_type]) # don't raise errors check_array(X_csr, accept_sparse=True) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 3fdc4cec6f363..a8362e5d14550 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -252,7 +252,8 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, raise TypeError('A sparse matrix was passed, but dense ' 'data is required. Use X.toarray() to ' 'convert to a dense numpy array.') - elif isinstance(accept_sparse, (list, tuple)): + elif (isinstance(accept_sparse, (list, tuple)) and + isinstance(accept_sparse[0], str)): # ensure correct sparse format if spmatrix.format not in accept_sparse: # create new with correct sparse From 5d60b4d663691ff6bcc61e87bc2044ae5da29c0e Mon Sep 17 00:00:00 2001 From: Josh Karnofsky Date: Tue, 29 Nov 2016 17:15:37 -0500 Subject: [PATCH 03/13] check_array clean docs and add empty list check --- sklearn/utils/tests/test_validation.py | 4 ++++ sklearn/utils/validation.py | 5 +++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index f55ecbf69f040..17095978b3b85 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -341,6 +341,10 @@ def test_check_array_accept_sparse_type_exception(): check_array, X_csr, accept_sparse=invalid_type) 
assert_raise_message(ValueError, msg, check_array, X_csr, accept_sparse=[invalid_type]) + assert_raise_message(ValueError, msg, + check_array, X_csr, accept_sparse=[]) + assert_raise_message(ValueError, msg, + check_array, X_csr, accept_sparse=()) # don't raise errors check_array(X_csr, accept_sparse=True) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index a8362e5d14550..e0ede45a65085 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -218,7 +218,7 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, spmatrix : scipy sparse matrix Input to validate and convert. - accept_sparse : string, list of string, boolean or None (default=None) + accept_sparse : string, list of strings or boolean (default=None) String[s] representing allowed sparse matrix formats ('csc', 'csr', 'coo', 'dok', 'bsr', 'lil', 'dia'). If the input is sparse but not in the allowed format, it will be converted to the first listed @@ -253,6 +253,7 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, 'data is required. Use X.toarray() to ' 'convert to a dense numpy array.') elif (isinstance(accept_sparse, (list, tuple)) and + len(accept_sparse) and isinstance(accept_sparse[0], str)): # ensure correct sparse format if spmatrix.format not in accept_sparse: @@ -295,7 +296,7 @@ def check_array(array, accept_sparse=None, dtype="numeric", order=None, array : object Input object to check / convert. - accept_sparse : string, list of string, boolean or None (default=None) + accept_sparse : string, list of strings or boolean (default=None) String[s] representing allowed sparse matrix formats, such as 'csc', 'csr', etc. If the input is sparse but not in the allowed format, it will be converted to the first listed format. 
True allows the input From 9f749cc02d797b4bc3374e9dbbb0ac52769b299a Mon Sep 17 00:00:00 2001 From: Josh Karnofsky Date: Wed, 30 Nov 2016 01:58:49 -0500 Subject: [PATCH 04/13] check_array improve type checks and test cases --- sklearn/utils/tests/test_validation.py | 16 +++++++++------- sklearn/utils/validation.py | 7 +++---- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 17095978b3b85..2ca9813cfa9a3 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -336,15 +336,17 @@ def test_check_array_accept_sparse_type_exception(): assert_raise_message(TypeError, msg, check_array, X_csr, accept_sparse=None) - msg = "The parameter \'accept_sparse\' was not a correct type." - assert_raise_message(ValueError, msg, - check_array, X_csr, accept_sparse=invalid_type) - assert_raise_message(ValueError, msg, - check_array, X_csr, accept_sparse=[invalid_type]) - assert_raise_message(ValueError, msg, + msg = "The parameter 'accept_sparse={}' was not a correct type." + assert_raise_message(ValueError, msg.format([]), check_array, X_csr, accept_sparse=[]) - assert_raise_message(ValueError, msg, + assert_raise_message(ValueError, msg.format(()), check_array, X_csr, accept_sparse=()) + assert_raise_message(ValueError, msg.format(invalid_type), + check_array, X_csr, accept_sparse=invalid_type) + + msg = "Can't convert 'SVR' object to str implicitly" + assert_raise_message(TypeError, msg, + check_array, X_csr, accept_sparse=[invalid_type]) # don't raise errors check_array(X_csr, accept_sparse=True) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index e0ede45a65085..8953e803ee480 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -253,8 +253,7 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, 'data is required. 
Use X.toarray() to ' 'convert to a dense numpy array.') elif (isinstance(accept_sparse, (list, tuple)) and - len(accept_sparse) and - isinstance(accept_sparse[0], str)): + len(accept_sparse)): # ensure correct sparse format if spmatrix.format not in accept_sparse: # create new with correct sparse @@ -262,8 +261,8 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, changed_format = True elif accept_sparse is not True: # any other type - raise ValueError('The parameter \'accept_sparse\' was ' - 'not a correct type.') + raise ValueError(("The parameter 'accept_sparse={}' " + "was not a correct type.").format(accept_sparse)) if dtype != spmatrix.dtype: # convert dtype From 932021f9501e9b9cf5ede2442fc398e5ffbbf7a9 Mon Sep 17 00:00:00 2001 From: Josh Karnofsky Date: Wed, 30 Nov 2016 15:45:38 -0500 Subject: [PATCH 05/13] check_array update test case message --- sklearn/utils/tests/test_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 2ca9813cfa9a3..78c79d8606f6c 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -344,7 +344,7 @@ def test_check_array_accept_sparse_type_exception(): assert_raise_message(ValueError, msg.format(invalid_type), check_array, X_csr, accept_sparse=invalid_type) - msg = "Can't convert 'SVR' object to str implicitly" + msg = "'SVR' object" assert_raise_message(TypeError, msg, check_array, X_csr, accept_sparse=[invalid_type]) From 9d8bc1528f517bd5ed4be3ca5f13282fab8a946e Mon Sep 17 00:00:00 2001 From: Josh Karnofsky Date: Wed, 30 Nov 2016 16:31:20 -0500 Subject: [PATCH 06/13] check_array improve type check message --- sklearn/utils/tests/test_validation.py | 2 +- sklearn/utils/validation.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 78c79d8606f6c..09a0d36d03801 
100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -336,7 +336,7 @@ def test_check_array_accept_sparse_type_exception(): assert_raise_message(TypeError, msg, check_array, X_csr, accept_sparse=None) - msg = "The parameter 'accept_sparse={}' was not a correct type." + msg = "Invalid parameter 'accept_sparse={}'" assert_raise_message(ValueError, msg.format([]), check_array, X_csr, accept_sparse=[]) assert_raise_message(ValueError, msg.format(()), diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 8953e803ee480..12322a7b99034 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -261,8 +261,8 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, changed_format = True elif accept_sparse is not True: # any other type - raise ValueError(("The parameter 'accept_sparse={}' " - "was not a correct type.").format(accept_sparse)) + raise ValueError(("Invalid parameter " + "'accept_sparse={}'").format(accept_sparse)) if dtype != spmatrix.dtype: # convert dtype From 7fec795bb4b3d19e6e3aea5ed77a77b37f36ac6b Mon Sep 17 00:00:00 2001 From: Josh Karnofsky Date: Thu, 1 Dec 2016 16:06:56 -0500 Subject: [PATCH 07/13] check_array deprecate None type --- sklearn/utils/tests/test_validation.py | 18 +++++++++++--- sklearn/utils/validation.py | 34 ++++++++++++++++---------- 2 files changed, 36 insertions(+), 16 deletions(-) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 09a0d36d03801..6ed340e4ce1eb 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -336,7 +336,8 @@ def test_check_array_accept_sparse_type_exception(): assert_raise_message(TypeError, msg, check_array, X_csr, accept_sparse=None) - msg = "Invalid parameter 'accept_sparse={}'" + msg = "Parameter 'accept_sparse' should be a string, " \ + "boolean or list of strings. You provided 'accept_sparse={}'." 
assert_raise_message(ValueError, msg.format([]), check_array, X_csr, accept_sparse=[]) assert_raise_message(ValueError, msg.format(()), @@ -348,11 +349,22 @@ def test_check_array_accept_sparse_type_exception(): assert_raise_message(TypeError, msg, check_array, X_csr, accept_sparse=[invalid_type]) - # don't raise errors + # Test deprecation of 'None' + msg = "Passing None to parameter 'accept_sparse' is " \ + "deprecated in 0.19. Use False instead." + + assert_raise_message((DeprecationWarning, TypeError), msg, + check_array, X_csr, accept_sparse=None) + + +def test_check_array_accept_sparse_no_exception(): + X = [[1, 2], [3, 4]] + X_csr = sp.csr_matrix(X) + check_array(X_csr, accept_sparse=True) check_array(X_csr, accept_sparse='csr') check_array(X_csr, accept_sparse=['csr']) - check_array(X_csr, accept_sparse=('csr')) + check_array(X_csr, accept_sparse=('csr',)) def test_check_array_min_samples_and_features_messages(): diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 12322a7b99034..7037689601162 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -218,11 +218,11 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, spmatrix : scipy sparse matrix Input to validate and convert. - accept_sparse : string, list of strings or boolean (default=None) + accept_sparse : string, boolean or list/tuple of strings (default=False) String[s] representing allowed sparse matrix formats ('csc', 'csr', 'coo', 'dok', 'bsr', 'lil', 'dia'). If the input is sparse but not in the allowed format, it will be converted to the first listed - format. True allows the input to be any format. False or None means + format. True allows the input to be any format. False means that a sparse matrix input will raise an error. 
dtype : string, type or None (default=none) @@ -245,10 +245,16 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, changed_format = False - if isinstance(accept_sparse, str): + if isinstance(accept_sparse, six.string_types): accept_sparse = [accept_sparse] if accept_sparse in [None, False]: + if accept_sparse is None: + warnings.warn( + "Passing None to parameter 'accept_sparse' is " + "deprecated in 0.19. Use False instead.", + DeprecationWarning) + raise TypeError('A sparse matrix was passed, but dense ' 'data is required. Use X.toarray() to ' 'convert to a dense numpy array.') @@ -261,8 +267,9 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, changed_format = True elif accept_sparse is not True: # any other type - raise ValueError(("Invalid parameter " - "'accept_sparse={}'").format(accept_sparse)) + raise ValueError(("Parameter 'accept_sparse' should be a string, " + "boolean or list of strings. You provided " + "'accept_sparse={}'.").format(accept_sparse)) if dtype != spmatrix.dtype: # convert dtype @@ -280,7 +287,7 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, return spmatrix -def check_array(array, accept_sparse=None, dtype="numeric", order=None, +def check_array(array, accept_sparse=False, dtype="numeric", order=None, copy=False, force_all_finite=True, ensure_2d=True, allow_nd=False, ensure_min_samples=1, ensure_min_features=1, warn_on_dtype=False, estimator=None): @@ -295,11 +302,11 @@ def check_array(array, accept_sparse=None, dtype="numeric", order=None, array : object Input object to check / convert. - accept_sparse : string, list of strings or boolean (default=None) + accept_sparse : string, boolean or list/tuple of strings (default=False) String[s] representing allowed sparse matrix formats, such as 'csc', 'csr', etc. If the input is sparse but not in the allowed format, it will be converted to the first listed format. True allows the input - to be any format. 
False or None means that a sparse matrix input will + to be any format. False means that a sparse matrix input will raise an error. dtype : string, type, list of types or None (default="numeric") @@ -439,7 +446,7 @@ def check_array(array, accept_sparse=None, dtype="numeric", order=None, return array -def check_X_y(X, y, accept_sparse=None, dtype="numeric", order=None, +def check_X_y(X, y, accept_sparse=False, dtype="numeric", order=None, copy=False, force_all_finite=True, ensure_2d=True, allow_nd=False, multi_output=False, ensure_min_samples=1, ensure_min_features=1, y_numeric=False, @@ -460,11 +467,12 @@ def check_X_y(X, y, accept_sparse=None, dtype="numeric", order=None, y : nd-array, list or sparse matrix Labels. - accept_sparse : string, list of string or None (default=None) + accept_sparse : string, boolean or list of string (default=False) String[s] representing allowed sparse matrix formats, such as 'csc', - 'csr', etc. None means that sparse matrix input will raise an error. - If the input is sparse but not in the allowed format, it will be - converted to the first listed format. + 'csr', etc. If the input is sparse but not in the allowed format, + it will be converted to the first listed format. True allows the input + to be any format. False means that a sparse matrix input will + raise an error. dtype : string, type, list of types or None (default="numeric") Data type of result. If None, the dtype of the input is preserved. 
From f886ce2d435d9961da8af14cc49b736200bb7929 Mon Sep 17 00:00:00 2001 From: Josh Karnofsky Date: Mon, 12 Dec 2016 01:47:18 -0500 Subject: [PATCH 08/13] fix check_array deprecation --- sklearn/utils/tests/test_validation.py | 6 +----- sklearn/utils/validation.py | 17 ++++++++++------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 6ed340e4ce1eb..bdab10bda786e 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -350,11 +350,7 @@ def test_check_array_accept_sparse_type_exception(): check_array, X_csr, accept_sparse=[invalid_type]) # Test deprecation of 'None' - msg = "Passing None to parameter 'accept_sparse' is " \ - "deprecated in 0.19. Use False instead." - - assert_raise_message((DeprecationWarning, TypeError), msg, - check_array, X_csr, accept_sparse=None) + assert_warns(DeprecationWarning, check_array, X, accept_sparse=None) def test_check_array_accept_sparse_no_exception(): diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 7037689601162..aff405434a399 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -248,13 +248,7 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, if isinstance(accept_sparse, six.string_types): accept_sparse = [accept_sparse] - if accept_sparse in [None, False]: - if accept_sparse is None: - warnings.warn( - "Passing None to parameter 'accept_sparse' is " - "deprecated in 0.19. Use False instead.", - DeprecationWarning) - + if accept_sparse is False: raise TypeError('A sparse matrix was passed, but dense ' 'data is required. Use X.toarray() to ' 'convert to a dense numpy array.') @@ -358,6 +352,15 @@ def check_array(array, accept_sparse=False, dtype="numeric", order=None, X_converted : object The converted and validated X. 
""" + # accept_sparse 'None' deprecation check + if accept_sparse is None: + warnings.warn( + "Passing 'None' to parameter 'accept_sparse' is " + "deprecated in version 0.19 and will be deprecated " + "in 0.21. Use 'False' instead.", + DeprecationWarning) + accept_sparse = False + # store whether originally we wanted numeric dtype dtype_numeric = dtype == "numeric" From 5dc78bd380e81e5c86a1be1fb0b3126388d7fc56 Mon Sep 17 00:00:00 2001 From: Josh Karnofsky Date: Sun, 18 Dec 2016 18:15:52 -0500 Subject: [PATCH 09/13] check_array add whats new and improve deprecation --- doc/whats_new.rst | 6 ++++++ sklearn/utils/tests/test_validation.py | 15 +++++++++------ sklearn/utils/validation.py | 24 +++++++++++++----------- 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index fef9ec8b72d9f..a9a27a166fad5 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -85,6 +85,12 @@ Enhancements do not set attributes on the estimator. :issue:`7533` by :user:`Ekaterina Krivich `. + - Added type checking to the ``accept_sparse`` parameter in + :mod:`sklearn.utils.validation` methods. This parameter now accepts only + boolean, string, or list/tuple of strings values. `None` is no longer an + accepted type, and has been marked as deprecated. + :issue:`7880` by `Josh Karnofsky `_. + Bug fixes ......... diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index bdab10bda786e..3f3297449fcce 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -329,21 +329,24 @@ def test_check_array_accept_sparse_type_exception(): X_csr = sp.csr_matrix(X) invalid_type = SVR() - msg = "A sparse matrix was passed, but dense data is required. " \ - "Use X.toarray() to convert to a dense numpy array." + msg = ("A sparse matrix was passed, but dense data is required. 
" + "Use X.toarray() to convert to a dense numpy array.") assert_raise_message(TypeError, msg, check_array, X_csr, accept_sparse=False) assert_raise_message(TypeError, msg, check_array, X_csr, accept_sparse=None) - msg = "Parameter 'accept_sparse' should be a string, " \ - "boolean or list of strings. You provided 'accept_sparse={}'." + msg = ("Parameter 'accept_sparse' should be a string, " + "boolean or list of strings. You provided 'accept_sparse={}'.") + assert_raise_message(ValueError, msg.format(invalid_type), + check_array, X_csr, accept_sparse=invalid_type) + + msg = ("When providing 'accept_sparse' as a tuple or list, " + "it must contain at least one string value.") assert_raise_message(ValueError, msg.format([]), check_array, X_csr, accept_sparse=[]) assert_raise_message(ValueError, msg.format(()), check_array, X_csr, accept_sparse=()) - assert_raise_message(ValueError, msg.format(invalid_type), - check_array, X_csr, accept_sparse=invalid_type) msg = "'SVR' object" assert_raise_message(TypeError, msg, diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index aff405434a399..d89a93ae695a9 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -218,21 +218,21 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, spmatrix : scipy sparse matrix Input to validate and convert. - accept_sparse : string, boolean or list/tuple of strings (default=False) + accept_sparse : string, boolean or list/tuple of strings String[s] representing allowed sparse matrix formats ('csc', 'csr', 'coo', 'dok', 'bsr', 'lil', 'dia'). If the input is sparse but not in the allowed format, it will be converted to the first listed format. True allows the input to be any format. False means that a sparse matrix input will raise an error. - dtype : string, type or None (default=none) + dtype : string, type or None Data type of result. If None, the dtype of the input is preserved. 
- copy : boolean (default=False) + copy : boolean Whether a forced copy will be triggered. If copy=False, a copy might be triggered by a conversion. - force_all_finite : boolean (default=True) + force_all_finite : boolean Whether to raise an error on np.inf and np.nan in X. Returns @@ -252,8 +252,10 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, raise TypeError('A sparse matrix was passed, but dense ' 'data is required. Use X.toarray() to ' 'convert to a dense numpy array.') - elif (isinstance(accept_sparse, (list, tuple)) and - len(accept_sparse)): + elif (isinstance(accept_sparse, (list, tuple))): + if len(accept_sparse) == 0: + raise ValueError("When providing 'accept_sparse' as a tuple or list, " + "it must contain at least one string value.") # ensure correct sparse format if spmatrix.format not in accept_sparse: # create new with correct sparse @@ -261,9 +263,9 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, changed_format = True elif accept_sparse is not True: # any other type - raise ValueError(("Parameter 'accept_sparse' should be a string, " + raise ValueError("Parameter 'accept_sparse' should be a string, " "boolean or list of strings. You provided " - "'accept_sparse={}'.").format(accept_sparse)) + "'accept_sparse={}'.".format(accept_sparse)) if dtype != spmatrix.dtype: # convert dtype @@ -355,9 +357,9 @@ def check_array(array, accept_sparse=False, dtype="numeric", order=None, # accept_sparse 'None' deprecation check if accept_sparse is None: warnings.warn( - "Passing 'None' to parameter 'accept_sparse' is " - "deprecated in version 0.19 and will be deprecated " - "in 0.21. Use 'False' instead.", + "Passing 'None' to parameter 'accept_sparse' in methods " + "check_array and check_X_y is deprecated in version 0.19 " + "and will be removed in 0.21. 
Use 'False' instead.", DeprecationWarning) accept_sparse = False From 71340b3f9b3124ecfd1ab8dfcb16b715778ba3fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Mon, 19 Dec 2016 09:22:37 +0100 Subject: [PATCH 10/13] COSMIT more explicit wording in error message Also remove unnecessary parentheses --- sklearn/utils/validation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index d89a93ae695a9..f8f6bd71cda7b 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -252,7 +252,7 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, raise TypeError('A sparse matrix was passed, but dense ' 'data is required. Use X.toarray() to ' 'convert to a dense numpy array.') - elif (isinstance(accept_sparse, (list, tuple))): + elif isinstance(accept_sparse, (list, tuple)): if len(accept_sparse) == 0: raise ValueError("When providing 'accept_sparse' as a tuple or list, " "it must contain at least one string value.") @@ -359,8 +359,8 @@ def check_array(array, accept_sparse=False, dtype="numeric", order=None, warnings.warn( "Passing 'None' to parameter 'accept_sparse' in methods " "check_array and check_X_y is deprecated in version 0.19 " - "and will be removed in 0.21. Use 'False' instead.", - DeprecationWarning) + "and will be removed in 0.21. 
Use 'accept_sparse=False' " + " instead.", DeprecationWarning) accept_sparse = False # store whether originally we wanted numeric dtype From 3b84206c8f5b9f94455c60c1bba7d20b0dbf1aa7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Mon, 19 Dec 2016 09:28:04 +0100 Subject: [PATCH 11/13] Tweak wording in whats_new.rst --- doc/whats_new.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index a9a27a166fad5..27ab82d8ad1d4 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -87,9 +87,9 @@ Enhancements - Added type checking to the ``accept_sparse`` parameter in :mod:`sklearn.utils.validation` methods. This parameter now accepts only - boolean, string, or list/tuple of strings values. `None` is no longer an - accepted type, and has been marked as deprecated. - :issue:`7880` by `Josh Karnofsky `_. + boolean, string, or list/tuple of strings. ``accept_sparse=None`` is deprecated + and should be replaced by ``accept_sparse=False``. + :issue:`7880` by :user:`Josh Karnofsky `. Bug fixes ......... From 32df3b65cde9d1667b9aa9cdc386ed605f64abb5 Mon Sep 17 00:00:00 2001 From: Josh Karnofsky Date: Mon, 19 Dec 2016 12:16:25 -0500 Subject: [PATCH 12/13] check_array small style improvements --- sklearn/utils/tests/test_validation.py | 2 +- sklearn/utils/validation.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index fc6168ee445fe..96387170e56db 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -333,7 +333,7 @@ def test_check_array_accept_sparse_type_exception(): assert_raise_message(TypeError, msg, check_array, X_csr, accept_sparse=None) - msg = ("Parameter 'accept_sparse' should be a string, " + msg = ("Parameter 'accept_sparse' should be a string, " "boolean or list of strings. 
You provided 'accept_sparse={}'.") assert_raise_message(ValueError, msg.format(invalid_type), check_array, X_csr, accept_sparse=invalid_type) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 05422d2f31fa1..07888719f457b 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -236,8 +236,9 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, 'convert to a dense numpy array.') elif isinstance(accept_sparse, (list, tuple)): if len(accept_sparse) == 0: - raise ValueError("When providing 'accept_sparse' as a tuple or list, " - "it must contain at least one string value.") + raise ValueError("When providing 'accept_sparse' " + "as a tuple or list, it must contain at " + "least one string value.") # ensure correct sparse format if spmatrix.format not in accept_sparse: # create new with correct sparse @@ -246,8 +247,8 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, elif accept_sparse is not True: # any other type raise ValueError("Parameter 'accept_sparse' should be a string, " - "boolean or list of strings. You provided " - "'accept_sparse={}'.".format(accept_sparse)) + "boolean or list of strings. 
You provided " + "'accept_sparse={}'.".format(accept_sparse)) if dtype != spmatrix.dtype: # convert dtype From cceff8617113a81882d88bdd49a55045910fa311 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Tue, 20 Dec 2016 16:42:21 +1100 Subject: [PATCH 13/13] PEP8 indentation --- sklearn/utils/validation.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 07888719f457b..58ea733c3a118 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -340,10 +340,10 @@ def check_array(array, accept_sparse=False, dtype="numeric", order=None, # accept_sparse 'None' deprecation check if accept_sparse is None: warnings.warn( - "Passing 'None' to parameter 'accept_sparse' in methods " - "check_array and check_X_y is deprecated in version 0.19 " - "and will be removed in 0.21. Use 'accept_sparse=False' " - " instead.", DeprecationWarning) + "Passing 'None' to parameter 'accept_sparse' in methods " + "check_array and check_X_y is deprecated in version 0.19 " + "and will be removed in 0.21. Use 'accept_sparse=False' " + " instead.", DeprecationWarning) accept_sparse = False # store whether originally we wanted numeric dtype