From bd34680d0348f634261ecd2f40d83263a5ebc9b4 Mon Sep 17 00:00:00 2001
From: rpstanley90 <45323905+rpstanley90@users.noreply.github.com>
Date: Wed, 1 Jul 2020 11:09:45 -0500
Subject: [PATCH 1/2] Added tolerance to _handle_zeros_in_scale

Added a floating-point tolerance to _handle_zeros_in_scale to address
issue #17794, created on 6/30/2020. I'm using numpy's isclose() function
with its default absolute and relative tolerances (atol=1e-8, rtol=1e-5).
The defaults handled my test cases up to floats around 1e+20, at which
point the variable 'scale' grew to non-zero values even for
constant-valued vectors; there may be floating-point sensitivities
elsewhere in that computation as well, but those are outside the scope
of this issue.

I also could not exercise the first if-statement in
_handle_zeros_in_scale, which checks scalars for closeness to zero,
through StandardScaler(): scalar inputs are rejected by check_array().
It may be prudent to adjust that branch as well, but without a way to
test it properly and deeper knowledge of the package at the moment, I
didn't want to touch it.
---
 sklearn/preprocessing/_data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py
index 2b92bd3d09467..e82ba7a114821 100644
--- a/sklearn/preprocessing/_data.py
+++ b/sklearn/preprocessing/_data.py
@@ -74,7 +74,7 @@ def _handle_zeros_in_scale(scale, copy=True):
         if copy:
             # New array to avoid side-effects
             scale = scale.copy()
-        scale[scale == 0.0] = 1.0
+        scale[np.isclose(scale,0.0)] = 1.0
         return scale
 
 

From 51d5349cfb037a5418b1418c29a3295515db727d Mon Sep 17 00:00:00 2001
From: rpstanley90 <45323905+rpstanley90@users.noreply.github.com>
Date: Wed, 1 Jul 2020 11:50:05 -0500
Subject: [PATCH 2/2] Linting update

Updated formatting per the linting results: added the missing space
after the comma in the np.isclose() call.
---
 sklearn/preprocessing/_data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py
index e82ba7a114821..5ec4e14087a14 100644
--- a/sklearn/preprocessing/_data.py
+++ b/sklearn/preprocessing/_data.py
@@ -74,7 +74,7 @@ def _handle_zeros_in_scale(scale, copy=True):
         if copy:
             # New array to avoid side-effects
             scale = scale.copy()
-        scale[np.isclose(scale,0.0)] = 1.0
+        scale[np.isclose(scale, 0.0)] = 1.0
         return scale
 
 
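To illustrate what the tolerance change does in practice, here is a minimal sketch of the old comparison versus the patched one. The residue value 1e-12 below is a hypothetical stand-in for the non-zero variance a constant column can pick up through floating-point rounding; it is not taken from issue #17794:

```python
import numpy as np

# Hypothetical per-feature scales as a scaler might compute them:
# a normal feature, a constant column whose variance came out as
# rounding residue rather than exactly zero, and an exact zero.
scale = np.array([1.5, 1e-12, 0.0])

# Old behavior: exact comparison catches only the exact zero.
print(scale == 0.0)            # [False False  True]

# Patched behavior: with default tolerances, np.isclose(scale, 0.0)
# reduces to abs(scale) <= atol (1e-8), since rtol is scaled by the
# zero target and drops out. The rounding residue is now caught too.
mask = np.isclose(scale, 0.0)
print(mask)                    # [False  True  True]

# _handle_zeros_in_scale then maps those entries to 1.0, so later
# division by `scale` leaves constant features at zero instead of
# blowing them up.
scale[mask] = 1.0
print(scale)                   # [1.5 1.  1. ]
```

This also explains the 1e+20 caveat in the commit message: when comparing against zero only the absolute tolerance matters, so rounding residue that grows with the magnitude of the data can exceed atol=1e-8 for very large values, at which point the default tolerances stop flagging it.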