From 1cf77b0cb32e9ac3d424d8eaeb3e4125886e3c6d Mon Sep 17 00:00:00 2001 From: rnmourao Date: Wed, 19 Jun 2024 19:06:14 +0100 Subject: [PATCH 01/21] fix issue 28946 --- sklearn/preprocessing/_data.py | 3 ++- sklearn/preprocessing/tests/test_data.py | 26 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index d1415e0ff71d2..0e1f980e65df2 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -3352,7 +3352,8 @@ def inverse_transform(self, X): "yeo-johnson": self._yeo_johnson_inverse_transform, }[self.method] for i, lmbda in enumerate(self.lambdas_): - with np.errstate(invalid="ignore"): # hide NaN warnings + # raise RuntimeWarning if return NaNs + with np.errstate(invalid="warn"): X[:, i] = inv_fun(X[:, i], lmbda) return X diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index 5d254e491b400..d54cdc37feb4f 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -2605,3 +2605,29 @@ def test_power_transformer_constant_feature(standardize): assert_allclose(Xt_, np.zeros_like(X)) else: assert_allclose(Xt_, X) + + +def test_yeo_johnson_inverse_transform_warning(): + """Check if a warning is triggered when the inverse transformations of the + Box-Cox and Yeo-Johnson transformers return NaN values.""" + trans = PowerTransformer(method='yeo-johnson') + x = np.array([1, 1, 1e10]).reshape(-1, 1) # extreme skew + trans.fit(x) + lmbda = trans.lambdas_[0] + print(f"Lambda: {lmbda}") + assert lmbda < 0 # Should be negative + + # any value `psi` for which lambda * psi + 1 <= 0 will result in nan due + # to lacking support + psi = np.array([10]).reshape(-1, 1) + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + x_inv = trans.inverse_transform(psi).item() + print(f"Inverse transformed value: {x_inv}") + assert np.isnan(x_inv) + + # Check if warning was raised + assert len(w) > 0 + for i in w: + print(i) + assert issubclass(w[-1].category, RuntimeWarning) From cbac66722b8cc765798460935c144a086d437425 Mon Sep 17 00:00:00 2001 From: rnmourao Date: Thu, 20 Jun 2024 14:21:58 +0100 Subject: [PATCH 02/21] lint --- sklearn/preprocessing/tests/test_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index d54cdc37feb4f..bef42ab3c5057 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -2609,8 +2609,8 @@ def test_power_transformer_constant_feature(standardize): def test_yeo_johnson_inverse_transform_warning(): """Check if a warning is triggered when the inverse transformations of the - Box-Cox and Yeo-Johnson transformers return NaN values.""" - trans = PowerTransformer(method='yeo-johnson') + Box-Cox and Yeo-Johnson transformers return NaN values.""" + trans = PowerTransformer(method="yeo-johnson") x = np.array([1, 1, 1e10]).reshape(-1, 1) # extreme skew trans.fit(x) lmbda = trans.lambdas_[0] From 47037a8c31904a056e83d872255658ab1a0c98f4 Mon Sep 17 00:00:00 2001 From: maf-rnmourao <104142107+maf-rnmourao@users.noreply.github.com> Date: Sat, 23 Nov 2024 21:48:45 +0400 Subject: [PATCH 03/21] Update sklearn/preprocessing/tests/test_data.py Co-authored-by: Thomas J. Fan --- sklearn/preprocessing/tests/test_data.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index 7d2cdd4e74c0a..7e74e2401b705 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -2628,7 +2628,6 @@ def test_yeo_johnson_inverse_transform_warning(): x = np.array([1, 1, 1e10]).reshape(-1, 1) # extreme skew trans.fit(x) lmbda = trans.lambdas_[0] - print(f"Lambda: {lmbda}") assert lmbda < 0 # Should be negative # any value `psi` for which lambda * psi + 1 <= 0 will result in nan due From 2cb8a3e463b13af20f1779f83c3c77643d90c119 Mon Sep 17 00:00:00 2001 From: maf-rnmourao <104142107+maf-rnmourao@users.noreply.github.com> Date: Sat, 23 Nov 2024 21:49:08 +0400 Subject: [PATCH 04/21] Update sklearn/preprocessing/tests/test_data.py Co-authored-by: Thomas J. Fan --- sklearn/preprocessing/tests/test_data.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index 7e74e2401b705..b544ac59e4c60 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -2641,6 +2641,4 @@ def test_yeo_johnson_inverse_transform_warning(): # Check if warning was raised assert len(w) > 0 - for i in w: - print(i) assert issubclass(w[-1].category, RuntimeWarning) From 3d1ded9cf9e7e7eac09ef09b6cc17286dbc7d78d Mon Sep 17 00:00:00 2001 From: maf-rnmourao <104142107+maf-rnmourao@users.noreply.github.com> Date: Sat, 23 Nov 2024 21:50:11 +0400 Subject: [PATCH 05/21] Update sklearn/preprocessing/tests/test_data.py Co-authored-by: Thomas J. Fan --- sklearn/preprocessing/tests/test_data.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index b544ac59e4c60..6ca2fa24bfef2 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -2636,7 +2636,6 @@ def test_yeo_johnson_inverse_transform_warning(): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") x_inv = trans.inverse_transform(psi).item() - print(f"Inverse transformed value: {x_inv}") assert np.isnan(x_inv) # Check if warning was raised From 587eb129a158ec7e58f0675f9a3e24714f1f8a6b Mon Sep 17 00:00:00 2001 From: rnmourao Date: Mon, 25 Nov 2024 19:57:28 +0000 Subject: [PATCH 06/21] refined warning message for NaNs in inverse transform --- .../sklearn.preprocessing/28946.enhancement.rst | 4 ++++ sklearn/preprocessing/_data.py | 16 +++++++++++----- sklearn/preprocessing/tests/test_data.py | 2 +- 3 files changed, 16 insertions(+), 6 deletions(-) create mode 100644 doc/whats_new/upcoming_changes/sklearn.preprocessing/28946.enhancement.rst diff --git a/doc/whats_new/upcoming_changes/sklearn.preprocessing/28946.enhancement.rst b/doc/whats_new/upcoming_changes/sklearn.preprocessing/28946.enhancement.rst new file mode 100644 index 0000000000000..8b083ab7bed0c --- /dev/null +++ b/doc/whats_new/upcoming_changes/sklearn.preprocessing/28946.enhancement.rst @@ -0,0 +1,4 @@ +- The :class:preprocessing.PowerTransformer now returns a warning +when NaN values are encountered in the inverse transform, typically +caused by extremely skewed data. +By :user:Roberto Mourao \ No newline at end of file diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index ff23d070553d3..e9a648a7bdaff 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -3428,12 +3428,18 @@ def inverse_transform(self, X): "yeo-johnson": self._yeo_johnson_inverse_transform, }[self.method] for i, lmbda in enumerate(self.lambdas_): - # raise RuntimeWarning if return NaNs - with np.errstate(invalid="warn"): - X[:, i] = inv_fun(X[:, i], lmbda) - + with np.errstate(invalid='warn'): + with warnings.catch_warnings(record=True) as captured_warnings: + X[:, i] = inv_fun(X[:, i], lmbda) + if captured_warnings and np.isnan(X[:, i]).any(): + warnings.warn( + f"""Some values in column {i} of the inverse-transformed data are NaN. This may be due to + extreme skewness or outliers in the data for this column. Consider addressing these issues, + such as removing or imputing outliers, before applying the transformation.""", + UserWarning + ) return X - + def _yeo_johnson_inverse_transform(self, x, lmbda): """Return inverse-transformed input x following Yeo-Johnson inverse transform with parameter lambda. diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index 6ca2fa24bfef2..24c204ede65f0 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -2640,4 +2640,4 @@ def test_yeo_johnson_inverse_transform_warning(): # Check if warning was raised assert len(w) > 0 - assert issubclass(w[-1].category, RuntimeWarning) + assert issubclass(w[-1].category, UserWarning) From 01f6aafd5cdab3deb392da588af20ab7e4ee64b4 Mon Sep 17 00:00:00 2001 From: rnmourao Date: Mon, 25 Nov 2024 20:26:46 +0000 Subject: [PATCH 07/21] linting fixes --- sklearn/preprocessing/_data.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index e9a648a7bdaff..dc666c77d0f4d 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -3433,13 +3433,15 @@ def inverse_transform(self, X): X[:, i] = inv_fun(X[:, i], lmbda) if captured_warnings and np.isnan(X[:, i]).any(): warnings.warn( - f"""Some values in column {i} of the inverse-transformed data are NaN. This may be due to - extreme skewness or outliers in the data for this column. Consider addressing these issues, - such as removing or imputing outliers, before applying the transformation.""", + f"""Some values in column {i} of the inverse-transformed data + are NaN. This may be due to extreme skewness or outliers in + the data for this column. Consider addressing these issues, + such as removing or imputing outliers, before applying the + transformation.""", UserWarning - ) + ) return X - + def _yeo_johnson_inverse_transform(self, x, lmbda): """Return inverse-transformed input x following Yeo-Johnson inverse transform with parameter lambda. From 6a98c89958c48e51aa81b08203f9333104c97350 Mon Sep 17 00:00:00 2001 From: rnmourao Date: Tue, 26 Nov 2024 10:49:28 +0000 Subject: [PATCH 08/21] linting fixes --- sklearn/preprocessing/_data.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index dc666c77d0f4d..674ce110024c3 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -3428,18 +3428,18 @@ def inverse_transform(self, X): "yeo-johnson": self._yeo_johnson_inverse_transform, }[self.method] for i, lmbda in enumerate(self.lambdas_): - with np.errstate(invalid='warn'): + with np.errstate(invalid="warn"): with warnings.catch_warnings(record=True) as captured_warnings: X[:, i] = inv_fun(X[:, i], lmbda) if captured_warnings and np.isnan(X[:, i]).any(): - warnings.warn( - f"""Some values in column {i} of the inverse-transformed data + warnings.warn( + f"""Some values in column {i} of the inverse-transformed data are NaN. This may be due to extreme skewness or outliers in the data for this column. Consider addressing these issues, such as removing or imputing outliers, before applying the transformation.""", - UserWarning - ) + UserWarning, + ) return X def _yeo_johnson_inverse_transform(self, x, lmbda): From 2552e2a9b9be79e6ad0b0282570cbe24b153b701 Mon Sep 17 00:00:00 2001 From: rnmourao Date: Tue, 26 Nov 2024 18:31:20 +0000 Subject: [PATCH 09/21] fix whats new number --- .../{28946.enhancement.rst => 29307.enhancement.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename doc/whats_new/upcoming_changes/sklearn.preprocessing/{28946.enhancement.rst => 29307.enhancement.rst} (100%) diff --git a/doc/whats_new/upcoming_changes/sklearn.preprocessing/28946.enhancement.rst b/doc/whats_new/upcoming_changes/sklearn.preprocessing/29307.enhancement.rst similarity index 100% rename from doc/whats_new/upcoming_changes/sklearn.preprocessing/28946.enhancement.rst rename to doc/whats_new/upcoming_changes/sklearn.preprocessing/29307.enhancement.rst From 2c9567319d1122eddb98524c7a6d564a4ba9d25b Mon Sep 17 00:00:00 2001 From: rnmourao Date: Thu, 28 Nov 2024 06:41:58 +0000 Subject: [PATCH 10/21] adjust the with nest logic --- sklearn/preprocessing/_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index 674ce110024c3..d33de69cda7f2 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -3428,8 +3428,8 @@ def inverse_transform(self, X): "yeo-johnson": self._yeo_johnson_inverse_transform, }[self.method] for i, lmbda in enumerate(self.lambdas_): - with np.errstate(invalid="warn"): - with warnings.catch_warnings(record=True) as captured_warnings: + with warnings.catch_warnings(record=True) as captured_warnings: + with np.errstate(invalid="warn"): X[:, i] = inv_fun(X[:, i], lmbda) if captured_warnings and np.isnan(X[:, i]).any(): warnings.warn( From c039ee2603e688aab1c3fc614db41bcd61ce2b14 Mon Sep 17 00:00:00 2001 From: maf-rnmourao <104142107+maf-rnmourao@users.noreply.github.com> Date: Wed, 4 Dec 2024 23:54:12 +0400 Subject: [PATCH 11/21] Update doc/whats_new/upcoming_changes/sklearn.preprocessing/29307.enhancement.rst Co-authored-by: Thomas J. Fan --- .../sklearn.preprocessing/29307.enhancement.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/whats_new/upcoming_changes/sklearn.preprocessing/29307.enhancement.rst b/doc/whats_new/upcoming_changes/sklearn.preprocessing/29307.enhancement.rst index 8b083ab7bed0c..20c040a69bdc3 100644 --- a/doc/whats_new/upcoming_changes/sklearn.preprocessing/29307.enhancement.rst +++ b/doc/whats_new/upcoming_changes/sklearn.preprocessing/29307.enhancement.rst @@ -1,4 +1,4 @@ - The :class:preprocessing.PowerTransformer now returns a warning -when NaN values are encountered in the inverse transform, typically -caused by extremely skewed data. -By :user:Roberto Mourao \ No newline at end of file + when NaN values are encountered in the inverse transform, typically + caused by extremely skewed data. + By :user:Roberto Mourao \ No newline at end of file From cd5d8e47408ba683c45abba032aa7fd7d29349f0 Mon Sep 17 00:00:00 2001 From: rnmourao Date: Wed, 4 Dec 2024 21:30:17 +0000 Subject: [PATCH 12/21] added TransformationFailedWarning; light check for Yeo-Johnson inverse transform --- sklearn/exceptions.py | 13 +++++++++++++ sklearn/preprocessing/_data.py | 22 +++++++++++++--------- sklearn/preprocessing/tests/test_data.py | 4 ++-- 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/sklearn/exceptions.py b/sklearn/exceptions.py index 1c9162dc760f9..9b2f6153eb3c5 100644 --- a/sklearn/exceptions.py +++ b/sklearn/exceptions.py @@ -15,6 +15,7 @@ "PositiveSpectrumWarning", "UnsetMetadataPassedError", "EstimatorCheckFailedWarning", + "TransformFailedWarning", ] @@ -128,6 +129,18 @@ class FitFailedWarning(RuntimeWarning): """ +class TransformFailedWarning(RuntimeWarning): + """Warning raised when a transformation fails to process data correctly. + + This warning is used when a transformation produces invalid values, such as + NaNs, due to numerical computation failures or instability in the + transformation process. + + .. versionadded:: 1.7.dev0 + Added to warn users about failures in transformation processes. + """ + + class SkipTestWarning(UserWarning): """Warning class used to notify the user of a test that was skipped. diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index d33de69cda7f2..18afa05c8d3ca 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -9,6 +9,7 @@ from scipy import optimize, sparse, stats from scipy.special import boxcox, inv_boxcox +from sklearn.exceptions import TransformFailedWarning from sklearn.utils import metadata_routing from ..base import ( @@ -3431,15 +3432,18 @@ def inverse_transform(self, X): with warnings.catch_warnings(record=True) as captured_warnings: with np.errstate(invalid="warn"): X[:, i] = inv_fun(X[:, i], lmbda) - if captured_warnings and np.isnan(X[:, i]).any(): - warnings.warn( - f"""Some values in column {i} of the inverse-transformed data - are NaN. This may be due to extreme skewness or outliers in - the data for this column. Consider addressing these issues, - such as removing or imputing outliers, before applying the - transformation.""", - UserWarning, - ) + + if captured_warnings: + last_warning = captured_warnings[-1] + # Check for the specific warning message + if "invalid value encountered" in str(last_warning.message): + warnings.warn( + f"Some values in column {i} of the inverse-transformed data " + f"are NaN. This may be due to numerical issues in the " + f"transformation process. Consider inspecting the input data " + f"or preprocessing it before applying the transformation.", + TransformFailedWarning, + ) return X def _yeo_johnson_inverse_transform(self, x, lmbda): diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index 24c204ede65f0..073c364dc7f58 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -11,7 +11,7 @@ from sklearn import datasets from sklearn.base import clone -from sklearn.exceptions import NotFittedError +from sklearn.exceptions import NotFittedError, TransformFailedWarning from sklearn.metrics.pairwise import linear_kernel from sklearn.model_selection import cross_val_predict from sklearn.pipeline import Pipeline @@ -2640,4 +2640,4 @@ def test_yeo_johnson_inverse_transform_warning(): # Check if warning was raised assert len(w) > 0 - assert issubclass(w[-1].category, UserWarning) + assert issubclass(w[-1].category, TransformFailedWarning) From 5041cf928c4ffcc45618cc9db042626162d2b5ea Mon Sep 17 00:00:00 2001 From: rnmourao Date: Thu, 5 Dec 2024 22:01:10 +0000 Subject: [PATCH 13/21] replaced TransformFailedWarning with UserWarning --- sklearn/exceptions.py | 13 ------------- sklearn/preprocessing/_data.py | 5 ++--- sklearn/preprocessing/tests/test_data.py | 4 ++-- 3 files changed, 4 insertions(+), 18 deletions(-) diff --git a/sklearn/exceptions.py b/sklearn/exceptions.py index 9b2f6153eb3c5..1c9162dc760f9 100644 --- a/sklearn/exceptions.py +++ b/sklearn/exceptions.py @@ -15,7 +15,6 @@ "PositiveSpectrumWarning", "UnsetMetadataPassedError", "EstimatorCheckFailedWarning", - "TransformFailedWarning", ] @@ -129,18 +128,6 @@ class FitFailedWarning(RuntimeWarning): """ -class TransformFailedWarning(RuntimeWarning): - """Warning raised when a transformation fails to process data correctly. - - This warning is used when a transformation produces invalid values, such as - NaNs, due to numerical computation failures or instability in the - transformation process. - - .. versionadded:: 1.7.dev0 - Added to warn users about failures in transformation processes. - """ - - class SkipTestWarning(UserWarning): """Warning class used to notify the user of a test that was skipped. diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index 18afa05c8d3ca..ebf50043647ca 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -9,7 +9,6 @@ from scipy import optimize, sparse, stats from scipy.special import boxcox, inv_boxcox -from sklearn.exceptions import TransformFailedWarning from sklearn.utils import metadata_routing from ..base import ( @@ -3436,13 +3435,13 @@ def inverse_transform(self, X): if captured_warnings: last_warning = captured_warnings[-1] # Check for the specific warning message - if "invalid value encountered" in str(last_warning.message): + if "invalid value encountered in power" in str(last_warning.message): warnings.warn( f"Some values in column {i} of the inverse-transformed data " f"are NaN. This may be due to numerical issues in the " f"transformation process. Consider inspecting the input data " f"or preprocessing it before applying the transformation.", - TransformFailedWarning, + UserWarning, ) return X diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index 073c364dc7f58..24c204ede65f0 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -11,7 +11,7 @@ from sklearn import datasets from sklearn.base import clone -from sklearn.exceptions import NotFittedError, TransformFailedWarning +from sklearn.exceptions import NotFittedError from sklearn.metrics.pairwise import linear_kernel from sklearn.model_selection import cross_val_predict from sklearn.pipeline import Pipeline @@ -2640,4 +2640,4 @@ def test_yeo_johnson_inverse_transform_warning(): # Check if warning was raised assert len(w) > 0 - assert issubclass(w[-1].category, TransformFailedWarning) + assert issubclass(w[-1].category, UserWarning) From 0b6de1042fc193e2d074e507ff80652f860ddb16 Mon Sep 17 00:00:00 2001 From: rnmourao Date: Fri, 6 Dec 2024 06:26:47 +0000 Subject: [PATCH 14/21] checking all warnings --- sklearn/preprocessing/_data.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index ebf50043647ca..a73dfe5572a62 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -3432,10 +3432,8 @@ def inverse_transform(self, X): with np.errstate(invalid="warn"): X[:, i] = inv_fun(X[:, i], lmbda) - if captured_warnings: - last_warning = captured_warnings[-1] - # Check for the specific warning message - if "invalid value encountered in power" in str(last_warning.message): + for warning in captured_warnings: + if "invalid value encountered in power" in str(warning.message): warnings.warn( f"Some values in column {i} of the inverse-transformed data " f"are NaN. This may be due to numerical issues in the " @@ -3443,6 +3441,7 @@ def inverse_transform(self, X): f"or preprocessing it before applying the transformation.", UserWarning, ) + break return X def _yeo_johnson_inverse_transform(self, x, lmbda): From 5f44202cbd79444995e5ac2dc83644425dfc1cee Mon Sep 17 00:00:00 2001 From: rnmourao Date: Fri, 6 Dec 2024 06:29:55 +0000 Subject: [PATCH 15/21] a more elegant test --- sklearn/preprocessing/_data.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index a73dfe5572a62..69616932d3f57 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -3431,17 +3431,17 @@ def inverse_transform(self, X): with warnings.catch_warnings(record=True) as captured_warnings: with np.errstate(invalid="warn"): X[:, i] = inv_fun(X[:, i], lmbda) - - for warning in captured_warnings: - if "invalid value encountered in power" in str(warning.message): - warnings.warn( - f"Some values in column {i} of the inverse-transformed data " - f"are NaN. This may be due to numerical issues in the " - f"transformation process. Consider inspecting the input data " - f"or preprocessing it before applying the transformation.", - UserWarning, - ) - break + if any( + "invalid value encountered in power" in str(w.message) + for w in captured_warnings + ): + warnings.warn( + f"Some values in column {i} of the inverse-transformed data " + f"are NaN. This may be due to numerical issues in the " + f"transformation process. Consider inspecting the input data " + f"or preprocessing it before applying the transformation.", + UserWarning, + ) return X def _yeo_johnson_inverse_transform(self, x, lmbda): From 264c4c42c9b8b6ec1181a22a5661add8310a40e9 Mon Sep 17 00:00:00 2001 From: Roberto Mourao <104142107+maf-rnmourao@users.noreply.github.com> Date: Tue, 2 Sep 2025 13:18:58 +0400 Subject: [PATCH 16/21] Update doc/whats_new/upcoming_changes/sklearn.preprocessing/29307.enhancement.rst Co-authored-by: Christian Lorentzen --- .../sklearn.preprocessing/29307.enhancement.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/upcoming_changes/sklearn.preprocessing/29307.enhancement.rst b/doc/whats_new/upcoming_changes/sklearn.preprocessing/29307.enhancement.rst index 20c040a69bdc3..166db37830efc 100644 --- a/doc/whats_new/upcoming_changes/sklearn.preprocessing/29307.enhancement.rst +++ b/doc/whats_new/upcoming_changes/sklearn.preprocessing/29307.enhancement.rst @@ -1,4 +1,4 @@ -- The :class:preprocessing.PowerTransformer now returns a warning +- The :class:`preprocessing.PowerTransformer` now returns a warning when NaN values are encountered in the inverse transform, typically caused by extremely skewed data. By :user:Roberto Mourao \ No newline at end of file From 54fcae396e32b099ee5c11fff1b66088add4bbd6 Mon Sep 17 00:00:00 2001 From: Roberto Mourao <104142107+maf-rnmourao@users.noreply.github.com> Date: Tue, 2 Sep 2025 13:19:11 +0400 Subject: [PATCH 17/21] Update doc/whats_new/upcoming_changes/sklearn.preprocessing/29307.enhancement.rst Co-authored-by: Christian Lorentzen --- .../sklearn.preprocessing/29307.enhancement.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/upcoming_changes/sklearn.preprocessing/29307.enhancement.rst b/doc/whats_new/upcoming_changes/sklearn.preprocessing/29307.enhancement.rst index 166db37830efc..55fd869902d62 100644 --- a/doc/whats_new/upcoming_changes/sklearn.preprocessing/29307.enhancement.rst +++ b/doc/whats_new/upcoming_changes/sklearn.preprocessing/29307.enhancement.rst @@ -1,4 +1,4 @@ - The :class:`preprocessing.PowerTransformer` now returns a warning - when NaN values are encountered in the inverse transform, typically + when NaN values are encountered in the inverse transform, `inverse_transform`, typically caused by extremely skewed data. By :user:Roberto Mourao \ No newline at end of file From 519abe4e2c3ff2b011ed56a762833d0550ea8f2c Mon Sep 17 00:00:00 2001 From: Roberto Mourao <104142107+maf-rnmourao@users.noreply.github.com> Date: Tue, 2 Sep 2025 13:19:31 +0400 Subject: [PATCH 18/21] Update sklearn/preprocessing/_data.py Co-authored-by: Christian Lorentzen --- sklearn/preprocessing/_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index 69616932d3f57..0b30e7f726191 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -3437,7 +3437,7 @@ def inverse_transform(self, X): ): warnings.warn( f"Some values in column {i} of the inverse-transformed data " - f"are NaN. This may be due to numerical issues in the " + f"are NaN. This may be caused by numerical issues in the " f"transformation process. Consider inspecting the input data " f"or preprocessing it before applying the transformation.", UserWarning, From 18bdbea1c96ef5cc63071d662a89ff6661181fac Mon Sep 17 00:00:00 2001 From: Roberto Mourao <104142107+maf-rnmourao@users.noreply.github.com> Date: Tue, 2 Sep 2025 13:19:47 +0400 Subject: [PATCH 19/21] Update sklearn/preprocessing/_data.py Co-authored-by: Christian Lorentzen --- sklearn/preprocessing/_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index 0b30e7f726191..966a9c6558d54 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -3438,7 +3438,7 @@ def inverse_transform(self, X): warnings.warn( f"Some values in column {i} of the inverse-transformed data " f"are NaN. This may be caused by numerical issues in the " - f"transformation process. Consider inspecting the input data " + f"transformation process, e.g. extremely skewed data. Consider inspecting the input data " f"or preprocessing it before applying the transformation.", UserWarning, ) From fbf57985e51d95c9ba8892508422fd16685c4c36 Mon Sep 17 00:00:00 2001 From: Roberto Mourao <104142107+maf-rnmourao@users.noreply.github.com> Date: Tue, 2 Sep 2025 13:38:54 +0400 Subject: [PATCH 20/21] Update sklearn/preprocessing/tests/test_data.py Co-authored-by: Christian Lorentzen --- sklearn/preprocessing/tests/test_data.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index 24c204ede65f0..6d32946b447a8 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -2633,11 +2633,7 @@ def test_yeo_johnson_inverse_transform_warning(): # any value `psi` for which lambda * psi + 1 <= 0 will result in nan due # to lacking support psi = np.array([10]).reshape(-1, 1) - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") + with pytest.warns(UserWarning, match="Some values in column"): x_inv = trans.inverse_transform(psi).item() - assert np.isnan(x_inv) - # Check if warning was raised - assert len(w) > 0 - assert issubclass(w[-1].category, UserWarning) + assert np.isnan(x_inv) From 85b24842f2acf385d23d12e81a7e5996ac3d549b Mon Sep 17 00:00:00 2001 From: rnmourao Date: Tue, 2 Sep 2025 10:50:54 +0100 Subject: [PATCH 21/21] lint fixes --- sklearn/preprocessing/_data.py | 5 +++-- sklearn/preprocessing/tests/test_data.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index 322ac70d7c8bb..3213dccab5a8f 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -3511,8 +3511,9 @@ def inverse_transform(self, X): warnings.warn( f"Some values in column {i} of the inverse-transformed data " f"are NaN. This may be caused by numerical issues in the " - f"transformation process, e.g. extremely skewed data. Consider inspecting the input data " - f"or preprocessing it before applying the transformation.", + f"transformation process, e.g. extremely skewed data. " + f"Consider inspecting the input data or preprocessing it " + f"before applying the transformation.", UserWarning, ) return X diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index 76d485a1e28b7..62edb701b3bcc 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -2777,7 +2777,7 @@ def test_yeo_johnson_inverse_transform_warning(): assert np.isnan(x_inv) - + @pytest.mark.skipif( sp_version < parse_version("1.12"), reason="scipy version 1.12 required for stable yeo-johnson",