diff --git a/doc/whats_new/upcoming_changes/sklearn.preprocessing/29307.enhancement.rst b/doc/whats_new/upcoming_changes/sklearn.preprocessing/29307.enhancement.rst new file mode 100644 index 0000000000000..55fd869902d62 --- /dev/null +++ b/doc/whats_new/upcoming_changes/sklearn.preprocessing/29307.enhancement.rst @@ -0,0 +1,4 @@ +- The :class:`preprocessing.PowerTransformer` now raises a warning + when NaN values are encountered in `inverse_transform`, typically + caused by extremely skewed data. + By :user:Roberto Mourao \ No newline at end of file diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index 99f7ac486e545..3213dccab5a8f 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -3501,9 +3501,21 @@ def inverse_transform(self, X): "yeo-johnson": self._yeo_johnson_inverse_transform, }[self.method] for i, lmbda in enumerate(self.lambdas_): - with np.errstate(invalid="ignore"): # hide NaN warnings - X[:, i] = inv_fun(X[:, i], lmbda) - + with warnings.catch_warnings(record=True) as captured_warnings: + with np.errstate(invalid="warn"): + X[:, i] = inv_fun(X[:, i], lmbda) + if any( + "invalid value encountered in power" in str(w.message) + for w in captured_warnings + ): + warnings.warn( + f"Some values in column {i} of the inverse-transformed data " + f"are NaN. This may be caused by numerical issues in the " + f"transformation process, e.g. extremely skewed data. 
" + f"Consider inspecting the input data or preprocessing it " + f"before applying the transformation.", + UserWarning, + ) return X def _yeo_johnson_inverse_transform(self, x, lmbda): diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index 60a3af0d02d61..62edb701b3bcc 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -2760,6 +2760,24 @@ def test_power_transformer_constant_feature(standardize): assert_allclose(Xt_, X) +def test_yeo_johnson_inverse_transform_warning(): + """Check if a warning is triggered when the inverse transformations of the + Box-Cox and Yeo-Johnson transformers return NaN values.""" + trans = PowerTransformer(method="yeo-johnson") + x = np.array([1, 1, 1e10]).reshape(-1, 1) # extreme skew + trans.fit(x) + lmbda = trans.lambdas_[0] + assert lmbda < 0 # Should be negative + + # any value `psi` for which lambda * psi + 1 <= 0 will result in nan due + # to lacking support + psi = np.array([10]).reshape(-1, 1) + with pytest.warns(UserWarning, match="Some values in column"): + x_inv = trans.inverse_transform(psi).item() + + assert np.isnan(x_inv) + + @pytest.mark.skipif( sp_version < parse_version("1.12"), reason="scipy version 1.12 required for stable yeo-johnson",