
[MRG] ignore NaNs in PowerTransformer #11306


Merged · 34 commits · Jun 21, 2018
Commits (34)
5dba57a
EHN ignore NaN when incrementing mean and var
glemaitre Jun 5, 2018
591c18f
EHN ignore NaNs in StandardScaler for dense case
glemaitre Jun 5, 2018
43fa54b
EHN should handle sparse case
glemaitre Jun 8, 2018
faab12a
TST launch common tests
glemaitre Jun 8, 2018
fa12fe7
FIX use loop
glemaitre Jun 8, 2018
4d60bfe
FIX number of samples first iteration
glemaitre Jun 8, 2018
eba3087
FIX use proper sparse constructor
glemaitre Jun 11, 2018
1782e69
BUG wrong index and remove nan counter
glemaitre Jun 11, 2018
0b074ce
revert
glemaitre Jun 11, 2018
a6cb76d
cleanup
glemaitre Jun 11, 2018
ab2c465
FIX use sum on the boolean array
glemaitre Jun 11, 2018
76691a9
TST equivalance function and class
glemaitre Jun 11, 2018
d5ece66
Merge remote-tracking branch 'origin/master' into nan_standardscaler
glemaitre Jun 12, 2018
504323c
Merge remote-tracking branch 'origin/master' into nan_standardscaler
glemaitre Jun 15, 2018
8d13da1
backward compatibility for n_samples_seen_
glemaitre Jun 15, 2018
124742b
spelling
glemaitre Jun 15, 2018
2ffe497
TST revert some test for back compatibility
glemaitre Jun 15, 2018
424fdba
TST check NaN are ignored in incremental_mean_and_variance
glemaitre Jun 15, 2018
0fe8a3e
TST check NaNs ignore in incr_mean_variance
glemaitre Jun 15, 2018
f267a35
OPTIM cython variable typing
glemaitre Jun 15, 2018
4785fb2
DOC corrections
glemaitre Jun 15, 2018
082633d
DOC mentioned that NaNs are ignored in Notes
glemaitre Jun 16, 2018
d79b867
TST shape of n_samples_seen with missing values
glemaitre Jun 16, 2018
cb077ea
DOC fix spelling
glemaitre Jun 16, 2018
449d24a
DOC whats new entry
glemaitre Jun 16, 2018
48d70b0
tmp
glemaitre Jun 17, 2018
43cbe9a
ignore nan by slicing
glemaitre Jun 17, 2018
4019c3b
iter
glemaitre Jun 17, 2018
9cbb7e8
TST mark as allowing nan
glemaitre Jun 17, 2018
45f4dfd
COMPAT scipy lower than 0.14
glemaitre Jun 18, 2018
0072288
Update data.py
glemaitre Jun 18, 2018
3b29485
Merge remote-tracking branch 'origin/master' into nan_powertransformer
glemaitre Jun 21, 2018
08960ec
EHN: add boxcox in fixes
glemaitre Jun 21, 2018
e626a39
PEP8
glemaitre Jun 21, 2018
4 changes: 4 additions & 0 deletions doc/whats_new/v0.20.rst
@@ -256,6 +256,10 @@ Preprocessing
ignore and pass-through NaN values.
:issue:`11206` by :user:`Guillaume Lemaitre <glemaitre>`.

- :class:`preprocessing.PowerTransformer` and
:func:`preprocessing.power_transform` ignore and pass-through NaN values.
:issue:`11306` by :user:`Guillaume Lemaitre <glemaitre>`.

Model evaluation and meta-estimators

- A scorer based on :func:`metrics.brier_score_loss` is also available.
26 changes: 21 additions & 5 deletions sklearn/preprocessing/data.py
@@ -24,7 +24,7 @@
from ..utils import check_array
from ..utils.extmath import row_norms
from ..utils.extmath import _incremental_mean_and_var
from ..utils.fixes import nanpercentile
from ..utils.fixes import boxcox, nanpercentile
from ..utils.sparsefuncs_fast import (inplace_csr_row_normalize_l1,
inplace_csr_row_normalize_l2)
from ..utils.sparsefuncs import (inplace_column_scale,
@@ -836,6 +836,9 @@ class MaxAbsScaler(BaseEstimator, TransformerMixin):

Notes
-----
NaNs are treated as missing values: disregarded in fit, and maintained in
transform.

For a comparison of the different scalers, transformers, and normalizers,
see :ref:`examples/preprocessing/plot_all_scaling.py
<sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
@@ -973,6 +976,9 @@ def maxabs_scale(X, axis=0, copy=True):

Notes
-----
NaNs are treated as missing values: disregarded to compute the statistics,
and maintained during the data transformation.

For a comparison of the different scalers, transformers, and normalizers,
see :ref:`examples/preprocessing/plot_all_scaling.py
<sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
@@ -2429,6 +2435,9 @@ class PowerTransformer(BaseEstimator, TransformerMixin):

Notes
-----
NaNs are treated as missing values: disregarded in fit, and maintained in
transform.

For a comparison of the different scalers, transformers, and normalizers,
see :ref:`examples/preprocessing/plot_all_scaling.py
<sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
@@ -2468,7 +2477,10 @@ def fit(self, X, y=None):
transformed = []

for col in X.T:
col_trans, lmbda = stats.boxcox(col, lmbda=None)
# NaNs would bias the estimation of lambda, so exclude them
# before computing it; the transform itself lets NaN pass through.
_, lmbda = stats.boxcox(col[~np.isnan(col)], lmbda=None)
col_trans = boxcox(col, lmbda)
self.lambdas_.append(lmbda)
transformed.append(col_trans)

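The fit loop above can be sketched standalone: for each column, lambda is estimated on the observed values only, and the NaN-propagating `scipy.special.boxcox` is then applied to the full column. The data below is a hypothetical toy array, not from the PR.

```python
import numpy as np
from scipy import stats, special

# strictly positive toy data with one missing entry per column
X = np.array([[1.0, 10.0],
              [2.0, np.nan],
              [np.nan, 30.0],
              [4.0, 40.0]])

lambdas, transformed = [], []
for col in X.T:
    # estimate lambda on the observed values only
    _, lmbda = stats.boxcox(col[~np.isnan(col)], lmbda=None)
    # special.boxcox is a ufunc, so NaNs simply propagate
    transformed.append(special.boxcox(col, lmbda))
    lambdas.append(lmbda)

X_trans = np.column_stack(transformed)
```

The key point is that the missing entries come out of the transform exactly where they went in, while the fitted lambdas are unaffected by them.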
@@ -2493,7 +2505,7 @@ def transform(self, X):
X = self._check_input(X, check_positive=True, check_shape=True)

for i, lmbda in enumerate(self.lambdas_):
X[:, i] = stats.boxcox(X[:, i], lmbda=lmbda)
X[:, i] = boxcox(X[:, i], lmbda)

if self.standardize:
X = self._scaler.transform(X)
@@ -2548,9 +2560,10 @@ def _check_input(self, X, check_positive=False, check_shape=False,
check_method : bool
If True, check that the transformation method is valid.
"""
X = check_array(X, ensure_2d=True, dtype=FLOAT_DTYPES, copy=self.copy)
X = check_array(X, ensure_2d=True, dtype=FLOAT_DTYPES, copy=self.copy,
force_all_finite='allow-nan')

if check_positive and self.method == 'box-cox' and np.any(X <= 0):
if check_positive and self.method == 'box-cox' and np.nanmin(X) <= 0:
raise ValueError("The Box-Cox transformation can only be applied "
"to strictly positive data")
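The switch to `np.nanmin` matters because a single NaN poisons `np.min`, which would make a positivity check based on the plain minimum meaningless. A minimal illustration on made-up data:

```python
import numpy as np

X = np.array([[1.0, np.nan],
              [2.0, 3.0]])

plain_min = np.min(X)      # NaN: poisoned by the missing value
robust_min = np.nanmin(X)  # 1.0: NaN entries are skipped
```

With `nanmin`, the `<= 0` check still rejects non-positive data while ignoring missing values.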

@@ -2622,6 +2635,9 @@ def power_transform(X, method='box-cox', standardize=True, copy=True):

Notes
-----
NaNs are treated as missing values: disregarded to compute the statistics,
and maintained during the data transformation.

For a comparison of the different scalers, transformers, and normalizers,
see :ref:`examples/preprocessing/plot_all_scaling.py
<sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
17 changes: 11 additions & 6 deletions sklearn/preprocessing/tests/test_common.py
@@ -10,10 +10,12 @@

from sklearn.preprocessing import minmax_scale
from sklearn.preprocessing import scale
from sklearn.preprocessing import power_transform
from sklearn.preprocessing import quantile_transform

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PowerTransformer
from sklearn.preprocessing import QuantileTransformer

from sklearn.utils.testing import assert_array_equal
@@ -28,19 +30,22 @@ def _get_valid_samples_by_column(X, col):


@pytest.mark.parametrize(
"est, func, support_sparse",
[(MinMaxScaler(), minmax_scale, False),
(StandardScaler(), scale, False),
(StandardScaler(with_mean=False), scale, True),
(QuantileTransformer(n_quantiles=10), quantile_transform, True)]
"est, func, support_sparse, strictly_positive",
[(MinMaxScaler(), minmax_scale, False, False),
(StandardScaler(), scale, False, False),
(StandardScaler(with_mean=False), scale, True, False),
(PowerTransformer(), power_transform, False, True),
(QuantileTransformer(n_quantiles=10), quantile_transform, True, False)]
)
def test_missing_value_handling(est, func, support_sparse):
def test_missing_value_handling(est, func, support_sparse, strictly_positive):
# check that the preprocessing methods let NaN pass through
rng = np.random.RandomState(42)
X = iris.data.copy()
n_missing = 50
X[rng.randint(X.shape[0], size=n_missing),
rng.randint(X.shape[1], size=n_missing)] = np.nan
if strictly_positive:
X += np.nanmin(X) + 0.1
X_train, X_test = train_test_split(X, random_state=1)
# sanity check
assert not np.all(np.isnan(X_train), axis=0).any()
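The NaN-injection pattern used in this test can be reproduced standalone; the array below is hypothetical strictly positive data standing in for iris.

```python
import numpy as np

rng = np.random.RandomState(42)
X = rng.uniform(1.0, 5.0, size=(20, 3))  # strictly positive toy data

# scatter missing values at random row/column positions
n_missing = 10
X[rng.randint(X.shape[0], size=n_missing),
  rng.randint(X.shape[1], size=n_missing)] = np.nan

# sanity check mirrored from the test: no column may be entirely NaN
all_nan_cols = np.all(np.isnan(X), axis=0)
```

Because at most 10 of the 60 entries are replaced, no column of 20 rows can ever become all-NaN, so the sanity check always holds here.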
3 changes: 2 additions & 1 deletion sklearn/utils/estimator_checks.py
@@ -78,7 +78,8 @@
'RandomForestRegressor', 'Ridge', 'RidgeCV']

ALLOW_NAN = ['Imputer', 'SimpleImputer', 'MICEImputer',
'MinMaxScaler', 'StandardScaler', 'QuantileTransformer']
'MinMaxScaler', 'StandardScaler', 'PowerTransformer',
'QuantileTransformer']


def _yield_non_meta_checks(name, estimator):
11 changes: 11 additions & 0 deletions sklearn/utils/fixes.py
@@ -70,6 +70,17 @@ def divide(x1, x2, out=None, dtype=None):
return out


# scipy.special.boxcox, which lets NaN pass through, is only
# available in scipy >= 0.14; fall back to stats.boxcox before that
if sp_version < (0, 14):
from scipy import stats

def boxcox(x, lmbda):
with np.errstate(invalid='ignore'):
return stats.boxcox(x, lmbda)
else:
from scipy.special import boxcox # noqa
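The version-gated shim above can be sketched in isolation; `sp_version` is re-derived here (in simplified form) since `sklearn.utils.fixes` normally provides it.

```python
import numpy as np
import scipy
from scipy import stats

# simplified version tuple, e.g. "1.11.4" -> (1, 11)
sp_version = tuple(int(v) for v in scipy.__version__.split('.')[:2])

if sp_version < (0, 14):
    def boxcox(x, lmbda):
        # old scipy: fall back to stats.boxcox, silencing NaN warnings
        with np.errstate(invalid='ignore'):
            return stats.boxcox(x, lmbda)
else:
    from scipy.special import boxcox  # NaN-propagating ufunc

# lambda = 0 reduces the Box-Cox transform to the natural log
out = boxcox(np.array([1.0, np.e]), 0.0)
```

Either branch exposes the same `boxcox(x, lmbda)` signature, so the caller in `data.py` never needs to know which scipy is installed.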


if sp_version < (0, 15):
# Backport fix for scikit-learn/scikit-learn#2986 / scipy/scipy#4142
from ._scipy_sparse_lsqr_backport import lsqr as sparse_lsqr