From 1bc4e88811557c345ec342dc72d8aa61e307122c Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Wed, 8 Apr 2020 15:23:25 -0400 Subject: [PATCH 1/2] ENH Uses training data to find missing values --- .../ensemble/_hist_gradient_boosting/gradient_boosting.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index b3225dccf3c6d..ab0641881b191 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -184,8 +184,6 @@ def fit(self, X, y, sample_weight=None): X_train, y_train, sample_weight_train = X, y, sample_weight X_val = y_val = sample_weight_val = None - has_missing_values = np.isnan(X_train).any(axis=0).astype(np.uint8) - # Bin the data # For ease of use of the API, the user-facing GBDT classes accept the # parameter max_bins, which doesn't take into account the bin for @@ -203,6 +201,10 @@ def fit(self, X, y, sample_weight=None): else: X_binned_val = None + # Uses binned data to check for missing values + has_missing_values = ( + X_binned_train == self.bin_mapper_.missing_values_bin_idx_).any( + axis=0).astype(np.uint8) if self.verbose: print("Fitting gradient boosted rounds:") From 396d6e25332a5ea51b63f5a3b1fdb61a5277f772 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Wed, 8 Apr 2020 15:24:00 -0400 Subject: [PATCH 2/2] CLN Uses the binned data to find missing values --- sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index ab0641881b191..f17e6a94b1bc0 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -205,6 +205,7 @@ def fit(self, X, y, 
sample_weight=None): has_missing_values = ( X_binned_train == self.bin_mapper_.missing_values_bin_idx_).any( axis=0).astype(np.uint8) + if self.verbose: print("Fitting gradient boosted rounds:")