diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index b3225dccf3c6d..f17e6a94b1bc0 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -184,8 +184,6 @@ def fit(self, X, y, sample_weight=None): X_train, y_train, sample_weight_train = X, y, sample_weight X_val = y_val = sample_weight_val = None - has_missing_values = np.isnan(X_train).any(axis=0).astype(np.uint8) - # Bin the data # For ease of use of the API, the user-facing GBDT classes accept the # parameter max_bins, which doesn't take into account the bin for @@ -203,6 +201,11 @@ def fit(self, X, y, sample_weight=None): else: X_binned_val = None + # Uses binned data to check for missing values + has_missing_values = ( + X_binned_train == self.bin_mapper_.missing_values_bin_idx_).any( + axis=0).astype(np.uint8) + if self.verbose: print("Fitting gradient boosted rounds:")