diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 576f88e66a71e..2f7e7590a1c6b 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -262,6 +262,13 @@ Changelog :mod:`sklearn.ensemble` ....................... +- |Enhancement| :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and + :class:`~sklearn.ensemble.HistGradientBoostingRegressor` take cgroups quotas + into account when deciding the number of threads used by OpenMP. This + avoids performance problems caused by over-subscription when using those + classes in a docker container for instance. :pr:`20477` + by `Thomas Fan`_. + - |Fix| Do not allow to compute out-of-bag (OOB) score in :class:`ensemble.RandomForestClassifier` and :class:`ensemble.ExtraTreesClassifier` with multiclass-multioutput target diff --git a/sklearn/ensemble/_hist_gradient_boosting/_binning.pyx b/sklearn/ensemble/_hist_gradient_boosting/_binning.pyx index fc564c9a126ab..5f5dd68935fd4 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/_binning.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/_binning.pyx @@ -22,6 +22,7 @@ np.import_array() def _map_to_bins(const X_DTYPE_C [:, :] data, list binning_thresholds, const unsigned char missing_values_bin_idx, + int n_threads, X_BINNED_DTYPE_C [::1, :] binned): """Bin continuous and categorical values to discrete integer-coded levels. @@ -35,6 +36,8 @@ def _map_to_bins(const X_DTYPE_C [:, :] data, binning_thresholds : list of arrays For each feature, stores the increasing numeric values that are used to separate the bins. + n_threads : int + Number of OpenMP threads to use. binned : ndarray, shape (n_samples, n_features) Output array, must be fortran aligned. """ @@ -45,12 +48,14 @@ def _map_to_bins(const X_DTYPE_C [:, :] data, _map_col_to_bins(data[:, feature_idx], binning_thresholds[feature_idx], missing_values_bin_idx, + n_threads, binned[:, feature_idx]) cdef void _map_col_to_bins(const X_DTYPE_C [:] data, const X_DTYPE_C [:] binning_thresholds, const unsigned char missing_values_bin_idx, + int n_threads, X_BINNED_DTYPE_C [:] binned): """Binary search to find the bin index for each value in the data.""" cdef: @@ -59,8 +64,8 @@ cdef void _map_col_to_bins(const X_DTYPE_C [:] data, int right int middle - for i in prange(data.shape[0], schedule='static', nogil=True): - + for i in prange(data.shape[0], schedule='static', nogil=True, + num_threads=n_threads): if isnan(data[i]): binned[i] = missing_values_bin_idx else: diff --git a/sklearn/ensemble/_hist_gradient_boosting/_gradient_boosting.pyx b/sklearn/ensemble/_hist_gradient_boosting/_gradient_boosting.pyx index 18f1b6a365421..f684ca57e560d 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/_gradient_boosting.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/_gradient_boosting.pyx @@ -18,7 +18,9 @@ np.import_array() def _update_raw_predictions( Y_DTYPE_C [::1] raw_predictions, # OUT - grower): + grower, + n_threads, +): """Update raw_predictions with the predictions of the newest tree. 
This is equivalent to (and much faster than): @@ -42,7 +44,7 @@ def _update_raw_predictions( values = np.array([leaf.value for leaf in leaves], dtype=Y_DTYPE) _update_raw_predictions_helper(raw_predictions, starts, stops, partition, - values) + values, n_threads) cdef inline void _update_raw_predictions_helper( @@ -50,13 +52,16 @@ cdef inline void _update_raw_predictions_helper( const unsigned int [::1] starts, const unsigned int [::1] stops, const unsigned int [::1] partition, - const Y_DTYPE_C [::1] values): + const Y_DTYPE_C [::1] values, + int n_threads, +): cdef: unsigned int position int leaf_idx int n_leaves = starts.shape[0] - for leaf_idx in prange(n_leaves, schedule='static', nogil=True): + for leaf_idx in prange(n_leaves, schedule='static', nogil=True, + num_threads=n_threads): for position in range(starts[leaf_idx], stops[leaf_idx]): raw_predictions[partition[position]] += values[leaf_idx] diff --git a/sklearn/ensemble/_hist_gradient_boosting/_loss.pyx b/sklearn/ensemble/_hist_gradient_boosting/_loss.pyx index 4114cd24aa8df..da900e28c6457 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/_loss.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/_loss.pyx @@ -21,14 +21,16 @@ np.import_array() def _update_gradients_least_squares( G_H_DTYPE_C [::1] gradients, # OUT const Y_DTYPE_C [::1] y_true, # IN - const Y_DTYPE_C [::1] raw_predictions): # IN + const Y_DTYPE_C [::1] raw_predictions, # IN + int n_threads, # IN +): cdef: int n_samples int i n_samples = raw_predictions.shape[0] - for i in prange(n_samples, schedule='static', nogil=True): + for i in prange(n_samples, schedule='static', nogil=True, num_threads=n_threads): # Note: a more correct expression is 2 * (raw_predictions - y_true) # but since we use 1 for the constant hessian value (and not 2) this # is strictly equivalent for the leaves values. @@ -40,14 +42,16 @@ def _update_gradients_hessians_least_squares( G_H_DTYPE_C [::1] hessians, # OUT const Y_DTYPE_C [::1] y_true, # IN const Y_DTYPE_C [::1] raw_predictions, # IN - const Y_DTYPE_C [::1] sample_weight): # IN + const Y_DTYPE_C [::1] sample_weight, # IN + int n_threads, # IN +): cdef: int n_samples int i n_samples = raw_predictions.shape[0] - for i in prange(n_samples, schedule='static', nogil=True): + for i in prange(n_samples, schedule='static', nogil=True, num_threads=n_threads): # Note: a more correct exp is 2 * (raw_predictions - y_true) * sample_weight # but since we use 1 for the constant hessian value (and not 2) this # is strictly equivalent for the leaves values. 
@@ -60,14 +64,15 @@ def _update_gradients_hessians_least_absolute_deviation( G_H_DTYPE_C [::1] hessians, # OUT const Y_DTYPE_C [::1] y_true, # IN const Y_DTYPE_C [::1] raw_predictions, # IN - const Y_DTYPE_C [::1] sample_weight): # IN - + const Y_DTYPE_C [::1] sample_weight, # IN + int n_threads, # IN +): cdef: int n_samples int i n_samples = raw_predictions.shape[0] - for i in prange(n_samples, schedule='static', nogil=True): + for i in prange(n_samples, schedule='static', nogil=True, num_threads=n_threads): # gradient = sign(raw_predicition - y_pred) * sample_weight gradients[i] = sample_weight[i] * (2 * (y_true[i] - raw_predictions[i] < 0) - 1) @@ -77,14 +82,15 @@ def _update_gradients_hessians_least_absolute_deviation( def _update_gradients_least_absolute_deviation( G_H_DTYPE_C [::1] gradients, # OUT const Y_DTYPE_C [::1] y_true, # IN - const Y_DTYPE_C [::1] raw_predictions): # IN - + const Y_DTYPE_C [::1] raw_predictions, # IN + int n_threads, # IN +): cdef: int n_samples int i n_samples = raw_predictions.shape[0] - for i in prange(n_samples, schedule='static', nogil=True): + for i in prange(n_samples, schedule='static', nogil=True, num_threads=n_threads): # gradient = sign(raw_predicition - y_pred) gradients[i] = 2 * (y_true[i] - raw_predictions[i] < 0) - 1 @@ -94,8 +100,9 @@ def _update_gradients_hessians_poisson( G_H_DTYPE_C [::1] hessians, # OUT const Y_DTYPE_C [::1] y_true, # IN const Y_DTYPE_C [::1] raw_predictions, # IN - const Y_DTYPE_C [::1] sample_weight): # IN - + const Y_DTYPE_C [::1] sample_weight, # IN + int n_threads, # IN +): cdef: int n_samples int i @@ -103,14 +110,14 @@ def _update_gradients_hessians_poisson( n_samples = raw_predictions.shape[0] if sample_weight is None: - for i in prange(n_samples, schedule='static', nogil=True): + for i in prange(n_samples, schedule='static', nogil=True, num_threads=n_threads): # Note: We use only half of the deviance loss. Therefore, there is # no factor of 2. y_pred = exp(raw_predictions[i]) gradients[i] = (y_pred - y_true[i]) hessians[i] = y_pred else: - for i in prange(n_samples, schedule='static', nogil=True): + for i in prange(n_samples, schedule='static', nogil=True, num_threads=n_threads): # Note: We use only half of the deviance loss. Therefore, there is # no factor of 2. y_pred = exp(raw_predictions[i]) @@ -123,7 +130,9 @@ def _update_gradients_hessians_binary_crossentropy( G_H_DTYPE_C [::1] hessians, # OUT const Y_DTYPE_C [::1] y_true, # IN const Y_DTYPE_C [::1] raw_predictions, # IN - const Y_DTYPE_C [::1] sample_weight): # IN + const Y_DTYPE_C [::1] sample_weight, # IN + int n_threads, # IN +): cdef: int n_samples Y_DTYPE_C p_i # proba that ith sample belongs to positive class @@ -131,12 +140,12 @@ def _update_gradients_hessians_binary_crossentropy( n_samples = raw_predictions.shape[0] if sample_weight is None: - for i in prange(n_samples, schedule='static', nogil=True): + for i in prange(n_samples, schedule='static', nogil=True, num_threads=n_threads): p_i = _cexpit(raw_predictions[i]) gradients[i] = p_i - y_true[i] hessians[i] = p_i * (1. - p_i) else: - for i in prange(n_samples, schedule='static', nogil=True): + for i in prange(n_samples, schedule='static', nogil=True, num_threads=n_threads): p_i = _cexpit(raw_predictions[i]) gradients[i] = (p_i - y_true[i]) * sample_weight[i] hessians[i] = p_i * (1. 
- p_i) * sample_weight[i] @@ -147,7 +156,9 @@ def _update_gradients_hessians_categorical_crossentropy( G_H_DTYPE_C [:, ::1] hessians, # OUT const Y_DTYPE_C [::1] y_true, # IN const Y_DTYPE_C [:, ::1] raw_predictions, # IN - const Y_DTYPE_C [::1] sample_weight): # IN + const Y_DTYPE_C [::1] sample_weight, # IN + int n_threads, # IN +): cdef: int prediction_dim = raw_predictions.shape[0] int n_samples = raw_predictions.shape[1] @@ -160,7 +171,7 @@ def _update_gradients_hessians_categorical_crossentropy( Y_DTYPE_C p_i_k if sample_weight is None: - for i in prange(n_samples, schedule='static', nogil=True): + for i in prange(n_samples, schedule='static', nogil=True, num_threads=n_threads): # first compute softmaxes of sample i for each class for k in range(prediction_dim): p[i, k] = raw_predictions[k, i] # prepare softmax @@ -171,7 +182,7 @@ def _update_gradients_hessians_categorical_crossentropy( gradients[k, i] = p_i_k - (y_true[i] == k) hessians[k, i] = p_i_k * (1. - p_i_k) else: - for i in prange(n_samples, schedule='static', nogil=True): + for i in prange(n_samples, schedule='static', nogil=True, num_threads=n_threads): # first compute softmaxes of sample i for each class for k in range(prediction_dim): p[i, k] = raw_predictions[k, i] # prepare softmax diff --git a/sklearn/ensemble/_hist_gradient_boosting/_predictor.pyx b/sklearn/ensemble/_hist_gradient_boosting/_predictor.pyx index d1bb98d82c936..a6b2f8b90de8e 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/_predictor.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/_predictor.pyx @@ -30,12 +30,14 @@ def _predict_from_raw_data( # raw data = non-binned data const BITSET_INNER_DTYPE_C [:, ::1] raw_left_cat_bitsets, const BITSET_INNER_DTYPE_C [:, ::1] known_cat_bitsets, const unsigned int [::1] f_idx_map, + int n_threads, Y_DTYPE_C [:] out): cdef: int i - for i in prange(numeric_data.shape[0], schedule='static', nogil=True): + for i in prange(numeric_data.shape[0], schedule='static', nogil=True, + num_threads=n_threads): out[i] = _predict_one_from_raw_data( nodes, numeric_data, raw_left_cat_bitsets, known_cat_bitsets, @@ -95,12 +97,14 @@ def _predict_from_binned_data( const X_BINNED_DTYPE_C [:, :] binned_data, BITSET_INNER_DTYPE_C [:, :] binned_left_cat_bitsets, const unsigned char missing_values_bin_idx, + int n_threads, Y_DTYPE_C [:] out): cdef: int i - for i in prange(binned_data.shape[0], schedule='static', nogil=True): + for i in prange(binned_data.shape[0], schedule='static', nogil=True, + num_threads=n_threads): out[i] = _predict_one_from_binned_data(nodes, binned_data, binned_left_cat_bitsets, i, diff --git a/sklearn/ensemble/_hist_gradient_boosting/binning.py b/sklearn/ensemble/_hist_gradient_boosting/binning.py index f8c1d3553e2c5..c76ee270b2270 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/binning.py +++ b/sklearn/ensemble/_hist_gradient_boosting/binning.py @@ -12,6 +12,7 @@ from ...utils import check_random_state, check_array from ...base import BaseEstimator, TransformerMixin from ...utils.validation import check_is_fitted +from ...utils._openmp_helpers import _openmp_effective_n_threads from ._binning import _map_to_bins from .common import X_DTYPE, X_BINNED_DTYPE, ALMOST_INF, X_BITSET_INNER_DTYPE from ._bitset import set_bitset_memoryview @@ -115,6 +116,11 @@ class _BinMapper(TransformerMixin, BaseEstimator): Pass an int for reproducible output across multiple function calls. See :term: `Glossary `. + n_threads : int, default=None + Number of OpenMP threads to use. 
`_openmp_effective_n_threads` is called + to determine the effective number of threads to use, which takes cgroups CPU + quotas into account. See the docstring of `_openmp_effective_n_threads` + for details. Attributes ---------- @@ -151,12 +157,14 @@ def __init__( is_categorical=None, known_categories=None, random_state=None, + n_threads=None, ): self.n_bins = n_bins self.subsample = subsample self.is_categorical = is_categorical self.known_categories = known_categories self.random_state = random_state + self.n_threads = n_threads def fit(self, X, y=None): """Fit data X by computing the binning thresholds. @@ -264,8 +272,12 @@ def transform(self, X): "This estimator was fitted with {} features but {} got passed " "to transform()".format(self.n_bins_non_missing_.shape[0], X.shape[1]) ) + + n_threads = _openmp_effective_n_threads(self.n_threads) binned = np.zeros_like(X, dtype=X_BINNED_DTYPE, order="F") - _map_to_bins(X, self.bin_thresholds_, self.missing_values_bin_idx_, binned) + _map_to_bins( + X, self.bin_thresholds_, self.missing_values_bin_idx_, n_threads, binned + ) return binned def make_known_categories_bitsets(self): diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index 09dcfa779e756..1a98326a36f89 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -14,6 +14,7 @@ check_consistent_length, _check_sample_weight, ) +from ...utils._openmp_helpers import _openmp_effective_n_threads from ...utils.multiclass import check_classification_targets from ...metrics import check_scoring from ...model_selection import train_test_split @@ -263,8 +264,14 @@ def fit(self, X, y, sample_weight=None): # data. self._in_fit = True + # `_openmp_effective_n_threads` is used to take cgroups CPU quotas + # into account when determining the maximum number of threads to use. + n_threads = _openmp_effective_n_threads() + if isinstance(self.loss, str): - self._loss = self._get_loss(sample_weight=sample_weight) + self._loss = self._get_loss( + sample_weight=sample_weight, n_threads=n_threads + ) elif isinstance(self.loss, BaseLoss): self._loss = self.loss @@ -328,6 +335,7 @@ def fit(self, X, y, sample_weight=None): is_categorical=self.is_categorical_, known_categories=known_categories, random_state=self._random_seed, + n_threads=n_threads, ) X_binned_train = self._bin_data(X_train, is_training_data=True) if X_val is not None: @@ -449,9 +457,11 @@ def fit(self, X, y, sample_weight=None): self.validation_score_ = self.validation_score_.tolist() # Compute raw predictions - raw_predictions = self._raw_predict(X_binned_train) + raw_predictions = self._raw_predict(X_binned_train, n_threads=n_threads) if self.do_early_stopping_ and self._use_validation_data: - raw_predictions_val = self._raw_predict(X_binned_val) + raw_predictions_val = self._raw_predict( + X_binned_val, n_threads=n_threads + ) else: raw_predictions_val = None @@ -510,6 +520,7 @@ def fit(self, X, y, sample_weight=None): min_samples_leaf=self.min_samples_leaf, l2_regularization=self.l2_regularization, shrinkage=self.learning_rate, + n_threads=n_threads, ) grower.grow() @@ -530,7 +541,7 @@ def fit(self, X, y, sample_weight=None): # Update raw_predictions with the predictions of the newly # created tree.
tic_pred = time() - _update_raw_predictions(raw_predictions[k, :], grower) + _update_raw_predictions(raw_predictions[k, :], grower, n_threads) toc_pred = time() acc_prediction_time += toc_pred - tic_pred @@ -541,7 +552,9 @@ def fit(self, X, y, sample_weight=None): if self._use_validation_data: for k, pred in enumerate(self._predictors[-1]): raw_predictions_val[k, :] += pred.predict_binned( - X_binned_val, self._bin_mapper.missing_values_bin_idx_ + X_binned_val, + self._bin_mapper.missing_values_bin_idx_, + n_threads, ) should_early_stop = self._check_early_stopping_loss( @@ -809,13 +822,18 @@ def _print_iteration_stats(self, iteration_start_time): print(log_msg) - def _raw_predict(self, X): + def _raw_predict(self, X, n_threads=None): """Return the sum of the leaves values over all predictors. Parameters ---------- X : array-like of shape (n_samples, n_features) The input samples. + n_threads : int, default=None + Number of OpenMP threads to use. `_openmp_effective_n_threads` is called + to determine the effective number of threads to use, which takes cgroups CPU + quotas into account. See the docstring of `_openmp_effective_n_threads` + for details. Returns ------- @@ -837,10 +855,17 @@ def _raw_predict(self, X): dtype=self._baseline_prediction.dtype, ) raw_predictions += self._baseline_prediction - self._predict_iterations(X, self._predictors, raw_predictions, is_binned) + + # We intentionally decouple the number of threads used at prediction + # time from the number of threads used at fit time because the model + # can be deployed on a different machine for prediction purposes. + n_threads = _openmp_effective_n_threads(n_threads) + self._predict_iterations( + X, self._predictors, raw_predictions, is_binned, n_threads + ) return raw_predictions - def _predict_iterations(self, X, predictors, raw_predictions, is_binned): + def _predict_iterations(self, X, predictors, raw_predictions, is_binned, n_threads): """Add the predictions of the predictors to raw_predictions.""" if not is_binned: ( @@ -853,13 +878,15 @@ def _predict_iterations(self, X, predictors, raw_predictions, is_binned): if is_binned: predict = partial( predictor.predict_binned, - missing_values_bin_idx=self._bin_mapper.missing_values_bin_idx_, # noqa + missing_values_bin_idx=self._bin_mapper.missing_values_bin_idx_, + n_threads=n_threads, ) else: predict = partial( predictor.predict, known_cat_bitsets=known_cat_bitsets, f_idx_map=f_idx_map, + n_threads=n_threads, ) raw_predictions[k, :] += predict(X) @@ -894,12 +921,18 @@ def _staged_raw_predict(self, X): dtype=self._baseline_prediction.dtype, ) raw_predictions += self._baseline_prediction + + # We intentionally decouple the number of threads used at prediction + # time from the number of threads used at fit time because the model + # can be deployed on a different machine for prediction purposes.
+ n_threads = _openmp_effective_n_threads() for iteration in range(len(self._predictors)): self._predict_iterations( X, self._predictors[iteration : iteration + 1], raw_predictions, is_binned=False, + n_threads=n_threads, ) yield raw_predictions.copy() @@ -949,7 +982,7 @@ def _more_tags(self): return {"allow_nan": True} @abstractmethod - def _get_loss(self, sample_weight): + def _get_loss(self, sample_weight, n_threads): pass @abstractmethod @@ -1241,7 +1274,7 @@ def _encode_y(self, y): ) return y - def _get_loss(self, sample_weight): + def _get_loss(self, sample_weight, n_threads): # TODO: Remove in v1.2 if self.loss == "least_squares": warnings.warn( @@ -1250,7 +1283,9 @@ def _get_loss(self, sample_weight): "equivalent.", FutureWarning, ) - return _LOSSES["squared_error"](sample_weight=sample_weight) + return _LOSSES["squared_error"]( + sample_weight=sample_weight, n_threads=n_threads + ) elif self.loss == "least_absolute_deviation": warnings.warn( "The loss 'least_absolute_deviation' was deprecated in v1.0 " @@ -1258,9 +1293,11 @@ def _get_loss(self, sample_weight): "which is equivalent.", FutureWarning, ) - return _LOSSES["absolute_error"](sample_weight=sample_weight) + return _LOSSES["absolute_error"]( + sample_weight=sample_weight, n_threads=n_threads + ) - return _LOSSES[self.loss](sample_weight=sample_weight) + return _LOSSES[self.loss](sample_weight=sample_weight, n_threads=n_threads) class HistGradientBoostingClassifier(ClassifierMixin, BaseHistGradientBoosting): @@ -1610,7 +1647,7 @@ def _encode_y(self, y): encoded_y = encoded_y.astype(Y_DTYPE, copy=False) return encoded_y - def _get_loss(self, sample_weight): + def _get_loss(self, sample_weight, n_threads): if self.loss == "categorical_crossentropy" and self.n_trees_per_iteration_ == 1: raise ValueError( "'categorical_crossentropy' is not suitable for " @@ -1620,8 +1657,12 @@ def _get_loss(self, sample_weight): if self.loss == "auto": if self.n_trees_per_iteration_ == 1: - return _LOSSES["binary_crossentropy"](sample_weight=sample_weight) + return _LOSSES["binary_crossentropy"]( + sample_weight=sample_weight, n_threads=n_threads + ) else: - return _LOSSES["categorical_crossentropy"](sample_weight=sample_weight) + return _LOSSES["categorical_crossentropy"]( + sample_weight=sample_weight, n_threads=n_threads + ) - return _LOSSES[self.loss](sample_weight=sample_weight) + return _LOSSES[self.loss](sample_weight=sample_weight, n_threads=n_threads) diff --git a/sklearn/ensemble/_hist_gradient_boosting/grower.py b/sklearn/ensemble/_hist_gradient_boosting/grower.py index 650c38f3ee3aa..1733b5745f8a2 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/grower.py +++ b/sklearn/ensemble/_hist_gradient_boosting/grower.py @@ -20,6 +20,8 @@ from .common import Y_DTYPE from .common import MonotonicConstraint from ._bitset import set_raw_bitset_from_binned_bitset +from sklearn.utils._openmp_helpers import _openmp_effective_n_threads + EPS = np.finfo(Y_DTYPE).eps # to avoid zero division errors @@ -175,6 +177,11 @@ class TreeGrower: shrinkage : float, default=1. The shrinkage parameter to apply to the leaves values, also known as learning rate. + n_threads : int, default=None + Number of OpenMP threads to use. `_openmp_effective_n_threads` is called + to determine the effective number of threads to use, which takes cgroups CPU + quotas into account. See the docstring of `_openmp_effective_n_threads` + for details.
""" def __init__( @@ -194,6 +201,7 @@ def __init__( l2_regularization=0.0, min_hessian_to_split=1e-3, shrinkage=1.0, + n_threads=None, ): self._validate_parameters( @@ -205,6 +213,7 @@ def __init__( l2_regularization, min_hessian_to_split, ) + n_threads = _openmp_effective_n_threads(n_threads) if n_bins_non_missing is None: n_bins_non_missing = n_bins - 1 @@ -257,7 +266,7 @@ def __init__( hessians_are_constant = hessians.shape[0] == 1 self.histogram_builder = HistogramBuilder( - X_binned, n_bins, gradients, hessians, hessians_are_constant + X_binned, n_bins, gradients, hessians, hessians_are_constant, n_threads ) missing_values_bin_idx = n_bins - 1 self.splitter = Splitter( @@ -272,6 +281,7 @@ def __init__( min_samples_leaf, min_gain_to_split, hessians_are_constant, + n_threads, ) self.n_bins_non_missing = n_bins_non_missing self.missing_values_bin_idx = missing_values_bin_idx @@ -286,6 +296,7 @@ def __init__( self.X_binned = X_binned self.min_gain_to_split = min_gain_to_split self.shrinkage = shrinkage + self.n_threads = n_threads self.splittable_nodes = [] self.finalized_leaves = [] self.total_find_split_time = 0.0 # time spent finding the best splits @@ -366,11 +377,11 @@ def _intilialize_root(self, gradients, hessians, hessians_are_constant): """Initialize root node and finalize it if needed.""" n_samples = self.X_binned.shape[0] depth = 0 - sum_gradients = sum_parallel(gradients) + sum_gradients = sum_parallel(gradients, self.n_threads) if self.histogram_builder.hessians_are_constant: sum_hessians = hessians[0] * n_samples else: - sum_hessians = sum_parallel(hessians) + sum_hessians = sum_parallel(hessians, self.n_threads) self.root = TreeNode( depth=depth, sample_indices=self.splitter.partition, diff --git a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx index 625aecc4f09f5..068935230e900 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx @@ -85,11 +85,13 @@ cdef class HistogramBuilder: G_H_DTYPE_C [::1] ordered_gradients G_H_DTYPE_C [::1] ordered_hessians unsigned char hessians_are_constant + int n_threads def __init__(self, const X_BINNED_DTYPE_C [::1, :] X_binned, unsigned int n_bins, G_H_DTYPE_C [::1] gradients, G_H_DTYPE_C [::1] hessians, - unsigned char hessians_are_constant): + unsigned char hessians_are_constant, + int n_threads): self.X_binned = X_binned self.n_features = X_binned.shape[1] @@ -102,6 +104,7 @@ cdef class HistogramBuilder: self.ordered_gradients = gradients.copy() self.ordered_hessians = hessians.copy() self.hessians_are_constant = hessians_are_constant + self.n_threads = n_threads def compute_histograms_brute( HistogramBuilder self, @@ -137,6 +140,7 @@ cdef class HistogramBuilder: shape=(self.n_features, self.n_bins), dtype=HISTOGRAM_DTYPE ) + int n_threads = self.n_threads with nogil: n_samples = sample_indices.shape[0] @@ -146,14 +150,17 @@ cdef class HistogramBuilder: # cache hit. 
if sample_indices.shape[0] != gradients.shape[0]: if hessians_are_constant: - for i in prange(n_samples, schedule='static'): + for i in prange(n_samples, schedule='static', + num_threads=n_threads): ordered_gradients[i] = gradients[sample_indices[i]] else: - for i in prange(n_samples, schedule='static'): + for i in prange(n_samples, schedule='static', + num_threads=n_threads): ordered_gradients[i] = gradients[sample_indices[i]] ordered_hessians[i] = hessians[sample_indices[i]] - for feature_idx in prange(n_features, schedule='static'): + for feature_idx in prange(n_features, schedule='static', + num_threads=n_threads): # Compute histogram of each feature self._compute_histogram_brute_single_feature( feature_idx, sample_indices, histograms) @@ -238,8 +245,10 @@ cdef class HistogramBuilder: shape=(self.n_features, self.n_bins), dtype=HISTOGRAM_DTYPE ) + int n_threads = self.n_threads - for feature_idx in prange(n_features, schedule='static', nogil=True): + for feature_idx in prange(n_features, schedule='static', nogil=True, + num_threads=n_threads): # Compute histogram of each feature _subtract_histograms(feature_idx, self.n_bins, diff --git a/sklearn/ensemble/_hist_gradient_boosting/loss.py b/sklearn/ensemble/_hist_gradient_boosting/loss.py index d0bf2d969cf88..c5870f97f900e 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/loss.py +++ b/sklearn/ensemble/_hist_gradient_boosting/loss.py @@ -20,14 +20,16 @@ from ._loss import _update_gradients_hessians_binary_crossentropy from ._loss import _update_gradients_hessians_categorical_crossentropy from ._loss import _update_gradients_hessians_poisson +from ...utils._openmp_helpers import _openmp_effective_n_threads from ...utils.stats import _weighted_percentile class BaseLoss(ABC): """Base class for a loss.""" - def __init__(self, hessians_are_constant): + def __init__(self, hessians_are_constant, n_threads=None): self.hessians_are_constant = hessians_are_constant + self.n_threads = _openmp_effective_n_threads(n_threads) def __call__(self, y_true, raw_predictions, sample_weight): """Return the weighted average loss""" @@ -157,11 +159,13 @@ class LeastSquares(BaseLoss): with what is done in LightGBM). """ - def __init__(self, sample_weight): + def __init__(self, sample_weight, n_threads=None): # If sample weights are provided, the hessians and gradients # are multiplied by sample_weight, which means the hessians are # equal to sample weights. - super().__init__(hessians_are_constant=sample_weight is None) + super().__init__( + hessians_are_constant=sample_weight is None, n_threads=n_threads + ) def pointwise_loss(self, y_true, raw_predictions): # shape (1, n_samples) --> (n_samples,). 
reshape(-1) is more likely to @@ -185,11 +189,18 @@ def update_gradients_and_hessians( raw_predictions = raw_predictions.reshape(-1) gradients = gradients.reshape(-1) if sample_weight is None: - _update_gradients_least_squares(gradients, y_true, raw_predictions) + _update_gradients_least_squares( + gradients, y_true, raw_predictions, self.n_threads + ) else: hessians = hessians.reshape(-1) _update_gradients_hessians_least_squares( - gradients, hessians, y_true, raw_predictions, sample_weight + gradients, + hessians, + y_true, + raw_predictions, + sample_weight, + self.n_threads, ) @@ -201,11 +212,13 @@ class LeastAbsoluteDeviation(BaseLoss): loss(x_i) = |y_true_i - raw_pred_i| """ - def __init__(self, sample_weight): + def __init__(self, sample_weight, n_threads=None): # If sample weights are provided, the hessians and gradients # are multiplied by sample_weight, which means the hessians are # equal to sample weights. - super().__init__(hessians_are_constant=sample_weight is None) + super().__init__( + hessians_are_constant=sample_weight is None, n_threads=n_threads + ) # This variable indicates whether the loss requires the leaves values to # be updated once the tree has been trained. The trees are trained to @@ -243,12 +256,20 @@ def update_gradients_and_hessians( gradients = gradients.reshape(-1) if sample_weight is None: _update_gradients_least_absolute_deviation( - gradients, y_true, raw_predictions + gradients, + y_true, + raw_predictions, + self.n_threads, ) else: hessians = hessians.reshape(-1) _update_gradients_hessians_least_absolute_deviation( - gradients, hessians, y_true, raw_predictions, sample_weight + gradients, + hessians, + y_true, + raw_predictions, + sample_weight, + self.n_threads, ) def update_leaves_values(self, grower, y_true, raw_predictions, sample_weight): @@ -285,8 +306,8 @@ class Poisson(BaseLoss): the computation of the gradients. """ - def __init__(self, sample_weight): - super().__init__(hessians_are_constant=False) + def __init__(self, sample_weight, n_threads=None): + super().__init__(hessians_are_constant=False, n_threads=n_threads) inverse_link_function = staticmethod(np.exp) @@ -318,7 +339,12 @@ def update_gradients_and_hessians( gradients = gradients.reshape(-1) hessians = hessians.reshape(-1) _update_gradients_hessians_poisson( - gradients, hessians, y_true, raw_predictions, sample_weight + gradients, + hessians, + y_true, + raw_predictions, + sample_weight, + self.n_threads, ) @@ -334,8 +360,8 @@ class BinaryCrossEntropy(BaseLoss): section 4.4.1 (about logistic regression). """ - def __init__(self, sample_weight): - super().__init__(hessians_are_constant=False) + def __init__(self, sample_weight, n_threads=None): + super().__init__(hessians_are_constant=False, n_threads=n_threads) inverse_link_function = staticmethod(expit) @@ -370,7 +396,7 @@ def update_gradients_and_hessians( gradients = gradients.reshape(-1) hessians = hessians.reshape(-1) _update_gradients_hessians_binary_crossentropy( - gradients, hessians, y_true, raw_predictions, sample_weight + gradients, hessians, y_true, raw_predictions, sample_weight, self.n_threads ) def predict_proba(self, raw_predictions): @@ -391,8 +417,8 @@ class CategoricalCrossEntropy(BaseLoss): cross-entropy to more than 2 classes. 
""" - def __init__(self, sample_weight): - super().__init__(hessians_are_constant=False) + def __init__(self, sample_weight, n_threads=None): + super().__init__(hessians_are_constant=False, n_threads=n_threads) def pointwise_loss(self, y_true, raw_predictions): one_hot_true = np.zeros_like(raw_predictions) @@ -419,7 +445,7 @@ def update_gradients_and_hessians( self, gradients, hessians, y_true, raw_predictions, sample_weight ): _update_gradients_hessians_categorical_crossentropy( - gradients, hessians, y_true, raw_predictions, sample_weight + gradients, hessians, y_true, raw_predictions, sample_weight, self.n_threads ) def predict_proba(self, raw_predictions): diff --git a/sklearn/ensemble/_hist_gradient_boosting/predictor.py b/sklearn/ensemble/_hist_gradient_boosting/predictor.py index a356325356dc2..746fa34753121 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/predictor.py +++ b/sklearn/ensemble/_hist_gradient_boosting/predictor.py @@ -42,7 +42,7 @@ def get_max_depth(self): """Return maximum depth among all leaves.""" return int(self.nodes["depth"].max()) - def predict(self, X, known_cat_bitsets, f_idx_map): + def predict(self, X, known_cat_bitsets, f_idx_map, n_threads): """Predict raw values for non-binned data. Parameters @@ -57,6 +57,9 @@ def predict(self, X, known_cat_bitsets, f_idx_map): Map from original feature index to the corresponding index in the known_cat_bitsets array. + n_threads : int + Number of OpenMP threads to use. + Returns ------- y : ndarray, shape (n_samples,) @@ -64,11 +67,17 @@ def predict(self, X, known_cat_bitsets, f_idx_map): """ out = np.empty(X.shape[0], dtype=Y_DTYPE) _predict_from_raw_data( - self.nodes, X, self.raw_left_cat_bitsets, known_cat_bitsets, f_idx_map, out + self.nodes, + X, + self.raw_left_cat_bitsets, + known_cat_bitsets, + f_idx_map, + n_threads, + out, ) return out - def predict_binned(self, X, missing_values_bin_idx): + def predict_binned(self, X, missing_values_bin_idx, n_threads): """Predict raw values for binned data. Parameters @@ -79,6 +88,8 @@ def predict_binned(self, X, missing_values_bin_idx): Index of the bin that is used for missing values. This is the index of the last bin and is always equal to max_bins (as passed to the GBDT classes), or equivalently to n_bins - 1. + n_threads : int + Number of OpenMP threads to use. 
Returns ------- @@ -87,7 +98,12 @@ def predict_binned(self, X, missing_values_bin_idx): """ out = np.empty(X.shape[0], dtype=Y_DTYPE) _predict_from_binned_data( - self.nodes, X, self.binned_left_cat_bitsets, missing_values_bin_idx, out + self.nodes, + X, + self.binned_left_cat_bitsets, + missing_values_bin_idx, + n_threads, + out, ) return out diff --git a/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx b/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx index 39884e5e6ee63..5ddba5cd02678 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx @@ -16,8 +16,6 @@ cimport cython from cython.parallel import prange import numpy as np cimport numpy as np -IF SKLEARN_OPENMP_PARALLELISM_ENABLED: - from openmp cimport omp_get_max_threads from libc.stdlib cimport malloc, free, qsort from libc.string cimport memcpy from numpy.math cimport INFINITY @@ -177,6 +175,7 @@ cdef class Splitter: unsigned int [::1] partition unsigned int [::1] left_indices_buffer unsigned int [::1] right_indices_buffer + int n_threads def __init__(self, const X_BINNED_DTYPE_C [::1, :] X_binned, @@ -189,7 +188,8 @@ cdef class Splitter: Y_DTYPE_C min_hessian_to_split=1e-3, unsigned int min_samples_leaf=20, Y_DTYPE_C min_gain_to_split=0., - unsigned char hessians_are_constant=False): + unsigned char hessians_are_constant=False, + unsigned int n_threads=1): self.X_binned = X_binned self.n_features = X_binned.shape[1] @@ -203,6 +203,7 @@ cdef class Splitter: self.min_samples_leaf = min_samples_leaf self.min_gain_to_split = min_gain_to_split self.hessians_are_constant = hessians_are_constant + self.n_threads = n_threads # The partition array maps each sample index into the leaves of the # tree (a leaf in this context is a node that isn't splitted yet, not @@ -306,10 +307,7 @@ cdef class Splitter: # split_info.left_cat_bitset directly, so we need a tmp var BITSET_INNER_DTYPE_C [:] cat_bitset_tmp = split_info.left_cat_bitset BITSET_DTYPE_C left_cat_bitset - IF SKLEARN_OPENMP_PARALLELISM_ENABLED: - int n_threads = omp_get_max_threads() - ELSE: - int n_threads = 1 + int n_threads = self.n_threads int [:] sizes = np.full(n_threads, n_samples // n_threads, dtype=np.int32) @@ -456,13 +454,15 @@ cdef class Splitter: const unsigned char [::1] has_missing_values = self.has_missing_values const unsigned char [::1] is_categorical = self.is_categorical const signed char [::1] monotonic_cst = self.monotonic_cst + int n_threads = self.n_threads with nogil: split_infos = malloc( self.n_features * sizeof(split_info_struct)) - for feature_idx in prange(n_features, schedule='static'): + for feature_idx in prange(n_features, schedule='static', + num_threads=n_threads): split_infos[feature_idx].feature_idx = feature_idx # For each feature, find best bin to split on diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_binning.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_binning.py index 57403c3792571..7cbc6603ee01f 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_binning.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_binning.py @@ -10,6 +10,9 @@ from sklearn.ensemble._hist_gradient_boosting.common import X_DTYPE from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE from sklearn.ensemble._hist_gradient_boosting.common import ALMOST_INF +from sklearn.utils._openmp_helpers import _openmp_effective_n_threads + +n_threads = _openmp_effective_n_threads() DATA = ( @@ -93,7 +96,7 @@ def 
test_map_to_bins(max_bins): ] binned = np.zeros_like(DATA, dtype=X_BINNED_DTYPE, order="F") last_bin_idx = max_bins - _map_to_bins(DATA, bin_thresholds, last_bin_idx, binned) + _map_to_bins(DATA, bin_thresholds, last_bin_idx, n_threads, binned) assert binned.shape == DATA.shape assert binned.dtype == np.uint8 assert binned.flags.f_contiguous diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py index 517a96a77044e..79581525b50bb 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py @@ -21,6 +21,9 @@ from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper from sklearn.utils import shuffle +from sklearn.utils._openmp_helpers import _openmp_effective_n_threads + +n_threads = _openmp_effective_n_threads() X_classification, y_classification = make_classification(random_state=0) @@ -693,7 +696,7 @@ def test_sum_hessians_are_sample_weight(loss_name): sample_weight = rng.normal(size=n_samples) - loss = _LOSSES[loss_name](sample_weight=sample_weight) + loss = _LOSSES[loss_name](sample_weight=sample_weight, n_threads=n_threads) gradients, hessians = loss.init_gradients_and_hessians( n_samples=n_samples, prediction_dim=1, sample_weight=sample_weight ) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py index fe4568339a9ac..6ff30a5888fe3 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py @@ -12,6 +12,9 @@ from sklearn.ensemble._hist_gradient_boosting.common import Y_DTYPE from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE from sklearn.ensemble._hist_gradient_boosting.common import X_BITSET_INNER_DTYPE +from sklearn.utils._openmp_helpers import _openmp_effective_n_threads + +n_threads = _openmp_effective_n_threads() def _make_training_data(n_bins=256, constant_hessian=True): @@ -195,12 +198,14 @@ def test_predictor_from_grower(): dtype=np.uint8, ) missing_values_bin_idx = n_bins - 1 - predictions = predictor.predict_binned(input_data, missing_values_bin_idx) + predictions = predictor.predict_binned( + input_data, missing_values_bin_idx, n_threads + ) expected_targets = [1, 1, 1, 1, 1, 1, -1, -1, -1] assert np.allclose(predictions, expected_targets) # Check that training set can be recovered exactly: - predictions = predictor.predict_binned(X_binned, missing_values_bin_idx) + predictions = predictor.predict_binned(X_binned, missing_values_bin_idx, n_threads) assert np.allclose(predictions, -all_gradients) @@ -381,7 +386,7 @@ def test_missing_value_predict_only(): known_cat_bitsets = np.zeros((0, 8), dtype=X_BITSET_INNER_DTYPE) f_idx_map = np.zeros(0, dtype=np.uint32) - y_pred = predictor.predict(all_nans, known_cat_bitsets, f_idx_map) + y_pred = predictor.predict(all_nans, known_cat_bitsets, f_idx_map, n_threads) assert np.all(y_pred == prediction_main_path) @@ -409,6 +414,7 @@ def test_split_on_nan_with_infinite_values(): n_bins_non_missing=n_bins_non_missing, has_missing_values=has_missing_values, min_samples_leaf=1, + n_threads=n_threads, ) grower.grow() @@ -424,9 +430,11 @@ def test_split_on_nan_with_infinite_values(): # Make sure in particular that the +inf sample is mapped to the left child # Note that 
lightgbm "fails" here and will assign the inf sample to the # right child, even though it's a "split on nan" situation. - predictions = predictor.predict(X, known_cat_bitsets, f_idx_map) + predictions = predictor.predict(X, known_cat_bitsets, f_idx_map, n_threads) predictions_binned = predictor.predict_binned( - X_binned, missing_values_bin_idx=bin_mapper.missing_values_bin_idx_ + X_binned, + missing_values_bin_idx=bin_mapper.missing_values_bin_idx_, + n_threads=n_threads, ) np.testing.assert_allclose(predictions, -gradients) np.testing.assert_allclose(predictions_binned, -gradients) @@ -450,6 +458,7 @@ def test_grow_tree_categories(): shrinkage=1.0, min_samples_leaf=1, is_categorical=is_categorical, + n_threads=n_threads, ) grower.grow() assert grower.n_nodes == 3 @@ -485,7 +494,9 @@ def test_grow_tree_categories(): # make sure binned missing values are mapped to the left child during # prediction prediction_binned = predictor.predict_binned( - np.asarray([[6]]).astype(X_BINNED_DTYPE), missing_values_bin_idx=6 + np.asarray([[6]]).astype(X_BINNED_DTYPE), + missing_values_bin_idx=6, + n_threads=n_threads, ) assert_allclose(prediction_binned, [-1]) # negative gradient @@ -493,7 +504,9 @@ def test_grow_tree_categories(): # prediction known_cat_bitsets = np.zeros((1, 8), dtype=np.uint32) # ignored anyway f_idx_map = np.array([0], dtype=np.uint32) - prediction = predictor.predict(np.array([[np.nan]]), known_cat_bitsets, f_idx_map) + prediction = predictor.predict( + np.array([[np.nan]]), known_cat_bitsets, f_idx_map, n_threads + ) assert_allclose(prediction, [-1]) @@ -535,14 +548,18 @@ def test_ohe_equivalence(min_samples_leaf, n_unique_categories, target): predictor = grower.make_predictor( binning_thresholds=np.zeros((1, n_unique_categories)) ) - preds = predictor.predict_binned(X_binned, missing_values_bin_idx=255) + preds = predictor.predict_binned( + X_binned, missing_values_bin_idx=255, n_threads=n_threads + ) grower_ohe = TreeGrower(X_ohe, gradients, hessians, **grower_params) grower_ohe.grow() predictor_ohe = grower_ohe.make_predictor( binning_thresholds=np.zeros((X_ohe.shape[1], n_unique_categories)) ) - preds_ohe = predictor_ohe.predict_binned(X_ohe, missing_values_bin_idx=255) + preds_ohe = predictor_ohe.predict_binned( + X_ohe, missing_values_bin_idx=255, n_threads=n_threads + ) assert predictor.get_max_depth() <= predictor_ohe.get_max_depth() if target == "binary" and n_unique_categories > 2: diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_loss.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_loss.py index 9081471477691..813163802f956 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_loss.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_loss.py @@ -11,6 +11,9 @@ from sklearn.ensemble._hist_gradient_boosting.common import Y_DTYPE from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE from sklearn.utils._testing import skip_if_32bit +from sklearn.utils._openmp_helpers import _openmp_effective_n_threads + +n_threads = _openmp_effective_n_threads() def get_derivatives_helper(loss): @@ -134,7 +137,7 @@ def test_numerical_gradients(loss, n_classes, prediction_dim, seed=0): else: y_true = rng.randint(0, n_classes, size=n_samples).astype(Y_DTYPE) raw_predictions = rng.normal(size=(prediction_dim, n_samples)).astype(Y_DTYPE) - loss = _LOSSES[loss](sample_weight=None) + loss = _LOSSES[loss](sample_weight=None, n_threads=n_threads) get_gradients, get_hessians = get_derivatives_helper(loss) # only take gradients and hessians of 
first tree / class. @@ -297,7 +300,7 @@ def test_sample_weight_multiplies_gradients(loss, problem, sample_weight): else: sample_weight = rng.normal(size=n_samples).astype(Y_DTYPE) - loss_ = _LOSSES[loss](sample_weight=sample_weight) + loss_ = _LOSSES[loss](sample_weight=sample_weight, n_threads=n_threads) baseline_prediction = loss_.get_baseline_prediction(y_true, None, prediction_dim) raw_predictions = np.zeros( diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_monotonic_contraints.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_monotonic_contraints.py index fa9496d386fd8..4ab65c55a8620 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_monotonic_contraints.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_monotonic_contraints.py @@ -12,6 +12,9 @@ from sklearn.ensemble._hist_gradient_boosting.histogram import HistogramBuilder from sklearn.ensemble import HistGradientBoostingRegressor from sklearn.ensemble import HistGradientBoostingClassifier +from sklearn.utils._openmp_helpers import _openmp_effective_n_threads + +n_threads = _openmp_effective_n_threads() def is_increasing(a): @@ -295,7 +298,7 @@ def test_bounded_value_min_gain_to_split(): hessians_are_constant = False builder = HistogramBuilder( - X_binned, n_bins, all_gradients, all_hessians, hessians_are_constant + X_binned, n_bins, all_gradients, all_hessians, hessians_are_constant, n_threads ) n_bins_non_missing = np.array([n_bins - 1] * X_binned.shape[1], dtype=np.uint32) has_missing_values = np.array([False] * X_binned.shape[1], dtype=np.uint8) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_predictor.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_predictor.py index f0227969ae366..e3c725965f550 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_predictor.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_predictor.py @@ -20,6 +20,9 @@ set_bitset_memoryview, set_raw_bitset_from_binned_bitset, ) +from sklearn.utils._openmp_helpers import _openmp_effective_n_threads + +n_threads = _openmp_effective_n_threads() @pytest.mark.parametrize("n_bins", [200, 256]) @@ -54,10 +57,10 @@ def test_regression_dataset(n_bins): known_cat_bitsets = np.zeros((0, 8), dtype=X_BITSET_INNER_DTYPE) f_idx_map = np.zeros(0, dtype=np.uint32) - y_pred_train = predictor.predict(X_train, known_cat_bitsets, f_idx_map) + y_pred_train = predictor.predict(X_train, known_cat_bitsets, f_idx_map, n_threads) assert r2_score(y_train, y_pred_train) > 0.82 - y_pred_test = predictor.predict(X_test, known_cat_bitsets, f_idx_map) + y_pred_test = predictor.predict(X_test, known_cat_bitsets, f_idx_map, n_threads) assert r2_score(y_test, y_pred_test) > 0.67 @@ -101,7 +104,7 @@ def test_infinite_values_and_thresholds(num_threshold, expected_predictions): f_idx_map = np.zeros(0, dtype=np.uint32) predictor = TreePredictor(nodes, binned_cat_bitsets, raw_categorical_bitsets) - predictions = predictor.predict(X, known_cat_bitset, f_idx_map) + predictions = predictor.predict(X, known_cat_bitset, f_idx_map, n_threads) assert np.all(predictions == expected_predictions) @@ -151,7 +154,9 @@ def test_categorical_predictor(bins_go_left, expected_predictions): predictor = TreePredictor(nodes, binned_cat_bitsets, raw_categorical_bitsets) # Check binned data gives correct predictions - prediction_binned = predictor.predict_binned(X_binned, missing_values_bin_idx=6) + prediction_binned = predictor.predict_binned( + X_binned, missing_values_bin_idx=6, n_threads=n_threads + ) 
assert_allclose(prediction_binned, expected_predictions) # manually construct bitset @@ -161,17 +166,22 @@ def test_categorical_predictor(bins_go_left, expected_predictions): # Check with un-binned data predictions = predictor.predict( - categories.reshape(-1, 1), known_cat_bitsets, f_idx_map + categories.reshape(-1, 1), known_cat_bitsets, f_idx_map, n_threads ) assert_allclose(predictions, expected_predictions) # Check missing goes left because missing_values_bin_idx=6 X_binned_missing = np.array([[6]], dtype=X_BINNED_DTYPE).T - predictions = predictor.predict_binned(X_binned_missing, missing_values_bin_idx=6) + predictions = predictor.predict_binned( + X_binned_missing, missing_values_bin_idx=6, n_threads=n_threads + ) assert_allclose(predictions, [1]) # missing and unknown go left predictions = predictor.predict( - np.array([[np.nan, 17]], dtype=X_DTYPE).T, known_cat_bitsets, f_idx_map + np.array([[np.nan, 17]], dtype=X_DTYPE).T, + known_cat_bitsets, + f_idx_map, + n_threads, ) assert_allclose(predictions, [1, 1]) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_splitting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_splitting.py index aa7befe90211e..0d19bdc6df72b 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_splitting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_splitting.py @@ -12,6 +12,9 @@ ) from sklearn.ensemble._hist_gradient_boosting.histogram import HistogramBuilder from sklearn.utils._testing import skip_if_32bit +from sklearn.utils._openmp_helpers import _openmp_effective_n_threads + +n_threads = _openmp_effective_n_threads() @pytest.mark.parametrize("n_bins", [3, 32, 256]) @@ -40,7 +43,12 @@ def test_histogram_split(n_bins): sum_gradients = all_gradients.sum() builder = HistogramBuilder( - X_binned, n_bins, all_gradients, all_hessians, hessians_are_constant + X_binned, + n_bins, + all_gradients, + all_hessians, + hessians_are_constant, + n_threads, ) n_bins_non_missing = np.array( [n_bins - 1] * X_binned.shape[1], dtype=np.uint32 @@ -120,7 +128,7 @@ def test_gradient_and_hessian_sanity(constant_hessian): sum_hessians = all_hessians.sum() builder = HistogramBuilder( - X_binned, n_bins, all_gradients, all_hessians, constant_hessian + X_binned, n_bins, all_gradients, all_hessians, constant_hessian, n_threads ) n_bins_non_missing = np.array([n_bins - 1] * X_binned.shape[1], dtype=np.uint32) has_missing_values = np.array([False] * X_binned.shape[1], dtype=np.uint8) @@ -263,7 +271,7 @@ def test_split_indices(): hessians_are_constant = True builder = HistogramBuilder( - X_binned, n_bins, all_gradients, all_hessians, hessians_are_constant + X_binned, n_bins, all_gradients, all_hessians, hessians_are_constant, n_threads ) n_bins_non_missing = np.array([n_bins] * X_binned.shape[1], dtype=np.uint32) has_missing_values = np.array([False] * X_binned.shape[1], dtype=np.uint8) @@ -339,7 +347,7 @@ def test_min_gain_to_split(): hessians_are_constant = False builder = HistogramBuilder( - X_binned, n_bins, all_gradients, all_hessians, hessians_are_constant + X_binned, n_bins, all_gradients, all_hessians, hessians_are_constant, n_threads ) n_bins_non_missing = np.array([n_bins - 1] * X_binned.shape[1], dtype=np.uint32) has_missing_values = np.array([False] * X_binned.shape[1], dtype=np.uint8) @@ -508,7 +516,7 @@ def test_splitting_missing_values( hessians_are_constant = True builder = HistogramBuilder( - X_binned, n_bins, all_gradients, all_hessians, hessians_are_constant + X_binned, n_bins, all_gradients, all_hessians, 
hessians_are_constant, n_threads ) n_bins_non_missing = np.array([n_bins_non_missing], dtype=np.uint32) @@ -612,7 +620,7 @@ def test_splitting_categorical_cat_smooth( hessians_are_constant = True builder = HistogramBuilder( - X_binned, n_bins, all_gradients, all_hessians, hessians_are_constant + X_binned, n_bins, all_gradients, all_hessians, hessians_are_constant, n_threads ) n_bins_non_missing = np.array([n_bins_non_missing], dtype=np.uint32) @@ -798,7 +806,7 @@ def test_splitting_categorical_sanity( hessians_are_constant = True builder = HistogramBuilder( - X_binned, n_bins, all_gradients, all_hessians, hessians_are_constant + X_binned, n_bins, all_gradients, all_hessians, hessians_are_constant, n_threads ) n_bins_non_missing = np.array([n_bins_non_missing], dtype=np.uint32) diff --git a/sklearn/ensemble/_hist_gradient_boosting/utils.pyx b/sklearn/ensemble/_hist_gradient_boosting/utils.pyx index 3b323b3e298b8..77b3101cdb656 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/utils.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/utils.pyx @@ -143,13 +143,14 @@ def get_equivalent_estimator(estimator, lib='lightgbm'): return CatBoostRegressor(**catboost_params) -def sum_parallel(G_H_DTYPE_C [:] array): +def sum_parallel(G_H_DTYPE_C [:] array, int n_threads): cdef: Y_DTYPE_C out = 0. int i = 0 - for i in prange(array.shape[0], schedule='static', nogil=True): + for i in prange(array.shape[0], schedule='static', nogil=True, + num_threads=n_threads): out += array[i] return out
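The pattern this diff applies throughout the module is to resolve the effective OpenMP thread count once, in Python-level code, via the private helper `sklearn.utils._openmp_helpers._openmp_effective_n_threads`, and then to pass that integer explicitly to every Cython `prange(..., num_threads=n_threads)` loop rather than letting OpenMP fall back to `omp_get_max_threads()` at each call site. Below is a minimal, hypothetical sketch of that resolution step in plain Python, assuming joblib's `cpu_count` as the cgroups-aware CPU counter; `effective_n_threads` is a stand-in name, not the actual implementation of `_openmp_effective_n_threads`, which is written in Cython and also handles builds without OpenMP support:

    # Simplified sketch only -- not the real sklearn.utils._openmp_helpers code.
    from joblib import cpu_count  # joblib/loky cpu_count takes cgroups CPU quotas into account


    def effective_n_threads(n_threads=None):
        """Hypothetical stand-in for _openmp_effective_n_threads.

        None means "use as many threads as the CPU quota allows"; a positive
        integer is clipped to that quota.
        """
        # Inside e.g. `docker run --cpus=2` this is 2, not the host's core count,
        # which is what prevents over-subscription of OpenMP threads.
        max_n_threads = cpu_count()
        if n_threads is None:
            return max_n_threads
        return max(1, min(n_threads, max_n_threads))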
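For the user-facing effect, a rough usage example; nothing in the public API changes, and no particular thread count is asserted here because the result depends on the runtime's cgroup quota and on environment variables such as `OMP_NUM_THREADS`:

    from sklearn.datasets import make_regression
    from sklearn.ensemble import HistGradientBoostingRegressor
    from sklearn.utils._openmp_helpers import _openmp_effective_n_threads

    X, y = make_regression(n_samples=1_000, n_features=10, random_state=0)

    # The same private helper the estimators now call internally; in a container
    # started with e.g. `docker run --cpus=2` this should report 2 rather than
    # the host's full core count.
    print("effective OpenMP threads:", _openmp_effective_n_threads())

    # User code is unchanged -- only the num_threads passed to the Cython kernels
    # during fit and predict differs.
    model = HistGradientBoostingRegressor(max_iter=50, random_state=0).fit(X, y)
    print(model.score(X, y))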