Skip to content

Replacing np.zeroes by np.empty for Gradient Boosting for better scaling by cores #14380

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 5 additions & 7 deletions sklearn/ensemble/_hist_gradient_boosting/grower.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,8 +266,8 @@ def _intilialize_root(self, gradients, hessians, hessians_are_constant):
self._finalize_leaf(self.root)
return

self.root.histograms = self.histogram_builder.compute_histograms_brute(
self.root.sample_indices)
self.root.histograms, _ = self.histogram_builder.compute_histograms_brute(
self.root.sample_indices, None)
self._compute_best_split_and_push(self.root)

def _compute_best_split_and_push(self, node):
Expand Down Expand Up @@ -374,12 +374,10 @@ def split_next(self):
# smallest number of samples, and the subtraction trick O(n_bins)
# on the other one.
tic = time()
smallest_child.histograms = \
smallest_child.histograms, largest_child.histograms = \
self.histogram_builder.compute_histograms_brute(
smallest_child.sample_indices)
largest_child.histograms = \
self.histogram_builder.compute_histograms_subtraction(
node.histograms, smallest_child.histograms)
smallest_child.sample_indices, node.histograms)

self.total_compute_hist_time += time() - tic

tic = time()
Expand Down
37 changes: 27 additions & 10 deletions sklearn/ensemble/_hist_gradient_boosting/histogram.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,9 @@ cdef class HistogramBuilder:

def compute_histograms_brute(
HistogramBuilder self,
const unsigned int [::1] sample_indices): # IN
const unsigned int [::1] sample_indices,
hist_struct [:, ::1] parent): # IN
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't fully understand why you need to change the API here.

We have 2 routines to compute histograms:

  • compute_histograms_subtraction, used when we know the histogram of the parent and the sibling of a node (using the subtraction trick)

  • compute_histograms_brute used in all other cases. With that function, we do not need the histogram of the parent.


"""Compute the histograms of the node by scanning through all the data.

For a given feature, the complexity is O(n_samples)
Expand All @@ -122,6 +124,7 @@ cdef class HistogramBuilder:
int n_samples
int feature_idx
int i

# need local views to avoid python interactions
unsigned char hessians_are_constant = \
self.hessians_are_constant
Expand All @@ -130,10 +133,13 @@ cdef class HistogramBuilder:
G_H_DTYPE_C [::1] gradients = self.gradients
G_H_DTYPE_C [::1] ordered_hessians = self.ordered_hessians
G_H_DTYPE_C [::1] hessians = self.hessians
hist_struct [:, ::1] histograms = np.zeros(
# hist_struct [:, ::1] histograms = np.zeros(
hist_struct [:, ::1] histograms = np.empty(
shape=(self.n_features, self.max_bins),
dtype=HISTOGRAM_DTYPE
)
dtype=HISTOGRAM_DTYPE)
hist_struct [:, ::1] sibling = np.empty(
shape=(self.n_features, self.max_bins),
dtype=HISTOGRAM_DTYPE)

with nogil:
n_samples = sample_indices.shape[0]
Expand All @@ -150,18 +156,21 @@ cdef class HistogramBuilder:
ordered_gradients[i] = gradients[sample_indices[i]]
ordered_hessians[i] = hessians[sample_indices[i]]

with nogil:
n_samples = sample_indices.shape[0]
for feature_idx in prange(n_features, schedule='static'):
# Compute histogram of each feature
self._compute_histogram_brute_single_feature(
feature_idx, sample_indices, histograms)

return histograms
feature_idx, sample_indices, histograms, parent, sibling)
return histograms, sibling

cdef void _compute_histogram_brute_single_feature(
HistogramBuilder self,
const int feature_idx,
const unsigned int [::1] sample_indices, # IN
hist_struct [:, ::1] histograms) nogil: # OUT
hist_struct [:, ::1] histograms,
hist_struct [:, ::1] parent,
hist_struct [:, ::1] sibling) nogil: # OUT
"""Compute the histogram for a given feature."""

cdef:
Expand All @@ -176,6 +185,11 @@ cdef class HistogramBuilder:
unsigned char hessians_are_constant = \
self.hessians_are_constant

for i in range(self.max_bins):
histograms[feature_idx, i].sum_gradients = 0
histograms[feature_idx, i].sum_hessians = 0
histograms[feature_idx, i].count = 0

if root_node:
if hessians_are_constant:
_build_histogram_root_no_hessian(feature_idx, X_binned,
Expand All @@ -194,6 +208,7 @@ cdef class HistogramBuilder:
_build_histogram(feature_idx, sample_indices,
X_binned, ordered_gradients,
ordered_hessians, histograms)
_subtract_histograms(feature_idx, self.max_bins, parent, histograms, sibling)

def compute_histograms_subtraction(
HistogramBuilder self,
Expand Down Expand Up @@ -225,12 +240,14 @@ cdef class HistogramBuilder:
cdef:
int feature_idx
int n_features = self.n_features
hist_struct [:, ::1] histograms = np.zeros(
# hist_struct [:, ::1] histograms = np.zeros(
hist_struct [:, ::1] histograms = np.empty(
shape=(self.n_features, self.max_bins),
dtype=HISTOGRAM_DTYPE
)

for feature_idx in prange(n_features, schedule='static', nogil=True):
# for feature_idx in prange(n_features, schedule='static', nogil=True):
for feature_idx in range(n_features):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why replace prange with range?

# Compute histogram of each feature
_subtract_histograms(feature_idx,
self.max_bins,
Expand Down