From e37ef50158a0c6fca7311b4604f990e30dafeb25 Mon Sep 17 00:00:00 2001
From: Raghav RV
Date: Wed, 7 Dec 2016 17:28:39 +0100
Subject: [PATCH 01/17] FIX MAE reg. criterion: Use safe_realloc to avoid memory leak
---
 sklearn/tree/_utils.pyx | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/sklearn/tree/_utils.pyx b/sklearn/tree/_utils.pyx
index a4ccc71946bd1..b6d23d0e38e10 100644
--- a/sklearn/tree/_utils.pyx
+++ b/sklearn/tree/_utils.pyx
@@ -341,8 +341,10 @@ cdef class WeightedPQueue:
     cdef void reset(self) nogil:
         """Reset the WeightedPQueue to its state at construction"""
         self.array_ptr = 0
-        self.array_ = <WeightedPQueueRecord*> calloc(self.capacity,
-                                                     sizeof(WeightedPQueueRecord))
+        with gil:
+            safe_realloc(&self.array_, self.capacity)
+        # self.array_ = <WeightedPQueueRecord*> calloc(self.capacity,
+        #                                              sizeof(WeightedPQueueRecord))

     cdef bint is_empty(self) nogil:
         return self.array_ptr <= 0
@@ -361,14 +363,16 @@ cdef class WeightedPQueue:
         # Resize if capacity not sufficient
         if array_ptr >= self.capacity:
             self.capacity *= 2
-            array = <WeightedPQueueRecord*> realloc(self.array_,
-                                                    self.capacity *
-                                                    sizeof(WeightedPQueueRecord))
-
-            if array == NULL:
-                # no free; __dealloc__ handles that
-                return -1
-            self.array_ = array
+            with gil:
+                safe_realloc(&self.array_, self.capacity)
+            # array = <WeightedPQueueRecord*> realloc(self.array_,
+            #                                         self.capacity *
+            #                                         sizeof(WeightedPQueueRecord))
+
+            # if array == NULL:
+            #     # no free; __dealloc__ handles that
+            #     return -1
+            # self.array_ = array

         # Put element as last element of array
         array = self.array_

From 208c336fd9274536b43f83c83243bfc75f1436a0 Mon Sep 17 00:00:00 2001
From: Raghav RV
Date: Wed, 7 Dec 2016 17:46:31 +0100
Subject: [PATCH 02/17] Release GIL in safe_realloc and clean up scaffolding
---
 sklearn/tree/_utils.pxd |  2 +-
 sklearn/tree/_utils.pyx | 23 +++++++----------------
 2 files changed, 8 insertions(+), 17 deletions(-)

diff --git a/sklearn/tree/_utils.pxd b/sklearn/tree/_utils.pxd
index fce3abcb734db..f5bc8d765394b 100644
--- a/sklearn/tree/_utils.pxd
+++ b/sklearn/tree/_utils.pxd
@@ -41,7 +41,7 @@ ctypedef fused realloc_ptr:
     (Node*)
     (Node**)

-cdef realloc_ptr safe_realloc(realloc_ptr* p, size_t nelems) except *
+cdef realloc_ptr safe_realloc(realloc_ptr* p, size_t nelems) nogil except *


 cdef np.ndarray sizet_ptr_to_ndarray(SIZE_t* data, SIZE_t size)

diff --git a/sklearn/tree/_utils.pyx b/sklearn/tree/_utils.pyx
index b6d23d0e38e10..8368509089a1f 100644
--- a/sklearn/tree/_utils.pyx
+++ b/sklearn/tree/_utils.pyx
@@ -25,17 +25,19 @@ np.import_array()
 # Helper functions
 # =============================================================================

-cdef realloc_ptr safe_realloc(realloc_ptr* p, size_t nelems) except *:
+cdef realloc_ptr safe_realloc(realloc_ptr* p, size_t nelems) nogil except *:
     # sizeof(realloc_ptr[0]) would be more like idiomatic C, but causes Cython
     # 0.20.1 to crash.
cdef size_t nbytes = nelems * sizeof(p[0][0]) if nbytes / sizeof(p[0][0]) != nelems: # Overflow in the multiplication - raise MemoryError("could not allocate (%d * %d) bytes" - % (nelems, sizeof(p[0][0]))) + with gil: + raise MemoryError("could not allocate (%d * %d) bytes" + % (nelems, sizeof(p[0][0]))) cdef realloc_ptr tmp = realloc(p[0], nbytes) if tmp == NULL: - raise MemoryError("could not allocate %d bytes" % nbytes) + with gil: + raise MemoryError("could not allocate %d bytes" % nbytes) p[0] = tmp return tmp # for convenience @@ -341,10 +343,7 @@ cdef class WeightedPQueue: cdef void reset(self) nogil: """Reset the WeightedPQueue to its state at construction""" self.array_ptr = 0 - with gil: - safe_realloc(&self.array_, self.capacity) - # self.array_ = calloc(self.capacity, - # sizeof(WeightedPQueueRecord)) + safe_realloc(&self.array_, self.capacity) cdef bint is_empty(self) nogil: return self.array_ptr <= 0 @@ -365,14 +364,6 @@ cdef class WeightedPQueue: self.capacity *= 2 with gil: safe_realloc(&self.array_, self.capacity) - # array = realloc(self.array_, - # self.capacity * - # sizeof(WeightedPQueueRecord)) - - # if array == NULL: - # # no free; __dealloc__ handles that - # return -1 - # self.array_ = array # Put element as last element of array array = self.array_ From b355ec04287d5670c6ab87a95e136b5079b6d5a3 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Thu, 8 Dec 2016 14:27:48 +0100 Subject: [PATCH 03/17] As gil is released in safe_realloc, no need of a with gil block --- sklearn/tree/_utils.pyx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/tree/_utils.pyx b/sklearn/tree/_utils.pyx index 8368509089a1f..9c2bd1f4ea565 100644 --- a/sklearn/tree/_utils.pyx +++ b/sklearn/tree/_utils.pyx @@ -362,8 +362,7 @@ cdef class WeightedPQueue: # Resize if capacity not sufficient if array_ptr >= self.capacity: self.capacity *= 2 - with gil: - safe_realloc(&self.array_, self.capacity) + safe_realloc(&self.array_, self.capacity) # Put element as last element of array array = self.array_ From 086506c3100e9668ce2ca4f83fc15bb21af9212b Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Wed, 21 Dec 2016 13:17:35 +0100 Subject: [PATCH 04/17] Use except * to propagate error in all cdef functions --- sklearn/tree/_criterion.pxd | 18 ++++----- sklearn/tree/_criterion.pyx | 78 ++++++++++++++++++------------------- sklearn/tree/_splitter.pxd | 8 ++-- sklearn/tree/_splitter.pyx | 42 ++++++++++---------- sklearn/tree/_tree.pxd | 28 ++++++------- sklearn/tree/_tree.pyx | 29 +++++++------- sklearn/tree/_utils.pxd | 54 ++++++++++++------------- sklearn/tree/_utils.pyx | 68 ++++++++++++++++---------------- 8 files changed, 164 insertions(+), 161 deletions(-) diff --git a/sklearn/tree/_criterion.pxd b/sklearn/tree/_criterion.pxd index cf6d32d1b7fe1..382040f2d0091 100644 --- a/sklearn/tree/_criterion.pxd +++ b/sklearn/tree/_criterion.pxd @@ -55,13 +55,13 @@ cdef class Criterion: # Methods cdef void init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight, double weighted_n_samples, SIZE_t* samples, SIZE_t start, - SIZE_t end) nogil - cdef void reset(self) nogil - cdef void reverse_reset(self) nogil - cdef void update(self, SIZE_t new_pos) nogil - cdef double node_impurity(self) nogil + SIZE_t end) nogil except * + cdef void reset(self) nogil except * + cdef void reverse_reset(self) nogil except * + cdef void update(self, SIZE_t new_pos) nogil except * + cdef double node_impurity(self) nogil except * cdef void children_impurity(self, double* impurity_left, - double* impurity_right) 
nogil - cdef void node_value(self, double* dest) nogil - cdef double impurity_improvement(self, double impurity) nogil - cdef double proxy_impurity_improvement(self) nogil + double* impurity_right) nogil except * + cdef void node_value(self, double* dest) nogil except * + cdef double impurity_improvement(self, double impurity) nogil except * + cdef double proxy_impurity_improvement(self) nogil except * diff --git a/sklearn/tree/_criterion.pyx b/sklearn/tree/_criterion.pyx index 26c40dc8d6616..537aedd34d01e 100644 --- a/sklearn/tree/_criterion.pyx +++ b/sklearn/tree/_criterion.pyx @@ -53,7 +53,7 @@ cdef class Criterion: cdef void init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight, double weighted_n_samples, SIZE_t* samples, SIZE_t start, - SIZE_t end) nogil: + SIZE_t end) nogil except *: """Placeholder for a method which will initialize the criterion. Parameters @@ -79,7 +79,7 @@ cdef class Criterion: pass - cdef void reset(self) nogil: + cdef void reset(self) nogil except *: """Reset the criterion at pos=start. This method must be implemented by the subclass. @@ -87,14 +87,14 @@ cdef class Criterion: pass - cdef void reverse_reset(self) nogil: + cdef void reverse_reset(self) nogil except *: """Reset the criterion at pos=end. This method must be implemented by the subclass. """ pass - cdef void update(self, SIZE_t new_pos) nogil: + cdef void update(self, SIZE_t new_pos) nogil except *: """Updated statistics by moving samples[pos:new_pos] to the left child. This updates the collected statistics by moving samples[pos:new_pos] @@ -109,7 +109,7 @@ cdef class Criterion: pass - cdef double node_impurity(self) nogil: + cdef double node_impurity(self) nogil except *: """Placeholder for calculating the impurity of the node. Placeholder for a method which will evaluate the impurity of @@ -120,7 +120,7 @@ cdef class Criterion: pass cdef void children_impurity(self, double* impurity_left, - double* impurity_right) nogil: + double* impurity_right) nogil except *: """Placeholder for calculating the impurity of children. Placeholder for a method which evaluates the impurity in @@ -139,7 +139,7 @@ cdef class Criterion: pass - cdef void node_value(self, double* dest) nogil: + cdef void node_value(self, double* dest) nogil except *: """Placeholder for storing the node value. Placeholder for a method which will compute the node value @@ -153,7 +153,7 @@ cdef class Criterion: pass - cdef double proxy_impurity_improvement(self) nogil: + cdef double proxy_impurity_improvement(self) nogil except *: """Compute a proxy of the impurity reduction This method is used to speed up the search for the best split. @@ -171,7 +171,7 @@ cdef class Criterion: return (- self.weighted_n_right * impurity_right - self.weighted_n_left * impurity_left) - cdef double impurity_improvement(self, double impurity) nogil: + cdef double impurity_improvement(self, double impurity) nogil except *: """Compute the improvement in impurity This method computes the improvement in impurity when a split occurs. @@ -283,7 +283,7 @@ cdef class ClassificationCriterion(Criterion): cdef void init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight, double weighted_n_samples, - SIZE_t* samples, SIZE_t start, SIZE_t end) nogil: + SIZE_t* samples, SIZE_t start, SIZE_t end) nogil except *: """Initialize the criterion at node samples[start:end] and children samples[start:start] and samples[start:end]. 
@@ -348,7 +348,7 @@ cdef class ClassificationCriterion(Criterion): # Reset to pos=start self.reset() - cdef void reset(self) nogil: + cdef void reset(self) nogil except *: """Reset the criterion at pos=start.""" self.pos = self.start @@ -371,7 +371,7 @@ cdef class ClassificationCriterion(Criterion): sum_left += self.sum_stride sum_right += self.sum_stride - cdef void reverse_reset(self) nogil: + cdef void reverse_reset(self) nogil except *: """Reset the criterion at pos=end.""" self.pos = self.end @@ -393,7 +393,7 @@ cdef class ClassificationCriterion(Criterion): sum_left += self.sum_stride sum_right += self.sum_stride - cdef void update(self, SIZE_t new_pos) nogil: + cdef void update(self, SIZE_t new_pos) nogil except *: """Updated statistics by moving samples[pos:new_pos] to the left child. Parameters @@ -471,14 +471,14 @@ cdef class ClassificationCriterion(Criterion): self.pos = new_pos - cdef double node_impurity(self) nogil: + cdef double node_impurity(self) nogil except *: pass cdef void children_impurity(self, double* impurity_left, - double* impurity_right) nogil: + double* impurity_right) nogil except *: pass - cdef void node_value(self, double* dest) nogil: + cdef void node_value(self, double* dest) nogil except *: """Compute the node value of samples[start:end] and save it into dest. Parameters @@ -513,7 +513,7 @@ cdef class Entropy(ClassificationCriterion): cross-entropy = -\sum_{k=0}^{K-1} count_k log(count_k) """ - cdef double node_impurity(self) nogil: + cdef double node_impurity(self) nogil except *: """Evaluate the impurity of the current node, i.e. the impurity of samples[start:end], using the cross-entropy criterion.""" @@ -536,7 +536,7 @@ cdef class Entropy(ClassificationCriterion): return entropy / self.n_outputs cdef void children_impurity(self, double* impurity_left, - double* impurity_right) nogil: + double* impurity_right) nogil except *: """Evaluate the impurity in children nodes i.e. the impurity of the left child (samples[start:pos]) and the @@ -595,7 +595,7 @@ cdef class Gini(ClassificationCriterion): = 1 - \sum_{k=0}^{K-1} count_k ** 2 """ - cdef double node_impurity(self) nogil: + cdef double node_impurity(self) nogil except *: """Evaluate the impurity of the current node, i.e. the impurity of samples[start:end] using the Gini criterion.""" @@ -623,7 +623,7 @@ cdef class Gini(ClassificationCriterion): return gini / self.n_outputs cdef void children_impurity(self, double* impurity_left, - double* impurity_right) nogil: + double* impurity_right) nogil except *: """Evaluate the impurity in children nodes i.e. 
the impurity of the left child (samples[start:pos]) and the @@ -738,7 +738,7 @@ cdef class RegressionCriterion(Criterion): cdef void init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight, double weighted_n_samples, SIZE_t* samples, SIZE_t start, - SIZE_t end) nogil: + SIZE_t end) nogil except *: """Initialize the criterion at node samples[start:end] and children samples[start:start] and samples[start:end].""" # Initialize fields @@ -779,7 +779,7 @@ cdef class RegressionCriterion(Criterion): # Reset to pos=start self.reset() - cdef void reset(self) nogil: + cdef void reset(self) nogil except *: """Reset the criterion at pos=start.""" cdef SIZE_t n_bytes = self.n_outputs * sizeof(double) memset(self.sum_left, 0, n_bytes) @@ -789,7 +789,7 @@ cdef class RegressionCriterion(Criterion): self.weighted_n_right = self.weighted_n_node_samples self.pos = self.start - cdef void reverse_reset(self) nogil: + cdef void reverse_reset(self) nogil except *: """Reset the criterion at pos=end.""" cdef SIZE_t n_bytes = self.n_outputs * sizeof(double) memset(self.sum_right, 0, n_bytes) @@ -799,7 +799,7 @@ cdef class RegressionCriterion(Criterion): self.weighted_n_left = self.weighted_n_node_samples self.pos = self.end - cdef void update(self, SIZE_t new_pos) nogil: + cdef void update(self, SIZE_t new_pos) nogil except *: """Updated statistics by moving samples[pos:new_pos] to the left.""" cdef double* sum_left = self.sum_left @@ -860,14 +860,14 @@ cdef class RegressionCriterion(Criterion): self.pos = new_pos - cdef double node_impurity(self) nogil: + cdef double node_impurity(self) nogil except *: pass cdef void children_impurity(self, double* impurity_left, - double* impurity_right) nogil: + double* impurity_right) nogil except *: pass - cdef void node_value(self, double* dest) nogil: + cdef void node_value(self, double* dest) nogil except *: """Compute the node value of samples[start:end] into dest.""" cdef SIZE_t k @@ -882,7 +882,7 @@ cdef class MSE(RegressionCriterion): MSE = var_left + var_right """ - cdef double node_impurity(self) nogil: + cdef double node_impurity(self) nogil except *: """Evaluate the impurity of the current node, i.e. the impurity of samples[start:end].""" @@ -896,7 +896,7 @@ cdef class MSE(RegressionCriterion): return impurity / self.n_outputs - cdef double proxy_impurity_improvement(self) nogil: + cdef double proxy_impurity_improvement(self) nogil except *: """Compute a proxy of the impurity reduction This method is used to speed up the search for the best split. @@ -923,7 +923,7 @@ cdef class MSE(RegressionCriterion): proxy_impurity_right / self.weighted_n_right) cdef void children_impurity(self, double* impurity_left, - double* impurity_right) nogil: + double* impurity_right) nogil except *: """Evaluate the impurity in children nodes, i.e. 
the impurity of the left child (samples[start:pos]) and the impurity the right child (samples[pos:end]).""" @@ -1030,7 +1030,7 @@ cdef class MAE(RegressionCriterion): cdef void init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight, double weighted_n_samples, SIZE_t* samples, SIZE_t start, - SIZE_t end) nogil: + SIZE_t end) nogil except *: """Initialize the criterion at node samples[start:end] and children samples[start:start] and samples[start:end].""" @@ -1080,7 +1080,7 @@ cdef class MAE(RegressionCriterion): # Reset to pos=start self.reset() - cdef void reset(self) nogil: + cdef void reset(self) nogil except *: """Reset the criterion at pos=start.""" cdef SIZE_t i, k @@ -1106,7 +1106,7 @@ cdef class MAE(RegressionCriterion): ( right_child[k]).push(value, weight) - cdef void reverse_reset(self) nogil: + cdef void reverse_reset(self) nogil except *: """Reset the criterion at pos=end.""" self.weighted_n_right = 0.0 @@ -1129,7 +1129,7 @@ cdef class MAE(RegressionCriterion): ( left_child[k]).push(value, weight) - cdef void update(self, SIZE_t new_pos) nogil: + cdef void update(self, SIZE_t new_pos) nogil except *: """Updated statistics by moving samples[pos:new_pos] to the left.""" cdef DOUBLE_t* sample_weight = self.sample_weight @@ -1186,14 +1186,14 @@ cdef class MAE(RegressionCriterion): self.weighted_n_left) self.pos = new_pos - cdef void node_value(self, double* dest) nogil: + cdef void node_value(self, double* dest) nogil except *: """Computes the node value of samples[start:end] into dest.""" cdef SIZE_t k for k in range(self.n_outputs): dest[k] = self.node_medians[k] - cdef double node_impurity(self) nogil: + cdef double node_impurity(self) nogil except *: """Evaluate the impurity of the current node, i.e. the impurity of samples[start:end]""" @@ -1216,7 +1216,7 @@ cdef class MAE(RegressionCriterion): return impurity / (self.weighted_n_node_samples * self.n_outputs) cdef void children_impurity(self, double* impurity_left, - double* impurity_right) nogil: + double* impurity_right) nogil except *: """Evaluate the impurity in children nodes, i.e. the impurity of the left child (samples[start:pos]) and the impurity the right child (samples[pos:end]). @@ -1273,7 +1273,7 @@ cdef class FriedmanMSE(MSE): improvement = n_left * n_right * diff^2 / (n_left + n_right) """ - cdef double proxy_impurity_improvement(self) nogil: + cdef double proxy_impurity_improvement(self) nogil except *: """Compute a proxy of the impurity reduction This method is used to speed up the search for the best split. 
@@ -1303,7 +1303,7 @@ cdef class FriedmanMSE(MSE): return diff * diff / (self.weighted_n_left * self.weighted_n_right) - cdef double impurity_improvement(self, double impurity) nogil: + cdef double impurity_improvement(self, double impurity) nogil except *: cdef double* sum_left = self.sum_left cdef double* sum_right = self.sum_right diff --git a/sklearn/tree/_splitter.pxd b/sklearn/tree/_splitter.pxd index 4b97d01614b9f..a823e77654b35 100644 --- a/sklearn/tree/_splitter.pxd +++ b/sklearn/tree/_splitter.pxd @@ -86,13 +86,13 @@ cdef class Splitter: np.ndarray X_idx_sorted=*) except * cdef void node_reset(self, SIZE_t start, SIZE_t end, - double* weighted_n_node_samples) nogil + double* weighted_n_node_samples) nogil except * cdef void node_split(self, double impurity, # Impurity of the node SplitRecord* split, - SIZE_t* n_constant_features) nogil + SIZE_t* n_constant_features) nogil except * - cdef void node_value(self, double* dest) nogil + cdef void node_value(self, double* dest) nogil except * - cdef double node_impurity(self) nogil \ No newline at end of file + cdef double node_impurity(self) nogil except * \ No newline at end of file diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx index 5fa7ee553fe2d..730af84aa3754 100644 --- a/sklearn/tree/_splitter.pyx +++ b/sklearn/tree/_splitter.pyx @@ -209,7 +209,7 @@ cdef class Splitter: weighted_n_node_samples[0] = self.criterion.weighted_n_node_samples cdef void node_split(self, double impurity, SplitRecord* split, - SIZE_t* n_constant_features) nogil: + SIZE_t* n_constant_features) nogil except *: """Find the best split on node samples[start:end]. This is a placeholder method. The majority of computation will be done @@ -218,12 +218,12 @@ cdef class Splitter: pass - cdef void node_value(self, double* dest) nogil: + cdef void node_value(self, double* dest) nogil except *: """Copy the value of node samples[start:end] into dest.""" self.criterion.node_value(dest) - cdef double node_impurity(self) nogil: + cdef double node_impurity(self) nogil except *: """Return the impurity of the current node.""" return self.criterion.node_impurity() @@ -296,7 +296,7 @@ cdef class BestSplitter(BaseDenseSplitter): self.presort), self.__getstate__()) cdef void node_split(self, double impurity, SplitRecord* split, - SIZE_t* n_constant_features) nogil: + SIZE_t* n_constant_features) nogil except *: """Find the best split on node samples[start:end].""" # Find the best split cdef SIZE_t* samples = self.samples @@ -513,18 +513,19 @@ cdef class BestSplitter(BaseDenseSplitter): # Sort n-element arrays pointed to by Xf and samples, simultaneously, # by the values in Xf. Algorithm: Introsort (Musser, SP&E, 1997). -cdef inline void sort(DTYPE_t* Xf, SIZE_t* samples, SIZE_t n) nogil: +cdef inline void sort(DTYPE_t* Xf, SIZE_t* samples, SIZE_t n) nogil except *: cdef int maxd = 2 * log(n) introsort(Xf, samples, n, maxd) -cdef inline void swap(DTYPE_t* Xf, SIZE_t* samples, SIZE_t i, SIZE_t j) nogil: +cdef inline void swap(DTYPE_t* Xf, SIZE_t* samples, + SIZE_t i, SIZE_t j) nogil except *: # Helper for sort Xf[i], Xf[j] = Xf[j], Xf[i] samples[i], samples[j] = samples[j], samples[i] -cdef inline DTYPE_t median3(DTYPE_t* Xf, SIZE_t n) nogil: +cdef inline DTYPE_t median3(DTYPE_t* Xf, SIZE_t n) nogil except *: # Median of three pivot selection, after Bentley and McIlroy (1993). # Engineering a sort function. SP&E. Requires 8/3 comparisons on average. 
cdef DTYPE_t a = Xf[0], b = Xf[n / 2], c = Xf[n - 1] @@ -546,7 +547,8 @@ cdef inline DTYPE_t median3(DTYPE_t* Xf, SIZE_t n) nogil: # Introsort with median of 3 pivot selection and 3-way partition function # (robust to repeated elements, e.g. lots of zero features). -cdef void introsort(DTYPE_t* Xf, SIZE_t *samples, SIZE_t n, int maxd) nogil: +cdef void introsort(DTYPE_t* Xf, SIZE_t *samples, + SIZE_t n, int maxd) nogil except *: cdef DTYPE_t pivot cdef SIZE_t i, l, r @@ -579,7 +581,7 @@ cdef void introsort(DTYPE_t* Xf, SIZE_t *samples, SIZE_t n, int maxd) nogil: cdef inline void sift_down(DTYPE_t* Xf, SIZE_t* samples, - SIZE_t start, SIZE_t end) nogil: + SIZE_t start, SIZE_t end) nogil except *: # Restore heap order in Xf[start:end] by moving the max element to start. cdef SIZE_t child, maxind, root @@ -601,7 +603,7 @@ cdef inline void sift_down(DTYPE_t* Xf, SIZE_t* samples, root = maxind -cdef void heapsort(DTYPE_t* Xf, SIZE_t* samples, SIZE_t n) nogil: +cdef void heapsort(DTYPE_t* Xf, SIZE_t* samples, SIZE_t n) nogil except *: cdef SIZE_t start, end # heapify @@ -632,7 +634,7 @@ cdef class RandomSplitter(BaseDenseSplitter): self.presort), self.__getstate__()) cdef void node_split(self, double impurity, SplitRecord* split, - SIZE_t* n_constant_features) nogil: + SIZE_t* n_constant_features) nogil except *: """Find the best random split on node samples[start:end].""" # Draw random splits and pick the best cdef SIZE_t* samples = self.samples @@ -906,7 +908,7 @@ cdef class BaseSparseSplitter(Splitter): cdef inline SIZE_t _partition(self, double threshold, SIZE_t end_negative, SIZE_t start_positive, - SIZE_t zero_pos) nogil: + SIZE_t zero_pos) nogil except *: """Partition samples[start:end] based on threshold.""" cdef double value @@ -944,7 +946,7 @@ cdef class BaseSparseSplitter(Splitter): cdef inline void extract_nnz(self, SIZE_t feature, SIZE_t* end_negative, SIZE_t* start_positive, - bint* is_samples_sorted) nogil: + bint* is_samples_sorted) nogil except *: """Extract and partition values for a given feature. The extracted values are partitioned between negative values @@ -1005,7 +1007,7 @@ cdef class BaseSparseSplitter(Splitter): end_negative, start_positive) -cdef int compare_SIZE_t(const void* a, const void* b) nogil: +cdef int compare_SIZE_t(const void* a, const void* b) nogil except *: """Comparison function for sort.""" return ((a)[0] - (b)[0]) @@ -1013,7 +1015,7 @@ cdef int compare_SIZE_t(const void* a, const void* b) nogil: cdef inline void binary_search(INT32_t* sorted_array, INT32_t start, INT32_t end, SIZE_t value, SIZE_t* index, - INT32_t* new_start) nogil: + INT32_t* new_start) nogil except *: """Return the index of value in the sorted array. If not found, return -1. new_start is the last pivot + 1 @@ -1045,7 +1047,7 @@ cdef inline void extract_nnz_index_to_samples(INT32_t* X_indices, SIZE_t* index_to_samples, DTYPE_t* Xf, SIZE_t* end_negative, - SIZE_t* start_positive) nogil: + SIZE_t* start_positive) nogil except *: """Extract and partition values for a feature using index_to_samples. Complexity is O(indptr_end - indptr_start). @@ -1087,7 +1089,7 @@ cdef inline void extract_nnz_binary_search(INT32_t* X_indices, SIZE_t* end_negative, SIZE_t* start_positive, SIZE_t* sorted_samples, - bint* is_samples_sorted) nogil: + bint* is_samples_sorted) nogil except *: """Extract and partition values for a given feature using binary search. 
If n_samples = end - start and n_indices = indptr_end - indptr_start, @@ -1148,7 +1150,7 @@ cdef inline void extract_nnz_binary_search(INT32_t* X_indices, cdef inline void sparse_swap(SIZE_t* index_to_samples, SIZE_t* samples, - SIZE_t pos_1, SIZE_t pos_2) nogil : + SIZE_t pos_1, SIZE_t pos_2) nogil except *: """Swap sample pos_1 and pos_2 preserving sparse invariant.""" samples[pos_1], samples[pos_2] = samples[pos_2], samples[pos_1] index_to_samples[samples[pos_1]] = pos_1 @@ -1167,7 +1169,7 @@ cdef class BestSparseSplitter(BaseSparseSplitter): self.presort), self.__getstate__()) cdef void node_split(self, double impurity, SplitRecord* split, - SIZE_t* n_constant_features) nogil: + SIZE_t* n_constant_features) nogil except *: """Find the best split on node samples[start:end], using sparse features. """ @@ -1394,7 +1396,7 @@ cdef class RandomSparseSplitter(BaseSparseSplitter): self.presort), self.__getstate__()) cdef void node_split(self, double impurity, SplitRecord* split, - SIZE_t* n_constant_features) nogil: + SIZE_t* n_constant_features) nogil except *: """Find a random split on node samples[start:end], using sparse features. """ diff --git a/sklearn/tree/_tree.pxd b/sklearn/tree/_tree.pxd index dbf0545b1e1d5..f6fe7638680f8 100644 --- a/sklearn/tree/_tree.pxd +++ b/sklearn/tree/_tree.pxd @@ -58,24 +58,24 @@ cdef class Tree: cdef SIZE_t _add_node(self, SIZE_t parent, bint is_left, bint is_leaf, SIZE_t feature, double threshold, double impurity, SIZE_t n_node_samples, - double weighted_n_samples) nogil + double weighted_n_samples) nogil except * cdef void _resize(self, SIZE_t capacity) except * - cdef int _resize_c(self, SIZE_t capacity=*) nogil + cdef int _resize_c(self, SIZE_t capacity=*) nogil except * - cdef np.ndarray _get_value_ndarray(self) - cdef np.ndarray _get_node_ndarray(self) + cdef np.ndarray _get_value_ndarray(self) except * + cdef np.ndarray _get_node_ndarray(self) except * - cpdef np.ndarray predict(self, object X) + cpdef np.ndarray predict(self, object X) except * - cpdef np.ndarray apply(self, object X) - cdef np.ndarray _apply_dense(self, object X) - cdef np.ndarray _apply_sparse_csr(self, object X) + cpdef np.ndarray apply(self, object X) except * + cdef np.ndarray _apply_dense(self, object X) except * + cdef np.ndarray _apply_sparse_csr(self, object X) except * - cpdef object decision_path(self, object X) - cdef object _decision_path_dense(self, object X) - cdef object _decision_path_sparse_csr(self, object X) + cpdef object decision_path(self, object X) except * + cdef object _decision_path_dense(self, object X) except * + cdef object _decision_path_sparse_csr(self, object X) except * - cpdef compute_feature_importances(self, normalize=*) + cpdef compute_feature_importances(self, normalize=*) except * # ============================================================================= @@ -100,5 +100,5 @@ cdef class TreeBuilder: cpdef build(self, Tree tree, object X, np.ndarray y, np.ndarray sample_weight=*, - np.ndarray X_idx_sorted=*) - cdef _check_input(self, object X, np.ndarray y, np.ndarray sample_weight) + np.ndarray X_idx_sorted=*) except * + cdef _check_input(self, object X, np.ndarray y, np.ndarray sample_weight) except * diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx index f8632ab1640d8..0d5ca11c20a54 100644 --- a/sklearn/tree/_tree.pyx +++ b/sklearn/tree/_tree.pyx @@ -91,12 +91,12 @@ cdef class TreeBuilder: cpdef build(self, Tree tree, object X, np.ndarray y, np.ndarray sample_weight=None, - np.ndarray X_idx_sorted=None): + np.ndarray 
X_idx_sorted=None) except *: """Build a decision tree from the training set (X, y).""" pass cdef inline _check_input(self, object X, np.ndarray y, - np.ndarray sample_weight): + np.ndarray sample_weight) except *: """Check input dtype, layout and format""" if issparse(X): X = X.tocsc() @@ -141,7 +141,7 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): cpdef build(self, Tree tree, object X, np.ndarray y, np.ndarray sample_weight=None, - np.ndarray X_idx_sorted=None): + np.ndarray X_idx_sorted=None) except *: """Build a decision tree from the training set (X, y).""" # check input @@ -272,7 +272,7 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): # Best first builder ---------------------------------------------------------- cdef inline int _add_to_frontier(PriorityHeapRecord* rec, - PriorityHeap frontier) nogil: + PriorityHeap frontier) nogil except *: """Adds record ``rec`` to the priority queue ``frontier``; returns -1 on memory-error. """ return frontier.push(rec.node_id, rec.start, rec.end, rec.pos, rec.depth, @@ -291,7 +291,7 @@ cdef class BestFirstTreeBuilder(TreeBuilder): def __cinit__(self, Splitter splitter, SIZE_t min_samples_split, SIZE_t min_samples_leaf, min_weight_leaf, SIZE_t max_depth, SIZE_t max_leaf_nodes, - double min_impurity_split): + double min_impurity_split) except *: self.splitter = splitter self.min_samples_split = min_samples_split self.min_samples_leaf = min_samples_leaf @@ -417,7 +417,7 @@ cdef class BestFirstTreeBuilder(TreeBuilder): SIZE_t start, SIZE_t end, double impurity, bint is_first, bint is_left, Node* parent, SIZE_t depth, - PriorityHeapRecord* res) nogil: + PriorityHeapRecord* res) nogil except *: """Adds node w/ partition ``[start, end)`` to the frontier. """ cdef SplitRecord split cdef SIZE_t node_id @@ -665,7 +665,7 @@ cdef class Tree: # XXX using (size_t)(-1) is ugly, but SIZE_MAX is not available in C89 # (i.e., older MSVC). - cdef int _resize_c(self, SIZE_t capacity=(-1)) nogil: + cdef int _resize_c(self, SIZE_t capacity=(-1)) nogil except *: """Guts of _resize. Returns 0 for success, -1 for error.""" if capacity == self.capacity and self.nodes != NULL: return 0 @@ -702,7 +702,8 @@ cdef class Tree: cdef SIZE_t _add_node(self, SIZE_t parent, bint is_left, bint is_leaf, SIZE_t feature, double threshold, double impurity, - SIZE_t n_node_samples, double weighted_n_node_samples) nogil: + SIZE_t n_node_samples, + double weighted_n_node_samples) nogil except *: """Add a node to the tree. The new node registers itself as the child of its parent. @@ -756,7 +757,7 @@ cdef class Tree: else: return self._apply_dense(X) - cdef inline np.ndarray _apply_dense(self, object X): + cdef inline np.ndarray _apply_dense(self, object X) except *: """Finds the terminal region (=leaf node) for each sample in X.""" # Check input @@ -798,7 +799,7 @@ cdef class Tree: return out - cdef inline np.ndarray _apply_sparse_csr(self, object X): + cdef inline np.ndarray _apply_sparse_csr(self, object X) except *: """Finds the terminal region (=leaf node) for each sample in sparse X. 
""" # Check input @@ -880,7 +881,7 @@ cdef class Tree: else: return self._decision_path_dense(X) - cdef inline object _decision_path_dense(self, object X): + cdef inline object _decision_path_dense(self, object X) except *: """Finds the decision path (=node) for each sample in X.""" # Check input @@ -940,7 +941,7 @@ cdef class Tree: return out - cdef inline object _decision_path_sparse_csr(self, object X): + cdef inline object _decision_path_sparse_csr(self, object X) except *: """Finds the decision path (=node) for each sample in X.""" # Check input @@ -1071,7 +1072,7 @@ cdef class Tree: return importances - cdef np.ndarray _get_value_ndarray(self): + cdef np.ndarray _get_value_ndarray(self) except *: """Wraps value as a 3-d NumPy array. The array keeps a reference to this Tree, which manages the underlying @@ -1087,7 +1088,7 @@ cdef class Tree: arr.base = self return arr - cdef np.ndarray _get_node_ndarray(self): + cdef np.ndarray _get_node_ndarray(self) except *: """Wraps nodes as a NumPy struct array. The array keeps a reference to this Tree, which manages the underlying diff --git a/sklearn/tree/_utils.pxd b/sklearn/tree/_utils.pxd index f5bc8d765394b..93f30fd435cb7 100644 --- a/sklearn/tree/_utils.pxd +++ b/sklearn/tree/_utils.pxd @@ -44,18 +44,18 @@ ctypedef fused realloc_ptr: cdef realloc_ptr safe_realloc(realloc_ptr* p, size_t nelems) nogil except * -cdef np.ndarray sizet_ptr_to_ndarray(SIZE_t* data, SIZE_t size) +cdef np.ndarray sizet_ptr_to_ndarray(SIZE_t* data, SIZE_t size) except * cdef SIZE_t rand_int(SIZE_t low, SIZE_t high, - UINT32_t* random_state) nogil + UINT32_t* random_state) nogil except * cdef double rand_uniform(double low, double high, - UINT32_t* random_state) nogil + UINT32_t* random_state) nogil except * -cdef double log(double x) nogil +cdef double log(double x) nogil except * # ============================================================================= # Stack data structure @@ -79,8 +79,8 @@ cdef class Stack: cdef bint is_empty(self) nogil cdef int push(self, SIZE_t start, SIZE_t end, SIZE_t depth, SIZE_t parent, bint is_left, double impurity, - SIZE_t n_constant_features) nogil - cdef int pop(self, StackRecord* res) nogil + SIZE_t n_constant_features) nogil except * + cdef int pop(self, StackRecord* res) nogil except * # ============================================================================= @@ -105,12 +105,12 @@ cdef class PriorityHeap: cdef SIZE_t heap_ptr cdef PriorityHeapRecord* heap_ - cdef bint is_empty(self) nogil + cdef bint is_empty(self) nogil except * cdef int push(self, SIZE_t node_id, SIZE_t start, SIZE_t end, SIZE_t pos, SIZE_t depth, bint is_leaf, double improvement, double impurity, double impurity_left, - double impurity_right) nogil - cdef int pop(self, PriorityHeapRecord* res) nogil + double impurity_right) nogil except * + cdef int pop(self, PriorityHeapRecord* res) nogil except * # ============================================================================= # WeightedPQueue data structure @@ -129,12 +129,12 @@ cdef class WeightedPQueue: cdef bint is_empty(self) nogil cdef void reset(self) nogil cdef SIZE_t size(self) nogil - cdef int push(self, DOUBLE_t data, DOUBLE_t weight) nogil - cdef int remove(self, DOUBLE_t data, DOUBLE_t weight) nogil - cdef int pop(self, DOUBLE_t* data, DOUBLE_t* weight) nogil - cdef int peek(self, DOUBLE_t* data, DOUBLE_t* weight) nogil - cdef DOUBLE_t get_weight_from_index(self, SIZE_t index) nogil - cdef DOUBLE_t get_value_from_index(self, SIZE_t index) nogil + cdef int push(self, DOUBLE_t data, 
DOUBLE_t weight) nogil except * + cdef int remove(self, DOUBLE_t data, DOUBLE_t weight) nogil except * + cdef int pop(self, DOUBLE_t* data, DOUBLE_t* weight) nogil except * + cdef int peek(self, DOUBLE_t* data, DOUBLE_t* weight) nogil except * + cdef DOUBLE_t get_weight_from_index(self, SIZE_t index) nogil except * + cdef DOUBLE_t get_value_from_index(self, SIZE_t index) nogil except * # ============================================================================= @@ -149,15 +149,15 @@ cdef class WeightedMedianCalculator: cdef DOUBLE_t sum_w_0_k # represents sum(weights[0:k]) # = w[0] + w[1] + ... + w[k-1] - cdef SIZE_t size(self) nogil - cdef int push(self, DOUBLE_t data, DOUBLE_t weight) nogil - cdef void reset(self) nogil - cdef int update_median_parameters_post_push(self, DOUBLE_t data, - DOUBLE_t weight, - DOUBLE_t original_median) nogil - cdef int remove(self, DOUBLE_t data, DOUBLE_t weight) nogil - cdef int pop(self, DOUBLE_t* data, DOUBLE_t* weight) nogil - cdef int update_median_parameters_post_remove(self, DOUBLE_t data, - DOUBLE_t weight, - DOUBLE_t original_median) nogil - cdef DOUBLE_t get_median(self) nogil + cdef SIZE_t size(self) nogil except * + cdef int push(self, DOUBLE_t data, DOUBLE_t weight) nogil except * + cdef void reset(self) nogil except * + cdef int update_median_parameters_post_push( + self, DOUBLE_t data, DOUBLE_t weight, + DOUBLE_t original_median) nogil except * + cdef int remove(self, DOUBLE_t data, DOUBLE_t weight) nogil except * + cdef int pop(self, DOUBLE_t* data, DOUBLE_t* weight) nogil except * + cdef int update_median_parameters_post_remove( + self, DOUBLE_t data, DOUBLE_t weight, + DOUBLE_t original_median) nogil except * + cdef DOUBLE_t get_median(self) nogil except * diff --git a/sklearn/tree/_utils.pyx b/sklearn/tree/_utils.pyx index 9c2bd1f4ea565..e5b1af50b900c 100644 --- a/sklearn/tree/_utils.pyx +++ b/sklearn/tree/_utils.pyx @@ -55,7 +55,7 @@ def _realloc_test(): # rand_r replacement using a 32bit XorShift generator # See http://www.jstatsoft.org/v08/i14/paper for details -cdef inline UINT32_t our_rand_r(UINT32_t* seed) nogil: +cdef inline UINT32_t our_rand_r(UINT32_t* seed) nogil except *: seed[0] ^= (seed[0] << 13) seed[0] ^= (seed[0] >> 17) seed[0] ^= (seed[0] << 5) @@ -63,7 +63,7 @@ cdef inline UINT32_t our_rand_r(UINT32_t* seed) nogil: return seed[0] % (RAND_R_MAX + 1) -cdef inline np.ndarray sizet_ptr_to_ndarray(SIZE_t* data, SIZE_t size): +cdef inline np.ndarray sizet_ptr_to_ndarray(SIZE_t* data, SIZE_t size) except *: """Return copied data as 1D numpy array of intp's.""" cdef np.npy_intp shape[1] shape[0] = size @@ -71,19 +71,19 @@ cdef inline np.ndarray sizet_ptr_to_ndarray(SIZE_t* data, SIZE_t size): cdef inline SIZE_t rand_int(SIZE_t low, SIZE_t high, - UINT32_t* random_state) nogil: + UINT32_t* random_state) nogil except *: """Generate a random integer in [0; end).""" return low + our_rand_r(random_state) % (high - low) cdef inline double rand_uniform(double low, double high, - UINT32_t* random_state) nogil: + UINT32_t* random_state) nogil except *: """Generate a random double in [low; high).""" return ((high - low) * our_rand_r(random_state) / RAND_R_MAX) + low -cdef inline double log(double x) nogil: +cdef inline double log(double x) nogil except *: return ln(x) / ln(2.0) @@ -117,12 +117,12 @@ cdef class Stack: def __dealloc__(self): free(self.stack_) - cdef bint is_empty(self) nogil: + cdef bint is_empty(self) nogil except *: return self.top <= 0 cdef int push(self, SIZE_t start, SIZE_t end, SIZE_t depth, SIZE_t parent, bint 
is_left, double impurity, - SIZE_t n_constant_features) nogil: + SIZE_t n_constant_features) nogil except *: """Push a new element onto the stack. Returns 0 if successful; -1 on out of memory error. @@ -153,7 +153,7 @@ cdef class Stack: self.top = top + 1 return 0 - cdef int pop(self, StackRecord* res) nogil: + cdef int pop(self, StackRecord* res) nogil except *: """Remove the top element from the stack and copy to ``res``. Returns 0 if pop was successful (and ``res`` is set); -1 @@ -175,7 +175,7 @@ cdef class Stack: # PriorityHeap data structure # ============================================================================= -cdef void heapify_up(PriorityHeapRecord* heap, SIZE_t pos) nogil: +cdef void heapify_up(PriorityHeapRecord* heap, SIZE_t pos) nogil except *: """Restore heap invariant parent.improvement > child.improvement from ``pos`` upwards. """ if pos == 0: @@ -189,7 +189,7 @@ cdef void heapify_up(PriorityHeapRecord* heap, SIZE_t pos) nogil: cdef void heapify_down(PriorityHeapRecord* heap, SIZE_t pos, - SIZE_t heap_length) nogil: + SIZE_t heap_length) nogil except *: """Restore heap invariant parent.improvement > children.improvement from ``pos`` downwards. """ cdef SIZE_t left_pos = 2 * (pos + 1) - 1 @@ -239,13 +239,13 @@ cdef class PriorityHeap: def __dealloc__(self): free(self.heap_) - cdef bint is_empty(self) nogil: + cdef bint is_empty(self) nogil except *: return self.heap_ptr <= 0 cdef int push(self, SIZE_t node_id, SIZE_t start, SIZE_t end, SIZE_t pos, SIZE_t depth, bint is_leaf, double improvement, double impurity, double impurity_left, - double impurity_right) nogil: + double impurity_right) nogil except *: """Push record on the priority heap. Returns 0 if successful; -1 on out of memory error. @@ -284,7 +284,7 @@ cdef class PriorityHeap: self.heap_ptr = heap_ptr + 1 return 0 - cdef int pop(self, PriorityHeapRecord* res) nogil: + cdef int pop(self, PriorityHeapRecord* res) nogil except *: """Remove max element from the heap. """ cdef SIZE_t heap_ptr = self.heap_ptr cdef PriorityHeapRecord* heap = self.heap_ @@ -340,18 +340,18 @@ cdef class WeightedPQueue: def __dealloc__(self): free(self.array_) - cdef void reset(self) nogil: + cdef void reset(self) nogil except *: """Reset the WeightedPQueue to its state at construction""" self.array_ptr = 0 safe_realloc(&self.array_, self.capacity) - cdef bint is_empty(self) nogil: + cdef bint is_empty(self) nogil except *: return self.array_ptr <= 0 - cdef SIZE_t size(self) nogil: + cdef SIZE_t size(self) nogil except *: return self.array_ptr - cdef int push(self, DOUBLE_t data, DOUBLE_t weight) nogil: + cdef int push(self, DOUBLE_t data, DOUBLE_t weight) nogil except *: """Push record on the array. Returns 0 if successful; -1 on out of memory error. """ @@ -380,7 +380,7 @@ cdef class WeightedPQueue: self.array_ptr = array_ptr + 1 return 0 - cdef int remove(self, DOUBLE_t data, DOUBLE_t weight) nogil: + cdef int remove(self, DOUBLE_t data, DOUBLE_t weight) nogil except *: """Remove a specific value/weight record from the array. Returns 0 if successful, -1 if record not found.""" cdef SIZE_t array_ptr = self.array_ptr @@ -408,7 +408,7 @@ cdef class WeightedPQueue: self.array_ptr = array_ptr - 1 return 0 - cdef int pop(self, DOUBLE_t* data, DOUBLE_t* weight) nogil: + cdef int pop(self, DOUBLE_t* data, DOUBLE_t* weight) nogil except *: """Remove the top (minimum) element from array. 
Returns 0 if successful, -1 if nothing to remove.""" cdef SIZE_t array_ptr = self.array_ptr @@ -429,7 +429,7 @@ cdef class WeightedPQueue: self.array_ptr = array_ptr - 1 return 0 - cdef int peek(self, DOUBLE_t* data, DOUBLE_t* weight) nogil: + cdef int peek(self, DOUBLE_t* data, DOUBLE_t* weight) nogil except *: """Write the top element from array to a pointer. Returns 0 if successful, -1 if nothing to write.""" cdef WeightedPQueueRecord* array = self.array_ @@ -440,7 +440,7 @@ cdef class WeightedPQueue: weight[0] = array[0].weight return 0 - cdef DOUBLE_t get_weight_from_index(self, SIZE_t index) nogil: + cdef DOUBLE_t get_weight_from_index(self, SIZE_t index) nogil except *: """Given an index between [0,self.current_capacity], access the appropriate heap and return the requested weight""" cdef WeightedPQueueRecord* array = self.array_ @@ -448,7 +448,7 @@ cdef class WeightedPQueue: # get weight at index return array[index].weight - cdef DOUBLE_t get_value_from_index(self, SIZE_t index) nogil: + cdef DOUBLE_t get_value_from_index(self, SIZE_t index) nogil except *: """Given an index between [0,self.current_capacity], access the appropriate heap and return the requested value""" cdef WeightedPQueueRecord* array = self.array_ @@ -501,19 +501,19 @@ cdef class WeightedMedianCalculator: self.k = 0 self.sum_w_0_k = 0 - cdef SIZE_t size(self) nogil: + cdef SIZE_t size(self) nogil except *: """Return the number of samples in the WeightedMedianCalculator""" return self.samples.size() - cdef void reset(self) nogil: + cdef void reset(self) nogil except *: """Reset the WeightedMedianCalculator to its state at construction""" self.samples.reset() self.total_weight = 0 self.k = 0 self.sum_w_0_k = 0 - cdef int push(self, DOUBLE_t data, DOUBLE_t weight) nogil: + cdef int push(self, DOUBLE_t data, DOUBLE_t weight) nogil except *: """Push a value and its associated weight to the WeightedMedianCalculator to be considered in the median calculation. @@ -528,9 +528,9 @@ cdef class WeightedMedianCalculator: original_median) return return_value - cdef int update_median_parameters_post_push(self, DOUBLE_t data, - DOUBLE_t weight, - DOUBLE_t original_median) nogil: + cdef int update_median_parameters_post_push( + self, DOUBLE_t data, DOUBLE_t weight, + DOUBLE_t original_median) nogil except *: """Update the parameters used in the median calculation, namely `k` and `sum_w_0_k` after an insertion""" @@ -570,7 +570,7 @@ cdef class WeightedMedianCalculator: self.sum_w_0_k += self.samples.get_weight_from_index(self.k-1) return 0 - cdef int remove(self, DOUBLE_t data, DOUBLE_t weight) nogil: + cdef int remove(self, DOUBLE_t data, DOUBLE_t weight) nogil except *: """Remove a value from the MedianHeap, removing it from consideration in the median calculation """ @@ -585,7 +585,7 @@ cdef class WeightedMedianCalculator: original_median) return return_value - cdef int pop(self, DOUBLE_t* data, DOUBLE_t* weight) nogil: + cdef int pop(self, DOUBLE_t* data, DOUBLE_t* weight) nogil except *: """Pop a value from the MedianHeap, starting from the left and moving to the right. 
""" @@ -605,9 +605,9 @@ cdef class WeightedMedianCalculator: original_median) return return_value - cdef int update_median_parameters_post_remove(self, DOUBLE_t data, - DOUBLE_t weight, - double original_median) nogil: + cdef int update_median_parameters_post_remove( + self, DOUBLE_t data, DOUBLE_t weight, + double original_median) nogil except *: """Update the parameters used in the median calculation, namely `k` and `sum_w_0_k` after a removal""" # reset parameters because it there are no elements @@ -655,7 +655,7 @@ cdef class WeightedMedianCalculator: self.sum_w_0_k -= self.samples.get_weight_from_index(self.k) return 0 - cdef DOUBLE_t get_median(self) nogil: + cdef DOUBLE_t get_median(self) nogil except *: """Write the median to a pointer, taking into account sample weights.""" if self.sum_w_0_k == (self.total_weight / 2.0): From caff5d0cc2a0135095e082d58e18741c85723617 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Wed, 21 Dec 2016 13:49:46 +0100 Subject: [PATCH 05/17] Don't use except * for functions that return python objects --- sklearn/tree/_splitter.pxd | 2 +- sklearn/tree/_splitter.pyx | 2 +- sklearn/tree/_tree.pxd | 24 ++++++++++++------------ sklearn/tree/_tree.pyx | 20 ++++++++++---------- sklearn/tree/_utils.pxd | 2 +- sklearn/tree/_utils.pyx | 2 +- 6 files changed, 26 insertions(+), 26 deletions(-) diff --git a/sklearn/tree/_splitter.pxd b/sklearn/tree/_splitter.pxd index a823e77654b35..859a0b796fd40 100644 --- a/sklearn/tree/_splitter.pxd +++ b/sklearn/tree/_splitter.pxd @@ -95,4 +95,4 @@ cdef class Splitter: cdef void node_value(self, double* dest) nogil except * - cdef double node_impurity(self) nogil except * \ No newline at end of file + cdef double node_impurity(self) nogil except * diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx index 730af84aa3754..7359da786873a 100644 --- a/sklearn/tree/_splitter.pyx +++ b/sklearn/tree/_splitter.pyx @@ -182,7 +182,7 @@ cdef class Splitter: self.sample_weight = sample_weight cdef void node_reset(self, SIZE_t start, SIZE_t end, - double* weighted_n_node_samples) nogil: + double* weighted_n_node_samples) nogil except *: """Reset splitter on node samples[start:end]. 
Parameters diff --git a/sklearn/tree/_tree.pxd b/sklearn/tree/_tree.pxd index f6fe7638680f8..799f5b47f4654 100644 --- a/sklearn/tree/_tree.pxd +++ b/sklearn/tree/_tree.pxd @@ -62,20 +62,20 @@ cdef class Tree: cdef void _resize(self, SIZE_t capacity) except * cdef int _resize_c(self, SIZE_t capacity=*) nogil except * - cdef np.ndarray _get_value_ndarray(self) except * - cdef np.ndarray _get_node_ndarray(self) except * + cdef np.ndarray _get_value_ndarray(self) + cdef np.ndarray _get_node_ndarray(self) - cpdef np.ndarray predict(self, object X) except * + cpdef np.ndarray predict(self, object X) - cpdef np.ndarray apply(self, object X) except * - cdef np.ndarray _apply_dense(self, object X) except * - cdef np.ndarray _apply_sparse_csr(self, object X) except * + cpdef np.ndarray apply(self, object X) + cdef np.ndarray _apply_dense(self, object X) + cdef np.ndarray _apply_sparse_csr(self, object X) - cpdef object decision_path(self, object X) except * - cdef object _decision_path_dense(self, object X) except * - cdef object _decision_path_sparse_csr(self, object X) except * + cpdef object decision_path(self, object X) + cdef object _decision_path_dense(self, object X) + cdef object _decision_path_sparse_csr(self, object X) - cpdef compute_feature_importances(self, normalize=*) except * + cpdef compute_feature_importances(self, normalize=*) # ============================================================================= @@ -100,5 +100,5 @@ cdef class TreeBuilder: cpdef build(self, Tree tree, object X, np.ndarray y, np.ndarray sample_weight=*, - np.ndarray X_idx_sorted=*) except * - cdef _check_input(self, object X, np.ndarray y, np.ndarray sample_weight) except * + np.ndarray X_idx_sorted=*) + cdef _check_input(self, object X, np.ndarray y, np.ndarray sample_weight) diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx index 0d5ca11c20a54..5dd7c33331ee6 100644 --- a/sklearn/tree/_tree.pyx +++ b/sklearn/tree/_tree.pyx @@ -91,12 +91,12 @@ cdef class TreeBuilder: cpdef build(self, Tree tree, object X, np.ndarray y, np.ndarray sample_weight=None, - np.ndarray X_idx_sorted=None) except *: + np.ndarray X_idx_sorted=None): """Build a decision tree from the training set (X, y).""" pass cdef inline _check_input(self, object X, np.ndarray y, - np.ndarray sample_weight) except *: + np.ndarray sample_weight): """Check input dtype, layout and format""" if issparse(X): X = X.tocsc() @@ -141,7 +141,7 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): cpdef build(self, Tree tree, object X, np.ndarray y, np.ndarray sample_weight=None, - np.ndarray X_idx_sorted=None) except *: + np.ndarray X_idx_sorted=None): """Build a decision tree from the training set (X, y).""" # check input @@ -291,7 +291,7 @@ cdef class BestFirstTreeBuilder(TreeBuilder): def __cinit__(self, Splitter splitter, SIZE_t min_samples_split, SIZE_t min_samples_leaf, min_weight_leaf, SIZE_t max_depth, SIZE_t max_leaf_nodes, - double min_impurity_split) except *: + double min_impurity_split): self.splitter = splitter self.min_samples_split = min_samples_split self.min_samples_leaf = min_samples_leaf @@ -757,7 +757,7 @@ cdef class Tree: else: return self._apply_dense(X) - cdef inline np.ndarray _apply_dense(self, object X) except *: + cdef inline np.ndarray _apply_dense(self, object X): """Finds the terminal region (=leaf node) for each sample in X.""" # Check input @@ -799,7 +799,7 @@ cdef class Tree: return out - cdef inline np.ndarray _apply_sparse_csr(self, object X) except *: + cdef inline np.ndarray _apply_sparse_csr(self, object X): 
"""Finds the terminal region (=leaf node) for each sample in sparse X. """ # Check input @@ -881,7 +881,7 @@ cdef class Tree: else: return self._decision_path_dense(X) - cdef inline object _decision_path_dense(self, object X) except *: + cdef inline object _decision_path_dense(self, object X): """Finds the decision path (=node) for each sample in X.""" # Check input @@ -941,7 +941,7 @@ cdef class Tree: return out - cdef inline object _decision_path_sparse_csr(self, object X) except *: + cdef inline object _decision_path_sparse_csr(self, object X): """Finds the decision path (=node) for each sample in X.""" # Check input @@ -1072,7 +1072,7 @@ cdef class Tree: return importances - cdef np.ndarray _get_value_ndarray(self) except *: + cdef np.ndarray _get_value_ndarray(self): """Wraps value as a 3-d NumPy array. The array keeps a reference to this Tree, which manages the underlying @@ -1088,7 +1088,7 @@ cdef class Tree: arr.base = self return arr - cdef np.ndarray _get_node_ndarray(self) except *: + cdef np.ndarray _get_node_ndarray(self): """Wraps nodes as a NumPy struct array. The array keeps a reference to this Tree, which manages the underlying diff --git a/sklearn/tree/_utils.pxd b/sklearn/tree/_utils.pxd index 93f30fd435cb7..9d648f67ce1a4 100644 --- a/sklearn/tree/_utils.pxd +++ b/sklearn/tree/_utils.pxd @@ -44,7 +44,7 @@ ctypedef fused realloc_ptr: cdef realloc_ptr safe_realloc(realloc_ptr* p, size_t nelems) nogil except * -cdef np.ndarray sizet_ptr_to_ndarray(SIZE_t* data, SIZE_t size) except * +cdef np.ndarray sizet_ptr_to_ndarray(SIZE_t* data, SIZE_t size) cdef SIZE_t rand_int(SIZE_t low, SIZE_t high, diff --git a/sklearn/tree/_utils.pyx b/sklearn/tree/_utils.pyx index e5b1af50b900c..365fd2ada6533 100644 --- a/sklearn/tree/_utils.pyx +++ b/sklearn/tree/_utils.pyx @@ -63,7 +63,7 @@ cdef inline UINT32_t our_rand_r(UINT32_t* seed) nogil except *: return seed[0] % (RAND_R_MAX + 1) -cdef inline np.ndarray sizet_ptr_to_ndarray(SIZE_t* data, SIZE_t size) except *: +cdef inline np.ndarray sizet_ptr_to_ndarray(SIZE_t* data, SIZE_t size): """Return copied data as 1D numpy array of intp's.""" cdef np.npy_intp shape[1] shape[0] = size From c748c64db87e61613a16286f8d95145f88b23714 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Wed, 21 Dec 2016 13:57:07 +0100 Subject: [PATCH 06/17] Don't use except * for the comparison function passed to qsort --- sklearn/tree/_splitter.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx index 7359da786873a..bd5d2e649de10 100644 --- a/sklearn/tree/_splitter.pyx +++ b/sklearn/tree/_splitter.pyx @@ -1007,7 +1007,7 @@ cdef class BaseSparseSplitter(Splitter): end_negative, start_positive) -cdef int compare_SIZE_t(const void* a, const void* b) nogil except *: +cdef int compare_SIZE_t(const void* a, const void* b) nogil: """Comparison function for sort.""" return ((a)[0] - (b)[0]) From 2d86faf45f229785a2d6a49aba401817f3bf4762 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Wed, 21 Dec 2016 13:57:17 +0100 Subject: [PATCH 07/17] Omissions and Errors --- sklearn/tree/_utils.pxd | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/tree/_utils.pxd b/sklearn/tree/_utils.pxd index 9d648f67ce1a4..e2f37365b9ecf 100644 --- a/sklearn/tree/_utils.pxd +++ b/sklearn/tree/_utils.pxd @@ -76,7 +76,7 @@ cdef class Stack: cdef SIZE_t top cdef StackRecord* stack_ - cdef bint is_empty(self) nogil + cdef bint is_empty(self) nogil except * cdef int push(self, SIZE_t start, SIZE_t end, SIZE_t 
depth, SIZE_t parent, bint is_left, double impurity, SIZE_t n_constant_features) nogil except * @@ -126,9 +126,9 @@ cdef class WeightedPQueue: cdef SIZE_t array_ptr cdef WeightedPQueueRecord* array_ - cdef bint is_empty(self) nogil - cdef void reset(self) nogil - cdef SIZE_t size(self) nogil + cdef bint is_empty(self) nogil except * + cdef void reset(self) nogil except * + cdef SIZE_t size(self) nogil except * cdef int push(self, DOUBLE_t data, DOUBLE_t weight) nogil except * cdef int remove(self, DOUBLE_t data, DOUBLE_t weight) nogil except * cdef int pop(self, DOUBLE_t* data, DOUBLE_t* weight) nogil except * From 34e69921495e8c0efe7c3ae816cec77d2a8e5b1e Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Wed, 21 Dec 2016 14:03:46 +0100 Subject: [PATCH 08/17] Use safe_realloc now that gil is released there --- sklearn/tree/_tree.pyx | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx index 5dd7c33331ee6..e4fa4dca02f99 100644 --- a/sklearn/tree/_tree.pyx +++ b/sklearn/tree/_tree.pyx @@ -19,7 +19,6 @@ from cpython cimport Py_INCREF, PyObject from libc.stdlib cimport free -from libc.stdlib cimport realloc from libc.string cimport memcpy from libc.string cimport memset @@ -676,16 +675,8 @@ cdef class Tree: else: capacity = 2 * self.capacity - # XXX no safe_realloc here because we need to grab the GIL - cdef void* ptr = realloc(self.nodes, capacity * sizeof(Node)) - if ptr == NULL: - return -1 - self.nodes = ptr - ptr = realloc(self.value, - capacity * self.value_stride * sizeof(double)) - if ptr == NULL: - return -1 - self.value = ptr + safe_realloc(&self.nodes, capacity) + safe_realloc(&self.value, capacity) # value memory is initialised to 0 to enable classifier argmax if capacity > self.capacity: From 96bac832507cc89cceacae69c762da7c1fc382ea Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Mon, 26 Dec 2016 15:07:10 +0100 Subject: [PATCH 09/17] Fix realloc size --- sklearn/tree/_tree.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx index e4fa4dca02f99..f61679641ebcc 100644 --- a/sklearn/tree/_tree.pyx +++ b/sklearn/tree/_tree.pyx @@ -676,7 +676,7 @@ cdef class Tree: capacity = 2 * self.capacity safe_realloc(&self.nodes, capacity) - safe_realloc(&self.value, capacity) + safe_realloc(&self.value, capacity * self.value_stride) # value memory is initialised to 0 to enable classifier argmax if capacity > self.capacity: From 1be08dfc5dda9a90c966c920f37c71144c4c9099 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Tue, 27 Dec 2016 09:36:08 +0100 Subject: [PATCH 10/17] Acquire GIL only if we need to raise --- sklearn/tree/_tree.pxd | 2 +- sklearn/tree/_tree.pyx | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sklearn/tree/_tree.pxd b/sklearn/tree/_tree.pxd index 799f5b47f4654..894aee2a133f4 100644 --- a/sklearn/tree/_tree.pxd +++ b/sklearn/tree/_tree.pxd @@ -59,7 +59,7 @@ cdef class Tree: SIZE_t feature, double threshold, double impurity, SIZE_t n_node_samples, double weighted_n_samples) nogil except * - cdef void _resize(self, SIZE_t capacity) except * + cdef void _resize(self, SIZE_t capacity) nogil except * cdef int _resize_c(self, SIZE_t capacity=*) nogil except * cdef np.ndarray _get_value_ndarray(self) diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx index f61679641ebcc..48dd95aea9c6c 100644 --- a/sklearn/tree/_tree.pyx +++ b/sklearn/tree/_tree.pyx @@ -656,11 +656,13 @@ cdef class Tree: value = memcpy(self.value, ( 
value_ndarray).data, self.capacity * self.value_stride * sizeof(double)) - cdef void _resize(self, SIZE_t capacity) except *: + cdef void _resize(self, SIZE_t capacity) nogil except *: """Resize all inner arrays to `capacity`, if `capacity` == -1, then double the size of the inner arrays.""" if self._resize_c(capacity) != 0: - raise MemoryError() + # Acquire gil only if we need to raise + with gil: + raise MemoryError() # XXX using (size_t)(-1) is ugly, but SIZE_MAX is not available in C89 # (i.e., older MSVC). From 8cb8b5d19ef9920c37569756a5545cc03a7ed482 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Tue, 27 Dec 2016 17:32:44 +0100 Subject: [PATCH 11/17] Use except * more judiciously; Release gil only when raising; Add comments to clarify --- sklearn/tree/_criterion.pxd | 18 +++++----- sklearn/tree/_criterion.pyx | 72 +++++++++++++++++++------------------ sklearn/tree/_splitter.pxd | 4 +-- sklearn/tree/_splitter.pyx | 28 +++++++-------- sklearn/tree/_utils.pxd | 42 +++++++++++----------- sklearn/tree/_utils.pyx | 59 +++++++++++++++++------------- 6 files changed, 118 insertions(+), 105 deletions(-) diff --git a/sklearn/tree/_criterion.pxd b/sklearn/tree/_criterion.pxd index 382040f2d0091..cf6d32d1b7fe1 100644 --- a/sklearn/tree/_criterion.pxd +++ b/sklearn/tree/_criterion.pxd @@ -55,13 +55,13 @@ cdef class Criterion: # Methods cdef void init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight, double weighted_n_samples, SIZE_t* samples, SIZE_t start, - SIZE_t end) nogil except * - cdef void reset(self) nogil except * - cdef void reverse_reset(self) nogil except * - cdef void update(self, SIZE_t new_pos) nogil except * - cdef double node_impurity(self) nogil except * + SIZE_t end) nogil + cdef void reset(self) nogil + cdef void reverse_reset(self) nogil + cdef void update(self, SIZE_t new_pos) nogil + cdef double node_impurity(self) nogil cdef void children_impurity(self, double* impurity_left, - double* impurity_right) nogil except * - cdef void node_value(self, double* dest) nogil except * - cdef double impurity_improvement(self, double impurity) nogil except * - cdef double proxy_impurity_improvement(self) nogil except * + double* impurity_right) nogil + cdef void node_value(self, double* dest) nogil + cdef double impurity_improvement(self, double impurity) nogil + cdef double proxy_impurity_improvement(self) nogil diff --git a/sklearn/tree/_criterion.pyx b/sklearn/tree/_criterion.pyx index 537aedd34d01e..52f02aa4f0658 100644 --- a/sklearn/tree/_criterion.pyx +++ b/sklearn/tree/_criterion.pyx @@ -53,7 +53,7 @@ cdef class Criterion: cdef void init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight, double weighted_n_samples, SIZE_t* samples, SIZE_t start, - SIZE_t end) nogil except *: + SIZE_t end) nogil: """Placeholder for a method which will initialize the criterion. Parameters @@ -79,7 +79,7 @@ cdef class Criterion: pass - cdef void reset(self) nogil except *: + cdef void reset(self) nogil: """Reset the criterion at pos=start. This method must be implemented by the subclass. @@ -87,14 +87,14 @@ cdef class Criterion: pass - cdef void reverse_reset(self) nogil except *: + cdef void reverse_reset(self) nogil: """Reset the criterion at pos=end. This method must be implemented by the subclass. """ pass - cdef void update(self, SIZE_t new_pos) nogil except *: + cdef void update(self, SIZE_t new_pos) nogil: """Updated statistics by moving samples[pos:new_pos] to the left child. 
This updates the collected statistics by moving samples[pos:new_pos] @@ -109,7 +109,7 @@ cdef class Criterion: pass - cdef double node_impurity(self) nogil except *: + cdef double node_impurity(self) nogil: """Placeholder for calculating the impurity of the node. Placeholder for a method which will evaluate the impurity of @@ -120,7 +120,7 @@ cdef class Criterion: pass cdef void children_impurity(self, double* impurity_left, - double* impurity_right) nogil except *: + double* impurity_right) nogil: """Placeholder for calculating the impurity of children. Placeholder for a method which evaluates the impurity in @@ -139,7 +139,7 @@ cdef class Criterion: pass - cdef void node_value(self, double* dest) nogil except *: + cdef void node_value(self, double* dest) nogil: """Placeholder for storing the node value. Placeholder for a method which will compute the node value @@ -153,7 +153,7 @@ cdef class Criterion: pass - cdef double proxy_impurity_improvement(self) nogil except *: + cdef double proxy_impurity_improvement(self) nogil: """Compute a proxy of the impurity reduction This method is used to speed up the search for the best split. @@ -171,7 +171,7 @@ cdef class Criterion: return (- self.weighted_n_right * impurity_right - self.weighted_n_left * impurity_left) - cdef double impurity_improvement(self, double impurity) nogil except *: + cdef double impurity_improvement(self, double impurity) nogil: """Compute the improvement in impurity This method computes the improvement in impurity when a split occurs. @@ -348,7 +348,7 @@ cdef class ClassificationCriterion(Criterion): # Reset to pos=start self.reset() - cdef void reset(self) nogil except *: + cdef void reset(self) nogil: """Reset the criterion at pos=start.""" self.pos = self.start @@ -371,7 +371,7 @@ cdef class ClassificationCriterion(Criterion): sum_left += self.sum_stride sum_right += self.sum_stride - cdef void reverse_reset(self) nogil except *: + cdef void reverse_reset(self) nogil: """Reset the criterion at pos=end.""" self.pos = self.end @@ -393,7 +393,7 @@ cdef class ClassificationCriterion(Criterion): sum_left += self.sum_stride sum_right += self.sum_stride - cdef void update(self, SIZE_t new_pos) nogil except *: + cdef void update(self, SIZE_t new_pos) nogil: """Updated statistics by moving samples[pos:new_pos] to the left child. Parameters @@ -471,14 +471,14 @@ cdef class ClassificationCriterion(Criterion): self.pos = new_pos - cdef double node_impurity(self) nogil except *: + cdef double node_impurity(self) nogil: pass cdef void children_impurity(self, double* impurity_left, - double* impurity_right) nogil except *: + double* impurity_right) nogil: pass - cdef void node_value(self, double* dest) nogil except *: + cdef void node_value(self, double* dest) nogil: """Compute the node value of samples[start:end] and save it into dest. Parameters @@ -513,7 +513,7 @@ cdef class Entropy(ClassificationCriterion): cross-entropy = -\sum_{k=0}^{K-1} count_k log(count_k) """ - cdef double node_impurity(self) nogil except *: + cdef double node_impurity(self) nogil: """Evaluate the impurity of the current node, i.e. the impurity of samples[start:end], using the cross-entropy criterion.""" @@ -536,7 +536,7 @@ cdef class Entropy(ClassificationCriterion): return entropy / self.n_outputs cdef void children_impurity(self, double* impurity_left, - double* impurity_right) nogil except *: + double* impurity_right) nogil: """Evaluate the impurity in children nodes i.e. 
the impurity of the left child (samples[start:pos]) and the @@ -595,7 +595,7 @@ cdef class Gini(ClassificationCriterion): = 1 - \sum_{k=0}^{K-1} count_k ** 2 """ - cdef double node_impurity(self) nogil except *: + cdef double node_impurity(self) nogil: """Evaluate the impurity of the current node, i.e. the impurity of samples[start:end] using the Gini criterion.""" @@ -623,7 +623,7 @@ cdef class Gini(ClassificationCriterion): return gini / self.n_outputs cdef void children_impurity(self, double* impurity_left, - double* impurity_right) nogil except *: + double* impurity_right) nogil: """Evaluate the impurity in children nodes i.e. the impurity of the left child (samples[start:pos]) and the @@ -738,7 +738,7 @@ cdef class RegressionCriterion(Criterion): cdef void init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight, double weighted_n_samples, SIZE_t* samples, SIZE_t start, - SIZE_t end) nogil except *: + SIZE_t end) nogil: """Initialize the criterion at node samples[start:end] and children samples[start:start] and samples[start:end].""" # Initialize fields @@ -779,7 +779,7 @@ cdef class RegressionCriterion(Criterion): # Reset to pos=start self.reset() - cdef void reset(self) nogil except *: + cdef void reset(self) nogil: """Reset the criterion at pos=start.""" cdef SIZE_t n_bytes = self.n_outputs * sizeof(double) memset(self.sum_left, 0, n_bytes) @@ -789,7 +789,7 @@ cdef class RegressionCriterion(Criterion): self.weighted_n_right = self.weighted_n_node_samples self.pos = self.start - cdef void reverse_reset(self) nogil except *: + cdef void reverse_reset(self) nogil: """Reset the criterion at pos=end.""" cdef SIZE_t n_bytes = self.n_outputs * sizeof(double) memset(self.sum_right, 0, n_bytes) @@ -799,7 +799,7 @@ cdef class RegressionCriterion(Criterion): self.weighted_n_left = self.weighted_n_node_samples self.pos = self.end - cdef void update(self, SIZE_t new_pos) nogil except *: + cdef void update(self, SIZE_t new_pos) nogil: """Updated statistics by moving samples[pos:new_pos] to the left.""" cdef double* sum_left = self.sum_left @@ -860,14 +860,14 @@ cdef class RegressionCriterion(Criterion): self.pos = new_pos - cdef double node_impurity(self) nogil except *: + cdef double node_impurity(self) nogil: pass cdef void children_impurity(self, double* impurity_left, - double* impurity_right) nogil except *: + double* impurity_right) nogil: pass - cdef void node_value(self, double* dest) nogil except *: + cdef void node_value(self, double* dest) nogil: """Compute the node value of samples[start:end] into dest.""" cdef SIZE_t k @@ -882,7 +882,7 @@ cdef class MSE(RegressionCriterion): MSE = var_left + var_right """ - cdef double node_impurity(self) nogil except *: + cdef double node_impurity(self) nogil: """Evaluate the impurity of the current node, i.e. the impurity of samples[start:end].""" @@ -896,7 +896,7 @@ cdef class MSE(RegressionCriterion): return impurity / self.n_outputs - cdef double proxy_impurity_improvement(self) nogil except *: + cdef double proxy_impurity_improvement(self) nogil: """Compute a proxy of the impurity reduction This method is used to speed up the search for the best split. @@ -923,7 +923,7 @@ cdef class MSE(RegressionCriterion): proxy_impurity_right / self.weighted_n_right) cdef void children_impurity(self, double* impurity_left, - double* impurity_right) nogil except *: + double* impurity_right) nogil: """Evaluate the impurity in children nodes, i.e. 
the impurity of the left child (samples[start:pos]) and the impurity the right child (samples[pos:end]).""" @@ -1068,6 +1068,7 @@ cdef class MAE(RegressionCriterion): for k in range(self.n_outputs): y_ik = y[i * y_stride + k] + # push method ends up calling safe_realloc, hence `except *` # push all values to the right side, # since pos = start initially anyway ( right_child[k]).push(y_ik, w) @@ -1103,6 +1104,7 @@ cdef class MAE(RegressionCriterion): # remove everything from left and put it into right ( left_child[k]).pop(&value, &weight) + # push method ends up calling safe_realloc, hence `except *` ( right_child[k]).push(value, weight) @@ -1126,6 +1128,7 @@ cdef class MAE(RegressionCriterion): # remove everything from right and put it into left ( right_child[k]).pop(&value, &weight) + # push method ends up calling safe_realloc, hence `except *` ( left_child[k]).push(value, weight) @@ -1162,6 +1165,7 @@ cdef class MAE(RegressionCriterion): y_ik = y[i * self.y_stride + k] # remove y_ik and its weight w from right and add to left ( right_child[k]).remove(y_ik, w) + # push method ends up calling safe_realloc, hence except * ( left_child[k]).push(y_ik, w) self.weighted_n_left += w @@ -1186,14 +1190,14 @@ cdef class MAE(RegressionCriterion): self.weighted_n_left) self.pos = new_pos - cdef void node_value(self, double* dest) nogil except *: + cdef void node_value(self, double* dest) nogil: """Computes the node value of samples[start:end] into dest.""" cdef SIZE_t k for k in range(self.n_outputs): dest[k] = self.node_medians[k] - cdef double node_impurity(self) nogil except *: + cdef double node_impurity(self) nogil: """Evaluate the impurity of the current node, i.e. the impurity of samples[start:end]""" @@ -1216,7 +1220,7 @@ cdef class MAE(RegressionCriterion): return impurity / (self.weighted_n_node_samples * self.n_outputs) cdef void children_impurity(self, double* impurity_left, - double* impurity_right) nogil except *: + double* impurity_right) nogil: """Evaluate the impurity in children nodes, i.e. the impurity of the left child (samples[start:pos]) and the impurity the right child (samples[pos:end]). @@ -1273,7 +1277,7 @@ cdef class FriedmanMSE(MSE): improvement = n_left * n_right * diff^2 / (n_left + n_right) """ - cdef double proxy_impurity_improvement(self) nogil except *: + cdef double proxy_impurity_improvement(self) nogil: """Compute a proxy of the impurity reduction This method is used to speed up the search for the best split. 
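The changes above are easier to read against the three ways Cython lets a cdef function declare its error behaviour. The snippet below is a standalone sketch of those flavours with hypothetical names; the cost notes describe general Cython semantics, not anything introduced by this patch.

cdef double node_score() nogil:
    # No except clause: cheapest to call in a tight loop, but a Python
    # exception raised inside is merely printed as a warning and then
    # swallowed, so this only suits methods that cannot fail.
    return 0.0

cdef double node_score_checked() nogil except *:
    # `except *`: exceptions do propagate, but the caller must test for a
    # pending error after every single call (re-acquiring the GIL to do so
    # from nogil code), which is the overhead being stripped from the
    # hot-path criterion methods here.
    return 0.0

cdef int node_score_flagged() nogil except -1:
    # `except -1`: -1 is reserved as an error sentinel, so the exception
    # check only runs when -1 actually comes back; later patches in this
    # series settle on this form for the methods that can raise.
    return 0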
@@ -1303,7 +1307,7 @@ cdef class FriedmanMSE(MSE): return diff * diff / (self.weighted_n_left * self.weighted_n_right) - cdef double impurity_improvement(self, double impurity) nogil except *: + cdef double impurity_improvement(self, double impurity) nogil: cdef double* sum_left = self.sum_left cdef double* sum_right = self.sum_right diff --git a/sklearn/tree/_splitter.pxd b/sklearn/tree/_splitter.pxd index 859a0b796fd40..b461d5421b22b 100644 --- a/sklearn/tree/_splitter.pxd +++ b/sklearn/tree/_splitter.pxd @@ -93,6 +93,6 @@ cdef class Splitter: SplitRecord* split, SIZE_t* n_constant_features) nogil except * - cdef void node_value(self, double* dest) nogil except * + cdef void node_value(self, double* dest) nogil - cdef double node_impurity(self) nogil except * + cdef double node_impurity(self) nogil diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx index bd5d2e649de10..76cf229fadb67 100644 --- a/sklearn/tree/_splitter.pyx +++ b/sklearn/tree/_splitter.pyx @@ -218,12 +218,12 @@ cdef class Splitter: pass - cdef void node_value(self, double* dest) nogil except *: + cdef void node_value(self, double* dest) nogil: """Copy the value of node samples[start:end] into dest.""" self.criterion.node_value(dest) - cdef double node_impurity(self) nogil except *: + cdef double node_impurity(self) nogil: """Return the impurity of the current node.""" return self.criterion.node_impurity() @@ -513,19 +513,19 @@ cdef class BestSplitter(BaseDenseSplitter): # Sort n-element arrays pointed to by Xf and samples, simultaneously, # by the values in Xf. Algorithm: Introsort (Musser, SP&E, 1997). -cdef inline void sort(DTYPE_t* Xf, SIZE_t* samples, SIZE_t n) nogil except *: +cdef inline void sort(DTYPE_t* Xf, SIZE_t* samples, SIZE_t n) nogil: cdef int maxd = 2 * log(n) introsort(Xf, samples, n, maxd) cdef inline void swap(DTYPE_t* Xf, SIZE_t* samples, - SIZE_t i, SIZE_t j) nogil except *: + SIZE_t i, SIZE_t j) nogil: # Helper for sort Xf[i], Xf[j] = Xf[j], Xf[i] samples[i], samples[j] = samples[j], samples[i] -cdef inline DTYPE_t median3(DTYPE_t* Xf, SIZE_t n) nogil except *: +cdef inline DTYPE_t median3(DTYPE_t* Xf, SIZE_t n) nogil: # Median of three pivot selection, after Bentley and McIlroy (1993). # Engineering a sort function. SP&E. Requires 8/3 comparisons on average. cdef DTYPE_t a = Xf[0], b = Xf[n / 2], c = Xf[n - 1] @@ -548,7 +548,7 @@ cdef inline DTYPE_t median3(DTYPE_t* Xf, SIZE_t n) nogil except *: # Introsort with median of 3 pivot selection and 3-way partition function # (robust to repeated elements, e.g. lots of zero features). cdef void introsort(DTYPE_t* Xf, SIZE_t *samples, - SIZE_t n, int maxd) nogil except *: + SIZE_t n, int maxd) nogil: cdef DTYPE_t pivot cdef SIZE_t i, l, r @@ -581,7 +581,7 @@ cdef void introsort(DTYPE_t* Xf, SIZE_t *samples, cdef inline void sift_down(DTYPE_t* Xf, SIZE_t* samples, - SIZE_t start, SIZE_t end) nogil except *: + SIZE_t start, SIZE_t end) nogil: # Restore heap order in Xf[start:end] by moving the max element to start. 
cdef SIZE_t child, maxind, root @@ -603,7 +603,7 @@ cdef inline void sift_down(DTYPE_t* Xf, SIZE_t* samples, root = maxind -cdef void heapsort(DTYPE_t* Xf, SIZE_t* samples, SIZE_t n) nogil except *: +cdef void heapsort(DTYPE_t* Xf, SIZE_t* samples, SIZE_t n) nogil: cdef SIZE_t start, end # heapify @@ -908,7 +908,7 @@ cdef class BaseSparseSplitter(Splitter): cdef inline SIZE_t _partition(self, double threshold, SIZE_t end_negative, SIZE_t start_positive, - SIZE_t zero_pos) nogil except *: + SIZE_t zero_pos) nogil: """Partition samples[start:end] based on threshold.""" cdef double value @@ -946,7 +946,7 @@ cdef class BaseSparseSplitter(Splitter): cdef inline void extract_nnz(self, SIZE_t feature, SIZE_t* end_negative, SIZE_t* start_positive, - bint* is_samples_sorted) nogil except *: + bint* is_samples_sorted) nogil: """Extract and partition values for a given feature. The extracted values are partitioned between negative values @@ -1015,7 +1015,7 @@ cdef int compare_SIZE_t(const void* a, const void* b) nogil: cdef inline void binary_search(INT32_t* sorted_array, INT32_t start, INT32_t end, SIZE_t value, SIZE_t* index, - INT32_t* new_start) nogil except *: + INT32_t* new_start) nogil: """Return the index of value in the sorted array. If not found, return -1. new_start is the last pivot + 1 @@ -1047,7 +1047,7 @@ cdef inline void extract_nnz_index_to_samples(INT32_t* X_indices, SIZE_t* index_to_samples, DTYPE_t* Xf, SIZE_t* end_negative, - SIZE_t* start_positive) nogil except *: + SIZE_t* start_positive) nogil: """Extract and partition values for a feature using index_to_samples. Complexity is O(indptr_end - indptr_start). @@ -1089,7 +1089,7 @@ cdef inline void extract_nnz_binary_search(INT32_t* X_indices, SIZE_t* end_negative, SIZE_t* start_positive, SIZE_t* sorted_samples, - bint* is_samples_sorted) nogil except *: + bint* is_samples_sorted) nogil: """Extract and partition values for a given feature using binary search. 
If n_samples = end - start and n_indices = indptr_end - indptr_start, @@ -1150,7 +1150,7 @@ cdef inline void extract_nnz_binary_search(INT32_t* X_indices, cdef inline void sparse_swap(SIZE_t* index_to_samples, SIZE_t* samples, - SIZE_t pos_1, SIZE_t pos_2) nogil except *: + SIZE_t pos_1, SIZE_t pos_2) nogil: """Swap sample pos_1 and pos_2 preserving sparse invariant.""" samples[pos_1], samples[pos_2] = samples[pos_2], samples[pos_1] index_to_samples[samples[pos_1]] = pos_1 diff --git a/sklearn/tree/_utils.pxd b/sklearn/tree/_utils.pxd index e2f37365b9ecf..621f1282b6be2 100644 --- a/sklearn/tree/_utils.pxd +++ b/sklearn/tree/_utils.pxd @@ -48,14 +48,14 @@ cdef np.ndarray sizet_ptr_to_ndarray(SIZE_t* data, SIZE_t size) cdef SIZE_t rand_int(SIZE_t low, SIZE_t high, - UINT32_t* random_state) nogil except * + UINT32_t* random_state) nogil cdef double rand_uniform(double low, double high, - UINT32_t* random_state) nogil except * + UINT32_t* random_state) nogil -cdef double log(double x) nogil except * +cdef double log(double x) nogil # ============================================================================= # Stack data structure @@ -76,11 +76,11 @@ cdef class Stack: cdef SIZE_t top cdef StackRecord* stack_ - cdef bint is_empty(self) nogil except * + cdef bint is_empty(self) nogil cdef int push(self, SIZE_t start, SIZE_t end, SIZE_t depth, SIZE_t parent, bint is_left, double impurity, - SIZE_t n_constant_features) nogil except * - cdef int pop(self, StackRecord* res) nogil except * + SIZE_t n_constant_features) nogil + cdef int pop(self, StackRecord* res) nogil # ============================================================================= @@ -105,12 +105,12 @@ cdef class PriorityHeap: cdef SIZE_t heap_ptr cdef PriorityHeapRecord* heap_ - cdef bint is_empty(self) nogil except * + cdef bint is_empty(self) nogil cdef int push(self, SIZE_t node_id, SIZE_t start, SIZE_t end, SIZE_t pos, SIZE_t depth, bint is_leaf, double improvement, double impurity, double impurity_left, double impurity_right) nogil except * - cdef int pop(self, PriorityHeapRecord* res) nogil except * + cdef int pop(self, PriorityHeapRecord* res) nogil # ============================================================================= # WeightedPQueue data structure @@ -126,15 +126,15 @@ cdef class WeightedPQueue: cdef SIZE_t array_ptr cdef WeightedPQueueRecord* array_ - cdef bint is_empty(self) nogil except * + cdef bint is_empty(self) nogil cdef void reset(self) nogil except * - cdef SIZE_t size(self) nogil except * + cdef SIZE_t size(self) nogil cdef int push(self, DOUBLE_t data, DOUBLE_t weight) nogil except * - cdef int remove(self, DOUBLE_t data, DOUBLE_t weight) nogil except * - cdef int pop(self, DOUBLE_t* data, DOUBLE_t* weight) nogil except * - cdef int peek(self, DOUBLE_t* data, DOUBLE_t* weight) nogil except * - cdef DOUBLE_t get_weight_from_index(self, SIZE_t index) nogil except * - cdef DOUBLE_t get_value_from_index(self, SIZE_t index) nogil except * + cdef int remove(self, DOUBLE_t data, DOUBLE_t weight) nogil + cdef int pop(self, DOUBLE_t* data, DOUBLE_t* weight) nogil + cdef int peek(self, DOUBLE_t* data, DOUBLE_t* weight) nogil + cdef DOUBLE_t get_weight_from_index(self, SIZE_t index) nogil + cdef DOUBLE_t get_value_from_index(self, SIZE_t index) nogil # ============================================================================= @@ -149,15 +149,15 @@ cdef class WeightedMedianCalculator: cdef DOUBLE_t sum_w_0_k # represents sum(weights[0:k]) # = w[0] + w[1] + ... 
+ w[k-1] - cdef SIZE_t size(self) nogil except * + cdef SIZE_t size(self) nogil cdef int push(self, DOUBLE_t data, DOUBLE_t weight) nogil except * cdef void reset(self) nogil except * cdef int update_median_parameters_post_push( self, DOUBLE_t data, DOUBLE_t weight, - DOUBLE_t original_median) nogil except * - cdef int remove(self, DOUBLE_t data, DOUBLE_t weight) nogil except * - cdef int pop(self, DOUBLE_t* data, DOUBLE_t* weight) nogil except * + DOUBLE_t original_median) nogil + cdef int remove(self, DOUBLE_t data, DOUBLE_t weight) nogil + cdef int pop(self, DOUBLE_t* data, DOUBLE_t* weight) nogil cdef int update_median_parameters_post_remove( self, DOUBLE_t data, DOUBLE_t weight, - DOUBLE_t original_median) nogil except * - cdef DOUBLE_t get_median(self) nogil except * + DOUBLE_t original_median) nogil + cdef DOUBLE_t get_median(self) nogil diff --git a/sklearn/tree/_utils.pyx b/sklearn/tree/_utils.pyx index 365fd2ada6533..b7e8a89b8aabd 100644 --- a/sklearn/tree/_utils.pyx +++ b/sklearn/tree/_utils.pyx @@ -34,7 +34,12 @@ cdef realloc_ptr safe_realloc(realloc_ptr* p, size_t nelems) nogil except *: with gil: raise MemoryError("could not allocate (%d * %d) bytes" % (nelems, sizeof(p[0][0]))) - cdef realloc_ptr tmp = realloc(p[0], nbytes) + cdef realloc_ptr tmp = NULL + if p[0] == NULL: + # First alloc + tmp = malloc(nbytes) + else: + tmp = realloc(p[0], nbytes) if tmp == NULL: with gil: raise MemoryError("could not allocate %d bytes" % nbytes) @@ -55,7 +60,7 @@ def _realloc_test(): # rand_r replacement using a 32bit XorShift generator # See http://www.jstatsoft.org/v08/i14/paper for details -cdef inline UINT32_t our_rand_r(UINT32_t* seed) nogil except *: +cdef inline UINT32_t our_rand_r(UINT32_t* seed) nogil: seed[0] ^= (seed[0] << 13) seed[0] ^= (seed[0] >> 17) seed[0] ^= (seed[0] << 5) @@ -71,19 +76,19 @@ cdef inline np.ndarray sizet_ptr_to_ndarray(SIZE_t* data, SIZE_t size): cdef inline SIZE_t rand_int(SIZE_t low, SIZE_t high, - UINT32_t* random_state) nogil except *: + UINT32_t* random_state) nogil: """Generate a random integer in [0; end).""" return low + our_rand_r(random_state) % (high - low) cdef inline double rand_uniform(double low, double high, - UINT32_t* random_state) nogil except *: + UINT32_t* random_state) nogil: """Generate a random double in [low; high).""" return ((high - low) * our_rand_r(random_state) / RAND_R_MAX) + low -cdef inline double log(double x) nogil except *: +cdef inline double log(double x) nogil: return ln(x) / ln(2.0) @@ -117,12 +122,12 @@ cdef class Stack: def __dealloc__(self): free(self.stack_) - cdef bint is_empty(self) nogil except *: + cdef bint is_empty(self) nogil: return self.top <= 0 cdef int push(self, SIZE_t start, SIZE_t end, SIZE_t depth, SIZE_t parent, bint is_left, double impurity, - SIZE_t n_constant_features) nogil except *: + SIZE_t n_constant_features) nogil: """Push a new element onto the stack. Returns 0 if successful; -1 on out of memory error. @@ -153,7 +158,7 @@ cdef class Stack: self.top = top + 1 return 0 - cdef int pop(self, StackRecord* res) nogil except *: + cdef int pop(self, StackRecord* res) nogil: """Remove the top element from the stack and copy to ``res``. 
Returns 0 if pop was successful (and ``res`` is set); -1 @@ -175,7 +180,7 @@ cdef class Stack: # PriorityHeap data structure # ============================================================================= -cdef void heapify_up(PriorityHeapRecord* heap, SIZE_t pos) nogil except *: +cdef void heapify_up(PriorityHeapRecord* heap, SIZE_t pos) nogil: """Restore heap invariant parent.improvement > child.improvement from ``pos`` upwards. """ if pos == 0: @@ -189,7 +194,7 @@ cdef void heapify_up(PriorityHeapRecord* heap, SIZE_t pos) nogil except *: cdef void heapify_down(PriorityHeapRecord* heap, SIZE_t pos, - SIZE_t heap_length) nogil except *: + SIZE_t heap_length) nogil: """Restore heap invariant parent.improvement > children.improvement from ``pos`` downwards. """ cdef SIZE_t left_pos = 2 * (pos + 1) - 1 @@ -239,7 +244,7 @@ cdef class PriorityHeap: def __dealloc__(self): free(self.heap_) - cdef bint is_empty(self) nogil except *: + cdef bint is_empty(self) nogil: return self.heap_ptr <= 0 cdef int push(self, SIZE_t node_id, SIZE_t start, SIZE_t end, SIZE_t pos, @@ -284,7 +289,7 @@ cdef class PriorityHeap: self.heap_ptr = heap_ptr + 1 return 0 - cdef int pop(self, PriorityHeapRecord* res) nogil except *: + cdef int pop(self, PriorityHeapRecord* res) nogil: """Remove max element from the heap. """ cdef SIZE_t heap_ptr = self.heap_ptr cdef PriorityHeapRecord* heap = self.heap_ @@ -343,12 +348,13 @@ cdef class WeightedPQueue: cdef void reset(self) nogil except *: """Reset the WeightedPQueue to its state at construction""" self.array_ptr = 0 + # Since safe_realloc can raise MemoryError, use `except *` safe_realloc(&self.array_, self.capacity) - cdef bint is_empty(self) nogil except *: + cdef bint is_empty(self) nogil: return self.array_ptr <= 0 - cdef SIZE_t size(self) nogil except *: + cdef SIZE_t size(self) nogil: return self.array_ptr cdef int push(self, DOUBLE_t data, DOUBLE_t weight) nogil except *: @@ -362,6 +368,7 @@ cdef class WeightedPQueue: # Resize if capacity not sufficient if array_ptr >= self.capacity: self.capacity *= 2 + # Since safe_realloc can raise MemoryError, use `except *` safe_realloc(&self.array_, self.capacity) # Put element as last element of array @@ -380,7 +387,7 @@ cdef class WeightedPQueue: self.array_ptr = array_ptr + 1 return 0 - cdef int remove(self, DOUBLE_t data, DOUBLE_t weight) nogil except *: + cdef int remove(self, DOUBLE_t data, DOUBLE_t weight) nogil: """Remove a specific value/weight record from the array. Returns 0 if successful, -1 if record not found.""" cdef SIZE_t array_ptr = self.array_ptr @@ -408,7 +415,7 @@ cdef class WeightedPQueue: self.array_ptr = array_ptr - 1 return 0 - cdef int pop(self, DOUBLE_t* data, DOUBLE_t* weight) nogil except *: + cdef int pop(self, DOUBLE_t* data, DOUBLE_t* weight) nogil: """Remove the top (minimum) element from array. Returns 0 if successful, -1 if nothing to remove.""" cdef SIZE_t array_ptr = self.array_ptr @@ -429,7 +436,7 @@ cdef class WeightedPQueue: self.array_ptr = array_ptr - 1 return 0 - cdef int peek(self, DOUBLE_t* data, DOUBLE_t* weight) nogil except *: + cdef int peek(self, DOUBLE_t* data, DOUBLE_t* weight) nogil: """Write the top element from array to a pointer. 
Returns 0 if successful, -1 if nothing to write.""" cdef WeightedPQueueRecord* array = self.array_ @@ -440,7 +447,7 @@ cdef class WeightedPQueue: weight[0] = array[0].weight return 0 - cdef DOUBLE_t get_weight_from_index(self, SIZE_t index) nogil except *: + cdef DOUBLE_t get_weight_from_index(self, SIZE_t index) nogil: """Given an index between [0,self.current_capacity], access the appropriate heap and return the requested weight""" cdef WeightedPQueueRecord* array = self.array_ @@ -448,7 +455,7 @@ cdef class WeightedPQueue: # get weight at index return array[index].weight - cdef DOUBLE_t get_value_from_index(self, SIZE_t index) nogil except *: + cdef DOUBLE_t get_value_from_index(self, SIZE_t index) nogil: """Given an index between [0,self.current_capacity], access the appropriate heap and return the requested value""" cdef WeightedPQueueRecord* array = self.array_ @@ -501,13 +508,14 @@ cdef class WeightedMedianCalculator: self.k = 0 self.sum_w_0_k = 0 - cdef SIZE_t size(self) nogil except *: + cdef SIZE_t size(self) nogil: """Return the number of samples in the WeightedMedianCalculator""" return self.samples.size() cdef void reset(self) nogil except *: """Reset the WeightedMedianCalculator to its state at construction""" + # samples.reset (WeightedPQueue.reset) uses safe_realloc, hence except* self.samples.reset() self.total_weight = 0 self.k = 0 @@ -523,6 +531,7 @@ cdef class WeightedMedianCalculator: if self.size() != 0: original_median = self.get_median() + # samples.push (WeightedPQueue.push) uses safe_realloc, hence except * return_value = self.samples.push(data, weight) self.update_median_parameters_post_push(data, weight, original_median) @@ -530,7 +539,7 @@ cdef class WeightedMedianCalculator: cdef int update_median_parameters_post_push( self, DOUBLE_t data, DOUBLE_t weight, - DOUBLE_t original_median) nogil except *: + DOUBLE_t original_median) nogil: """Update the parameters used in the median calculation, namely `k` and `sum_w_0_k` after an insertion""" @@ -570,7 +579,7 @@ cdef class WeightedMedianCalculator: self.sum_w_0_k += self.samples.get_weight_from_index(self.k-1) return 0 - cdef int remove(self, DOUBLE_t data, DOUBLE_t weight) nogil except *: + cdef int remove(self, DOUBLE_t data, DOUBLE_t weight) nogil: """Remove a value from the MedianHeap, removing it from consideration in the median calculation """ @@ -585,7 +594,7 @@ cdef class WeightedMedianCalculator: original_median) return return_value - cdef int pop(self, DOUBLE_t* data, DOUBLE_t* weight) nogil except *: + cdef int pop(self, DOUBLE_t* data, DOUBLE_t* weight) nogil: """Pop a value from the MedianHeap, starting from the left and moving to the right. 
""" @@ -607,7 +616,7 @@ cdef class WeightedMedianCalculator: cdef int update_median_parameters_post_remove( self, DOUBLE_t data, DOUBLE_t weight, - double original_median) nogil except *: + double original_median) nogil: """Update the parameters used in the median calculation, namely `k` and `sum_w_0_k` after a removal""" # reset parameters because it there are no elements @@ -655,7 +664,7 @@ cdef class WeightedMedianCalculator: self.sum_w_0_k -= self.samples.get_weight_from_index(self.k) return 0 - cdef DOUBLE_t get_median(self) nogil except *: + cdef DOUBLE_t get_median(self) nogil: """Write the median to a pointer, taking into account sample weights.""" if self.sum_w_0_k == (self.total_weight / 2.0): From f756bd7e35cfbfb899ef972d2f302aca8a9d5e31 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Tue, 27 Dec 2016 17:42:39 +0100 Subject: [PATCH 12/17] Actually that was unneeded; realloc will also allocate for the first time --- sklearn/tree/_utils.pyx | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/sklearn/tree/_utils.pyx b/sklearn/tree/_utils.pyx index b7e8a89b8aabd..25d79259986d6 100644 --- a/sklearn/tree/_utils.pyx +++ b/sklearn/tree/_utils.pyx @@ -34,12 +34,7 @@ cdef realloc_ptr safe_realloc(realloc_ptr* p, size_t nelems) nogil except *: with gil: raise MemoryError("could not allocate (%d * %d) bytes" % (nelems, sizeof(p[0][0]))) - cdef realloc_ptr tmp = NULL - if p[0] == NULL: - # First alloc - tmp = malloc(nbytes) - else: - tmp = realloc(p[0], nbytes) + cdef realloc_ptr tmp = realloc(p[0], nbytes) if tmp == NULL: with gil: raise MemoryError("could not allocate %d bytes" % nbytes) From e653f6f5f4a5fa7267b4980dfd613343910b91c6 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Tue, 27 Dec 2016 17:57:46 +0100 Subject: [PATCH 13/17] StackRecord*, PriorityHeapRecord* to fused type realloc_ptr; Use safe_realloc --- sklearn/tree/_utils.pxd | 4 +++- sklearn/tree/_utils.pyx | 24 ++++++------------------ 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/sklearn/tree/_utils.pxd b/sklearn/tree/_utils.pxd index 621f1282b6be2..364b70c757a06 100644 --- a/sklearn/tree/_utils.pxd +++ b/sklearn/tree/_utils.pxd @@ -40,6 +40,8 @@ ctypedef fused realloc_ptr: (DOUBLE_t**) (Node*) (Node**) + (StackRecord*) + (PriorityHeapRecord*) cdef realloc_ptr safe_realloc(realloc_ptr* p, size_t nelems) nogil except * @@ -79,7 +81,7 @@ cdef class Stack: cdef bint is_empty(self) nogil cdef int push(self, SIZE_t start, SIZE_t end, SIZE_t depth, SIZE_t parent, bint is_left, double impurity, - SIZE_t n_constant_features) nogil + SIZE_t n_constant_features) nogil except * cdef int pop(self, StackRecord* res) nogil diff --git a/sklearn/tree/_utils.pyx b/sklearn/tree/_utils.pyx index 25d79259986d6..1ae48cbfb43f1 100644 --- a/sklearn/tree/_utils.pyx +++ b/sklearn/tree/_utils.pyx @@ -13,7 +13,6 @@ from libc.stdlib cimport free from libc.stdlib cimport malloc -from libc.stdlib cimport calloc from libc.stdlib cimport realloc from libc.math cimport log as ln @@ -122,7 +121,7 @@ cdef class Stack: cdef int push(self, SIZE_t start, SIZE_t end, SIZE_t depth, SIZE_t parent, bint is_left, double impurity, - SIZE_t n_constant_features) nogil: + SIZE_t n_constant_features) nogil except *: """Push a new element onto the stack. Returns 0 if successful; -1 on out of memory error. 
@@ -133,12 +132,8 @@ cdef class Stack: # Resize if capacity not sufficient if top >= self.capacity: self.capacity *= 2 - stack = realloc(self.stack_, - self.capacity * sizeof(StackRecord)) - if stack == NULL: - # no free; __dealloc__ handles that - return -1 - self.stack_ = stack + # Since safe_realloc can raise MemoryError, use `except *` + safe_realloc(&self.stack_, self.capacity) stack = self.stack_ stack[top].start = start @@ -232,9 +227,7 @@ cdef class PriorityHeap: def __cinit__(self, SIZE_t capacity): self.capacity = capacity self.heap_ptr = 0 - self.heap_ = malloc(capacity * sizeof(PriorityHeapRecord)) - if self.heap_ == NULL: - raise MemoryError() + safe_realloc(&self.heap_, capacity) def __dealloc__(self): free(self.heap_) @@ -256,13 +249,8 @@ cdef class PriorityHeap: # Resize if capacity not sufficient if heap_ptr >= self.capacity: self.capacity *= 2 - heap = realloc(self.heap_, - self.capacity * - sizeof(PriorityHeapRecord)) - if heap == NULL: - # no free; __dealloc__ handles that - return -1 - self.heap_ = heap + # Since safe_realloc can raise MemoryError, use `except *` + safe_realloc(&self.heap_, self.capacity) # Put element as last element of heap heap = self.heap_ From 34adc809c8b44699fb5b4b1cbbb0c7ff509aba5e Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Tue, 27 Dec 2016 21:31:10 +0100 Subject: [PATCH 14/17] Use except -1 to propagate exceptions. This should avoid overheads --- sklearn/tree/_criterion.pxd | 12 +++--- sklearn/tree/_criterion.pyx | 72 +++++++++++++++++++++--------------- sklearn/tree/_splitter.pxd | 18 ++++----- sklearn/tree/_splitter.pyx | 74 ++++++++++++++++++++++--------------- sklearn/tree/_tree.pxd | 6 +-- sklearn/tree/_tree.pyx | 13 ++++--- sklearn/tree/_utils.pxd | 12 +++--- sklearn/tree/_utils.pyx | 28 ++++++++------ 8 files changed, 134 insertions(+), 101 deletions(-) diff --git a/sklearn/tree/_criterion.pxd b/sklearn/tree/_criterion.pxd index cf6d32d1b7fe1..90066ea56daf0 100644 --- a/sklearn/tree/_criterion.pxd +++ b/sklearn/tree/_criterion.pxd @@ -53,12 +53,12 @@ cdef class Criterion: # statistics correspond to samples[start:pos] and samples[pos:end]. # Methods - cdef void init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight, - double weighted_n_samples, SIZE_t* samples, SIZE_t start, - SIZE_t end) nogil - cdef void reset(self) nogil - cdef void reverse_reset(self) nogil - cdef void update(self, SIZE_t new_pos) nogil + cdef int init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight, + double weighted_n_samples, SIZE_t* samples, SIZE_t start, + SIZE_t end) nogil except -1 + cdef int reset(self) nogil except -1 + cdef int reverse_reset(self) nogil except -1 + cdef int update(self, SIZE_t new_pos) nogil except -1 cdef double node_impurity(self) nogil cdef void children_impurity(self, double* impurity_left, double* impurity_right) nogil diff --git a/sklearn/tree/_criterion.pyx b/sklearn/tree/_criterion.pyx index 52f02aa4f0658..62dad8e31ea2e 100644 --- a/sklearn/tree/_criterion.pyx +++ b/sklearn/tree/_criterion.pyx @@ -51,9 +51,9 @@ cdef class Criterion: def __setstate__(self, d): pass - cdef void init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight, - double weighted_n_samples, SIZE_t* samples, SIZE_t start, - SIZE_t end) nogil: + cdef int init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight, + double weighted_n_samples, SIZE_t* samples, SIZE_t start, + SIZE_t end) nogil except -1: """Placeholder for a method which will initialize the criterion. 
Parameters @@ -79,7 +79,7 @@ cdef class Criterion: pass - cdef void reset(self) nogil: + cdef int reset(self) nogil except -1: """Reset the criterion at pos=start. This method must be implemented by the subclass. @@ -87,14 +87,14 @@ cdef class Criterion: pass - cdef void reverse_reset(self) nogil: + cdef int reverse_reset(self) nogil except -1: """Reset the criterion at pos=end. This method must be implemented by the subclass. """ pass - cdef void update(self, SIZE_t new_pos) nogil: + cdef int update(self, SIZE_t new_pos) nogil except -1: """Updated statistics by moving samples[pos:new_pos] to the left child. This updates the collected statistics by moving samples[pos:new_pos] @@ -281,9 +281,9 @@ cdef class ClassificationCriterion(Criterion): sizet_ptr_to_ndarray(self.n_classes, self.n_outputs)), self.__getstate__()) - cdef void init(self, DOUBLE_t* y, SIZE_t y_stride, - DOUBLE_t* sample_weight, double weighted_n_samples, - SIZE_t* samples, SIZE_t start, SIZE_t end) nogil except *: + cdef int init(self, DOUBLE_t* y, SIZE_t y_stride, + DOUBLE_t* sample_weight, double weighted_n_samples, + SIZE_t* samples, SIZE_t start, SIZE_t end) nogil except -1: """Initialize the criterion at node samples[start:end] and children samples[start:start] and samples[start:end]. @@ -347,8 +347,9 @@ cdef class ClassificationCriterion(Criterion): # Reset to pos=start self.reset() + return 0 - cdef void reset(self) nogil: + cdef int reset(self) nogil except -1: """Reset the criterion at pos=start.""" self.pos = self.start @@ -371,7 +372,7 @@ cdef class ClassificationCriterion(Criterion): sum_left += self.sum_stride sum_right += self.sum_stride - cdef void reverse_reset(self) nogil: + cdef int reverse_reset(self) nogil except -1: """Reset the criterion at pos=end.""" self.pos = self.end @@ -393,7 +394,7 @@ cdef class ClassificationCriterion(Criterion): sum_left += self.sum_stride sum_right += self.sum_stride - cdef void update(self, SIZE_t new_pos) nogil: + cdef int update(self, SIZE_t new_pos) nogil except -1: """Updated statistics by moving samples[pos:new_pos] to the left child. 
Parameters @@ -470,6 +471,7 @@ cdef class ClassificationCriterion(Criterion): sum_total += self.sum_stride self.pos = new_pos + return 0 cdef double node_impurity(self) nogil: pass @@ -736,9 +738,9 @@ cdef class RegressionCriterion(Criterion): def __reduce__(self): return (type(self), (self.n_outputs, self.n_samples), self.__getstate__()) - cdef void init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight, - double weighted_n_samples, SIZE_t* samples, SIZE_t start, - SIZE_t end) nogil: + cdef int init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight, + double weighted_n_samples, SIZE_t* samples, SIZE_t start, + SIZE_t end) nogil except -1: """Initialize the criterion at node samples[start:end] and children samples[start:start] and samples[start:end].""" # Initialize fields @@ -778,8 +780,9 @@ cdef class RegressionCriterion(Criterion): # Reset to pos=start self.reset() + return 0 - cdef void reset(self) nogil: + cdef int reset(self) nogil except -1: """Reset the criterion at pos=start.""" cdef SIZE_t n_bytes = self.n_outputs * sizeof(double) memset(self.sum_left, 0, n_bytes) @@ -788,8 +791,9 @@ cdef class RegressionCriterion(Criterion): self.weighted_n_left = 0.0 self.weighted_n_right = self.weighted_n_node_samples self.pos = self.start + return 0 - cdef void reverse_reset(self) nogil: + cdef int reverse_reset(self) nogil except -1: """Reset the criterion at pos=end.""" cdef SIZE_t n_bytes = self.n_outputs * sizeof(double) memset(self.sum_right, 0, n_bytes) @@ -798,8 +802,9 @@ cdef class RegressionCriterion(Criterion): self.weighted_n_right = 0.0 self.weighted_n_left = self.weighted_n_node_samples self.pos = self.end + return 0 - cdef void update(self, SIZE_t new_pos) nogil: + cdef int update(self, SIZE_t new_pos) nogil except -1: """Updated statistics by moving samples[pos:new_pos] to the left.""" cdef double* sum_left = self.sum_left @@ -859,6 +864,7 @@ cdef class RegressionCriterion(Criterion): sum_right[k] = sum_total[k] - sum_left[k] self.pos = new_pos + return 0 cdef double node_impurity(self) nogil: pass @@ -1028,9 +1034,9 @@ cdef class MAE(RegressionCriterion): self.left_child[k] = WeightedMedianCalculator(n_samples) self.right_child[k] = WeightedMedianCalculator(n_samples) - cdef void init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight, - double weighted_n_samples, SIZE_t* samples, SIZE_t start, - SIZE_t end) nogil except *: + cdef int init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight, + double weighted_n_samples, SIZE_t* samples, SIZE_t start, + SIZE_t end) nogil except -1: """Initialize the criterion at node samples[start:end] and children samples[start:start] and samples[start:end].""" @@ -1068,7 +1074,7 @@ cdef class MAE(RegressionCriterion): for k in range(self.n_outputs): y_ik = y[i * y_stride + k] - # push method ends up calling safe_realloc, hence `except *` + # push method ends up calling safe_realloc, hence `except -1` # push all values to the right side, # since pos = start initially anyway ( right_child[k]).push(y_ik, w) @@ -1080,9 +1086,11 @@ cdef class MAE(RegressionCriterion): # Reset to pos=start self.reset() + return 0 - cdef void reset(self) nogil except *: - """Reset the criterion at pos=start.""" + cdef int reset(self) nogil except -1: + """Reset the criterion at pos=start. 
Returns -1 upon error and + 0 when succeeded.""" cdef SIZE_t i, k cdef DOUBLE_t value @@ -1104,12 +1112,14 @@ cdef class MAE(RegressionCriterion): # remove everything from left and put it into right ( left_child[k]).pop(&value, &weight) - # push method ends up calling safe_realloc, hence `except *` + # push method ends up calling safe_realloc, hence `except -1` ( right_child[k]).push(value, weight) + return 0 - cdef void reverse_reset(self) nogil except *: - """Reset the criterion at pos=end.""" + cdef int reverse_reset(self) nogil except -1: + """Reset the criterion at pos=end. Returns -1 upon error and 0 when + succeeded.""" self.weighted_n_right = 0.0 self.weighted_n_left = self.weighted_n_node_samples @@ -1128,11 +1138,12 @@ cdef class MAE(RegressionCriterion): # remove everything from right and put it into left ( right_child[k]).pop(&value, &weight) - # push method ends up calling safe_realloc, hence `except *` + # push method ends up calling safe_realloc, hence `except -1` ( left_child[k]).push(value, weight) + return 0 - cdef void update(self, SIZE_t new_pos) nogil except *: + cdef int update(self, SIZE_t new_pos) nogil except -1: """Updated statistics by moving samples[pos:new_pos] to the left.""" cdef DOUBLE_t* sample_weight = self.sample_weight @@ -1165,7 +1176,7 @@ cdef class MAE(RegressionCriterion): y_ik = y[i * self.y_stride + k] # remove y_ik and its weight w from right and add to left ( right_child[k]).remove(y_ik, w) - # push method ends up calling safe_realloc, hence except * + # push method ends up calling safe_realloc, hence except -1 ( left_child[k]).push(y_ik, w) self.weighted_n_left += w @@ -1189,6 +1200,7 @@ cdef class MAE(RegressionCriterion): self.weighted_n_right = (self.weighted_n_node_samples - self.weighted_n_left) self.pos = new_pos + return 0 cdef void node_value(self, double* dest) nogil: """Computes the node value of samples[start:end] into dest.""" diff --git a/sklearn/tree/_splitter.pxd b/sklearn/tree/_splitter.pxd index b461d5421b22b..4d5c5ae46bceb 100644 --- a/sklearn/tree/_splitter.pxd +++ b/sklearn/tree/_splitter.pxd @@ -81,17 +81,17 @@ cdef class Splitter: # This allows optimization with depth-based tree building. # Methods - cdef void init(self, object X, np.ndarray y, - DOUBLE_t* sample_weight, - np.ndarray X_idx_sorted=*) except * + cdef int init(self, object X, np.ndarray y, + DOUBLE_t* sample_weight, + np.ndarray X_idx_sorted=*) except -1 - cdef void node_reset(self, SIZE_t start, SIZE_t end, - double* weighted_n_node_samples) nogil except * + cdef int node_reset(self, SIZE_t start, SIZE_t end, + double* weighted_n_node_samples) nogil except -1 - cdef void node_split(self, - double impurity, # Impurity of the node - SplitRecord* split, - SIZE_t* n_constant_features) nogil except * + cdef int node_split(self, + double impurity, # Impurity of the node + SplitRecord* split, + SIZE_t* n_constant_features) nogil except -1 cdef void node_value(self, double* dest) nogil diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx index 76cf229fadb67..106526cf26c6a 100644 --- a/sklearn/tree/_splitter.pyx +++ b/sklearn/tree/_splitter.pyx @@ -116,15 +116,17 @@ cdef class Splitter: def __setstate__(self, d): pass - cdef void init(self, + cdef int init(self, object X, np.ndarray[DOUBLE_t, ndim=2, mode="c"] y, DOUBLE_t* sample_weight, - np.ndarray X_idx_sorted=None) except *: + np.ndarray X_idx_sorted=None) except -1: """Initialize the splitter. Take in the input data X, the target Y, and optional sample weights. 
+ Returns -1 if memory allocation failed. + Parameters ---------- X : object @@ -180,9 +182,10 @@ cdef class Splitter: self.y_stride = y.strides[0] / y.itemsize self.sample_weight = sample_weight + return 0 - cdef void node_reset(self, SIZE_t start, SIZE_t end, - double* weighted_n_node_samples) nogil except *: + cdef int node_reset(self, SIZE_t start, SIZE_t end, + double* weighted_n_node_samples) nogil except -1: """Reset splitter on node samples[start:end]. Parameters @@ -207,13 +210,16 @@ cdef class Splitter: end) weighted_n_node_samples[0] = self.criterion.weighted_n_node_samples + return 0 - cdef void node_split(self, double impurity, SplitRecord* split, - SIZE_t* n_constant_features) nogil except *: + cdef int node_split(self, double impurity, SplitRecord* split, + SIZE_t* n_constant_features) nogil except -1: """Find the best split on node samples[start:end]. This is a placeholder method. The majority of computation will be done here. + + It should return -1 upon errors. """ pass @@ -257,12 +263,12 @@ cdef class BaseDenseSplitter(Splitter): if self.presort == 1: free(self.sample_mask) - cdef void init(self, - object X, - np.ndarray[DOUBLE_t, ndim=2, mode="c"] y, - DOUBLE_t* sample_weight, - np.ndarray X_idx_sorted=None) except *: - """Initialize the splitter.""" + cdef int init(self, + object X, + np.ndarray[DOUBLE_t, ndim=2, mode="c"] y, + DOUBLE_t* sample_weight, + np.ndarray X_idx_sorted=None) except -1: + """Initialize the splitter. Return -1 if memory allocation failed.""" # Call parent init Splitter.init(self, X, y, sample_weight) @@ -284,6 +290,8 @@ cdef class BaseDenseSplitter(Splitter): safe_realloc(&self.sample_mask, self.n_total_samples) memset(self.sample_mask, 0, self.n_total_samples*sizeof(SIZE_t)) + return 0 + cdef class BestSplitter(BaseDenseSplitter): """Splitter for finding the best split.""" @@ -295,9 +303,10 @@ cdef class BestSplitter(BaseDenseSplitter): self.random_state, self.presort), self.__getstate__()) - cdef void node_split(self, double impurity, SplitRecord* split, - SIZE_t* n_constant_features) nogil except *: - """Find the best split on node samples[start:end].""" + cdef int node_split(self, double impurity, SplitRecord* split, + SIZE_t* n_constant_features) nogil except -1: + """Find the best split on node samples[start:end]. + Returns -1 upon errors""" # Find the best split cdef SIZE_t* samples = self.samples cdef SIZE_t start = self.start @@ -509,6 +518,7 @@ cdef class BestSplitter(BaseDenseSplitter): # Return values split[0] = best n_constant_features[0] = n_total_constants + return 0 # Sort n-element arrays pointed to by Xf and samples, simultaneously, @@ -633,9 +643,10 @@ cdef class RandomSplitter(BaseDenseSplitter): self.random_state, self.presort), self.__getstate__()) - cdef void node_split(self, double impurity, SplitRecord* split, - SIZE_t* n_constant_features) nogil except *: - """Find the best random split on node samples[start:end].""" + cdef int node_split(self, double impurity, SplitRecord* split, + SIZE_t* n_constant_features) nogil except -1: + """Find the best random split on node samples[start:end]. 
+ Returns -1 upon errors and 0 when succeeded""" # Draw random splits and pick the best cdef SIZE_t* samples = self.samples cdef SIZE_t start = self.start @@ -835,6 +846,7 @@ cdef class RandomSplitter(BaseDenseSplitter): # Return values split[0] = best n_constant_features[0] = n_total_constants + return 0 cdef class BaseSparseSplitter(Splitter): @@ -867,12 +879,13 @@ cdef class BaseSparseSplitter(Splitter): free(self.index_to_samples) free(self.sorted_samples) - cdef void init(self, - object X, - np.ndarray[DOUBLE_t, ndim=2, mode="c"] y, - DOUBLE_t* sample_weight, - np.ndarray X_idx_sorted=None) except *: - """Initialize the splitter.""" + cdef int init(self, + object X, + np.ndarray[DOUBLE_t, ndim=2, mode="c"] y, + DOUBLE_t* sample_weight, + np.ndarray X_idx_sorted=None) except -1: + """Initialize the splitter. Returns -1 upon errors and 0 when + succeeded""" # Call parent init Splitter.init(self, X, y, sample_weight) @@ -905,6 +918,7 @@ cdef class BaseSparseSplitter(Splitter): for p in range(n_samples): index_to_samples[samples[p]] = p + return 0 cdef inline SIZE_t _partition(self, double threshold, SIZE_t end_negative, SIZE_t start_positive, @@ -1168,10 +1182,10 @@ cdef class BestSparseSplitter(BaseSparseSplitter): self.random_state, self.presort), self.__getstate__()) - cdef void node_split(self, double impurity, SplitRecord* split, - SIZE_t* n_constant_features) nogil except *: + cdef int node_split(self, double impurity, SplitRecord* split, + SIZE_t* n_constant_features) nogil except -1: """Find the best split on node samples[start:end], using sparse - features. + features. Returns -1 upon memory errors. """ # Find the best split cdef SIZE_t* samples = self.samples @@ -1382,6 +1396,7 @@ cdef class BestSparseSplitter(BaseSparseSplitter): # Return values split[0] = best n_constant_features[0] = n_total_constants + return 0 cdef class RandomSparseSplitter(BaseSparseSplitter): @@ -1395,8 +1410,8 @@ cdef class RandomSparseSplitter(BaseSparseSplitter): self.random_state, self.presort), self.__getstate__()) - cdef void node_split(self, double impurity, SplitRecord* split, - SIZE_t* n_constant_features) nogil except *: + cdef int node_split(self, double impurity, SplitRecord* split, + SIZE_t* n_constant_features) nogil except -1: """Find a random split on node samples[start:end], using sparse features. 
""" @@ -1610,3 +1625,4 @@ cdef class RandomSparseSplitter(BaseSparseSplitter): # Return values split[0] = best n_constant_features[0] = n_total_constants + return 0 diff --git a/sklearn/tree/_tree.pxd b/sklearn/tree/_tree.pxd index 894aee2a133f4..4f9f359725646 100644 --- a/sklearn/tree/_tree.pxd +++ b/sklearn/tree/_tree.pxd @@ -58,9 +58,9 @@ cdef class Tree: cdef SIZE_t _add_node(self, SIZE_t parent, bint is_left, bint is_leaf, SIZE_t feature, double threshold, double impurity, SIZE_t n_node_samples, - double weighted_n_samples) nogil except * - cdef void _resize(self, SIZE_t capacity) nogil except * - cdef int _resize_c(self, SIZE_t capacity=*) nogil except * + double weighted_n_samples) nogil except -1 + cdef int _resize(self, SIZE_t capacity) nogil except -1 + cdef int _resize_c(self, SIZE_t capacity=*) nogil except -1 cdef np.ndarray _get_value_ndarray(self) cdef np.ndarray _get_node_ndarray(self) diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx index 48dd95aea9c6c..26b2cb5c88272 100644 --- a/sklearn/tree/_tree.pyx +++ b/sklearn/tree/_tree.pyx @@ -271,7 +271,7 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): # Best first builder ---------------------------------------------------------- cdef inline int _add_to_frontier(PriorityHeapRecord* rec, - PriorityHeap frontier) nogil except *: + PriorityHeap frontier) nogil except -1: """Adds record ``rec`` to the priority queue ``frontier``; returns -1 on memory-error. """ return frontier.push(rec.node_id, rec.start, rec.end, rec.pos, rec.depth, @@ -416,7 +416,7 @@ cdef class BestFirstTreeBuilder(TreeBuilder): SIZE_t start, SIZE_t end, double impurity, bint is_first, bint is_left, Node* parent, SIZE_t depth, - PriorityHeapRecord* res) nogil except *: + PriorityHeapRecord* res) nogil except -1: """Adds node w/ partition ``[start, end)`` to the frontier. """ cdef SplitRecord split cdef SIZE_t node_id @@ -656,9 +656,10 @@ cdef class Tree: value = memcpy(self.value, ( value_ndarray).data, self.capacity * self.value_stride * sizeof(double)) - cdef void _resize(self, SIZE_t capacity) nogil except *: + cdef int _resize(self, SIZE_t capacity) nogil except -1: """Resize all inner arrays to `capacity`, if `capacity` == -1, then - double the size of the inner arrays.""" + double the size of the inner arrays. Returns -1 if memory + allocation failed.""" if self._resize_c(capacity) != 0: # Acquire gil only if we need to raise with gil: @@ -666,7 +667,7 @@ cdef class Tree: # XXX using (size_t)(-1) is ugly, but SIZE_MAX is not available in C89 # (i.e., older MSVC). - cdef int _resize_c(self, SIZE_t capacity=(-1)) nogil except *: + cdef int _resize_c(self, SIZE_t capacity=(-1)) nogil except -1: """Guts of _resize. Returns 0 for success, -1 for error.""" if capacity == self.capacity and self.nodes != NULL: return 0 @@ -696,7 +697,7 @@ cdef class Tree: cdef SIZE_t _add_node(self, SIZE_t parent, bint is_left, bint is_leaf, SIZE_t feature, double threshold, double impurity, SIZE_t n_node_samples, - double weighted_n_node_samples) nogil except *: + double weighted_n_node_samples) nogil except -1: """Add a node to the tree. The new node registers itself as the child of its parent. 
diff --git a/sklearn/tree/_utils.pxd b/sklearn/tree/_utils.pxd index 364b70c757a06..70c2a95be4fe8 100644 --- a/sklearn/tree/_utils.pxd +++ b/sklearn/tree/_utils.pxd @@ -81,7 +81,7 @@ cdef class Stack: cdef bint is_empty(self) nogil cdef int push(self, SIZE_t start, SIZE_t end, SIZE_t depth, SIZE_t parent, bint is_left, double impurity, - SIZE_t n_constant_features) nogil except * + SIZE_t n_constant_features) nogil except -1 cdef int pop(self, StackRecord* res) nogil @@ -111,7 +111,7 @@ cdef class PriorityHeap: cdef int push(self, SIZE_t node_id, SIZE_t start, SIZE_t end, SIZE_t pos, SIZE_t depth, bint is_leaf, double improvement, double impurity, double impurity_left, - double impurity_right) nogil except * + double impurity_right) nogil except -1 cdef int pop(self, PriorityHeapRecord* res) nogil # ============================================================================= @@ -129,9 +129,9 @@ cdef class WeightedPQueue: cdef WeightedPQueueRecord* array_ cdef bint is_empty(self) nogil - cdef void reset(self) nogil except * + cdef int reset(self) nogil except -1 cdef SIZE_t size(self) nogil - cdef int push(self, DOUBLE_t data, DOUBLE_t weight) nogil except * + cdef int push(self, DOUBLE_t data, DOUBLE_t weight) nogil except -1 cdef int remove(self, DOUBLE_t data, DOUBLE_t weight) nogil cdef int pop(self, DOUBLE_t* data, DOUBLE_t* weight) nogil cdef int peek(self, DOUBLE_t* data, DOUBLE_t* weight) nogil @@ -152,8 +152,8 @@ cdef class WeightedMedianCalculator: # = w[0] + w[1] + ... + w[k-1] cdef SIZE_t size(self) nogil - cdef int push(self, DOUBLE_t data, DOUBLE_t weight) nogil except * - cdef void reset(self) nogil except * + cdef int push(self, DOUBLE_t data, DOUBLE_t weight) nogil except -1 + cdef int reset(self) nogil except -1 cdef int update_median_parameters_post_push( self, DOUBLE_t data, DOUBLE_t weight, DOUBLE_t original_median) nogil diff --git a/sklearn/tree/_utils.pyx b/sklearn/tree/_utils.pyx index 1ae48cbfb43f1..cd4ecc70ea67d 100644 --- a/sklearn/tree/_utils.pyx +++ b/sklearn/tree/_utils.pyx @@ -121,7 +121,7 @@ cdef class Stack: cdef int push(self, SIZE_t start, SIZE_t end, SIZE_t depth, SIZE_t parent, bint is_left, double impurity, - SIZE_t n_constant_features) nogil except *: + SIZE_t n_constant_features) nogil except -1: """Push a new element onto the stack. Returns 0 if successful; -1 on out of memory error. @@ -132,7 +132,7 @@ cdef class Stack: # Resize if capacity not sufficient if top >= self.capacity: self.capacity *= 2 - # Since safe_realloc can raise MemoryError, use `except *` + # Since safe_realloc can raise MemoryError, use `except -1` safe_realloc(&self.stack_, self.capacity) stack = self.stack_ @@ -238,7 +238,7 @@ cdef class PriorityHeap: cdef int push(self, SIZE_t node_id, SIZE_t start, SIZE_t end, SIZE_t pos, SIZE_t depth, bint is_leaf, double improvement, double impurity, double impurity_left, - double impurity_right) nogil except *: + double impurity_right) nogil except -1: """Push record on the priority heap. Returns 0 if successful; -1 on out of memory error. 
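The WeightedPQueue and WeightedMedianCalculator declared above are the machinery behind the MAE criterion, and the sum_w_0_k / total_weight bookkeeping seen earlier in the series encodes the standard weighted-median rule. A rough pure-Python rendering of that rule follows, for orientation only; the calculator maintains it incrementally rather than re-sorting, and this helper is not part of the diff.

def weighted_median(values, weights):
    # Sort by value, then find the smallest k such that
    # w[0] + ... + w[k-1] >= total_weight / 2 (assumes a positive total).
    pairs = sorted(zip(values, weights))
    total = float(sum(w for _, w in pairs))
    acc = 0.0
    for k, (value, weight) in enumerate(pairs, start=1):
        acc += weight
        if acc >= total / 2.0:
            if acc == total / 2.0:
                # "split" median: halfway between the two middle values
                return 0.5 * (value + pairs[k][0])
            return value

For instance weighted_median([1.0, 2.0, 10.0], [1.0, 1.0, 2.0]) gives 6.0, matching the sum_w_0_k == total_weight / 2.0 branch of get_median shown earlier.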
@@ -249,7 +249,7 @@ cdef class PriorityHeap: # Resize if capacity not sufficient if heap_ptr >= self.capacity: self.capacity *= 2 - # Since safe_realloc can raise MemoryError, use `except *` + # Since safe_realloc can raise MemoryError, use `except -1` safe_realloc(&self.heap_, self.capacity) # Put element as last element of heap @@ -328,11 +328,13 @@ cdef class WeightedPQueue: def __dealloc__(self): free(self.array_) - cdef void reset(self) nogil except *: - """Reset the WeightedPQueue to its state at construction""" + cdef int reset(self) nogil except -1: + """Reset the WeightedPQueue to its state at construction returns 0 + upon success and -1 upon memory error.""" self.array_ptr = 0 # Since safe_realloc can raise MemoryError, use `except *` safe_realloc(&self.array_, self.capacity) + return 0 cdef bint is_empty(self) nogil: return self.array_ptr <= 0 @@ -340,7 +342,7 @@ cdef class WeightedPQueue: cdef SIZE_t size(self) nogil: return self.array_ptr - cdef int push(self, DOUBLE_t data, DOUBLE_t weight) nogil except *: + cdef int push(self, DOUBLE_t data, DOUBLE_t weight) nogil except -1: """Push record on the array. Returns 0 if successful; -1 on out of memory error. """ @@ -351,7 +353,7 @@ cdef class WeightedPQueue: # Resize if capacity not sufficient if array_ptr >= self.capacity: self.capacity *= 2 - # Since safe_realloc can raise MemoryError, use `except *` + # Since safe_realloc can raise MemoryError, use `except -1` safe_realloc(&self.array_, self.capacity) # Put element as last element of array @@ -496,15 +498,17 @@ cdef class WeightedMedianCalculator: WeightedMedianCalculator""" return self.samples.size() - cdef void reset(self) nogil except *: + cdef int reset(self) nogil except -1: """Reset the WeightedMedianCalculator to its state at construction""" - # samples.reset (WeightedPQueue.reset) uses safe_realloc, hence except* + # samples.reset (WeightedPQueue.reset) uses safe_realloc, hence + # except -1 self.samples.reset() self.total_weight = 0 self.k = 0 self.sum_w_0_k = 0 + return 0 - cdef int push(self, DOUBLE_t data, DOUBLE_t weight) nogil except *: + cdef int push(self, DOUBLE_t data, DOUBLE_t weight) nogil except -1: """Push a value and its associated weight to the WeightedMedianCalculator to be considered in the median calculation. @@ -514,7 +518,7 @@ cdef class WeightedMedianCalculator: if self.size() != 0: original_median = self.get_median() - # samples.push (WeightedPQueue.push) uses safe_realloc, hence except * + # samples.push (WeightedPQueue.push) uses safe_realloc, hence except -1 return_value = self.samples.push(data, weight) self.update_median_parameters_post_push(data, weight, original_median) From 2f3bcede544fb1e17ed8232b3eba44ecb45f205d Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Tue, 17 Jan 2017 03:13:02 +0100 Subject: [PATCH 15/17] Fix docstrings and add return 0 to reset methods --- sklearn/tree/_criterion.pyx | 42 +++++++++++++++++++++++++++++------ sklearn/tree/_splitter.pyx | 44 +++++++++++++++++++++++++++---------- sklearn/tree/_utils.pyx | 30 +++++++++++++++++-------- 3 files changed, 88 insertions(+), 28 deletions(-) diff --git a/sklearn/tree/_criterion.pyx b/sklearn/tree/_criterion.pyx index 62dad8e31ea2e..18360f65a1ab4 100644 --- a/sklearn/tree/_criterion.pyx +++ b/sklearn/tree/_criterion.pyx @@ -56,6 +56,9 @@ cdef class Criterion: SIZE_t end) nogil except -1: """Placeholder for a method which will initialize the criterion. + Returns -1 in case of failure to alocate memory (and raise MemoryError) + or 0 otherwise. 
+ Parameters ---------- y : array-like, dtype=DOUBLE_t @@ -287,6 +290,9 @@ cdef class ClassificationCriterion(Criterion): """Initialize the criterion at node samples[start:end] and children samples[start:start] and samples[start:end]. + Returns -1 in case of failure to alocate memory (and raise MemoryError) + or 0 otherwise. + Parameters ---------- y : array-like, dtype=DOUBLE_t @@ -350,8 +356,11 @@ cdef class ClassificationCriterion(Criterion): return 0 cdef int reset(self) nogil except -1: - """Reset the criterion at pos=start.""" + """Reset the criterion at pos=start + Returns -1 in case of failure to alocate memory (and raise MemoryError) + or 0 otherwise. + """ self.pos = self.start self.weighted_n_left = 0.0 @@ -371,9 +380,14 @@ cdef class ClassificationCriterion(Criterion): sum_total += self.sum_stride sum_left += self.sum_stride sum_right += self.sum_stride + return 0 cdef int reverse_reset(self) nogil except -1: - """Reset the criterion at pos=end.""" + """Reset the criterion at pos=end + + Returns -1 in case of failure to alocate memory (and raise MemoryError) + or 0 otherwise. + """ self.pos = self.end self.weighted_n_left = self.weighted_n_node_samples @@ -393,10 +407,14 @@ cdef class ClassificationCriterion(Criterion): sum_total += self.sum_stride sum_left += self.sum_stride sum_right += self.sum_stride + return 0 cdef int update(self, SIZE_t new_pos) nogil except -1: """Updated statistics by moving samples[pos:new_pos] to the left child. + Returns -1 in case of failure to alocate memory (and raise MemoryError) + or 0 otherwise. + Parameters ---------- new_pos : SIZE_t @@ -1089,8 +1107,11 @@ cdef class MAE(RegressionCriterion): return 0 cdef int reset(self) nogil except -1: - """Reset the criterion at pos=start. Returns -1 upon error and - 0 when succeeded.""" + """Reset the criterion at pos=start + + Returns -1 in case of failure to alocate memory (and raise MemoryError) + or 0 otherwise. + """ cdef SIZE_t i, k cdef DOUBLE_t value @@ -1118,8 +1139,11 @@ cdef class MAE(RegressionCriterion): return 0 cdef int reverse_reset(self) nogil except -1: - """Reset the criterion at pos=end. Returns -1 upon error and 0 when - succeeded.""" + """Reset the criterion at pos=end + + Returns -1 in case of failure to alocate memory (and raise MemoryError) + or 0 otherwise. + """ self.weighted_n_right = 0.0 self.weighted_n_left = self.weighted_n_node_samples @@ -1144,7 +1168,11 @@ cdef class MAE(RegressionCriterion): return 0 cdef int update(self, SIZE_t new_pos) nogil except -1: - """Updated statistics by moving samples[pos:new_pos] to the left.""" + """Updated statistics by moving samples[pos:new_pos] to the left + + Returns -1 in case of failure to alocate memory (and raise MemoryError) + or 0 otherwise. + """ cdef DOUBLE_t* sample_weight = self.sample_weight cdef SIZE_t* samples = self.samples diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx index 106526cf26c6a..f8bf3db982cd8 100644 --- a/sklearn/tree/_splitter.pyx +++ b/sklearn/tree/_splitter.pyx @@ -125,7 +125,8 @@ cdef class Splitter: Take in the input data X, the target Y, and optional sample weights. - Returns -1 if memory allocation failed. + Returns -1 in case of failure to alocate memory (and raise MemoryError) + or 0 otherwise. Parameters ---------- @@ -188,6 +189,9 @@ cdef class Splitter: double* weighted_n_node_samples) nogil except -1: """Reset splitter on node samples[start:end]. + Returns -1 in case of failure to alocate memory (and raise MemoryError) + or 0 otherwise. 
+ Parameters ---------- start : SIZE_t @@ -268,7 +272,11 @@ cdef class BaseDenseSplitter(Splitter): np.ndarray[DOUBLE_t, ndim=2, mode="c"] y, DOUBLE_t* sample_weight, np.ndarray X_idx_sorted=None) except -1: - """Initialize the splitter. Return -1 if memory allocation failed.""" + """Initialize the splitter + + Returns -1 in case of failure to alocate memory (and raise MemoryError) + or 0 otherwise. + """ # Call parent init Splitter.init(self, X, y, sample_weight) @@ -305,8 +313,11 @@ cdef class BestSplitter(BaseDenseSplitter): cdef int node_split(self, double impurity, SplitRecord* split, SIZE_t* n_constant_features) nogil except -1: - """Find the best split on node samples[start:end]. - Returns -1 upon errors""" + """Find the best split on node samples[start:end] + + Returns -1 in case of failure to alocate memory (and raise MemoryError) + or 0 otherwise. + """ # Find the best split cdef SIZE_t* samples = self.samples cdef SIZE_t start = self.start @@ -645,8 +656,11 @@ cdef class RandomSplitter(BaseDenseSplitter): cdef int node_split(self, double impurity, SplitRecord* split, SIZE_t* n_constant_features) nogil except -1: - """Find the best random split on node samples[start:end]. - Returns -1 upon errors and 0 when succeeded""" + """Find the best random split on node samples[start:end] + + Returns -1 in case of failure to alocate memory (and raise MemoryError) + or 0 otherwise. + """ # Draw random splits and pick the best cdef SIZE_t* samples = self.samples cdef SIZE_t start = self.start @@ -884,9 +898,11 @@ cdef class BaseSparseSplitter(Splitter): np.ndarray[DOUBLE_t, ndim=2, mode="c"] y, DOUBLE_t* sample_weight, np.ndarray X_idx_sorted=None) except -1: - """Initialize the splitter. Returns -1 upon errors and 0 when - succeeded""" + """Initialize the splitter + Returns -1 in case of failure to alocate memory (and raise MemoryError) + or 0 otherwise. + """ # Call parent init Splitter.init(self, X, y, sample_weight) @@ -1184,8 +1200,10 @@ cdef class BestSparseSplitter(BaseSparseSplitter): cdef int node_split(self, double impurity, SplitRecord* split, SIZE_t* n_constant_features) nogil except -1: - """Find the best split on node samples[start:end], using sparse - features. Returns -1 upon memory errors. + """Find the best split on node samples[start:end], using sparse features + + Returns -1 in case of failure to alocate memory (and raise MemoryError) + or 0 otherwise. """ # Find the best split cdef SIZE_t* samples = self.samples @@ -1412,8 +1430,10 @@ cdef class RandomSparseSplitter(BaseSparseSplitter): cdef int node_split(self, double impurity, SplitRecord* split, SIZE_t* n_constant_features) nogil except -1: - """Find a random split on node samples[start:end], using sparse - features. + """Find a random split on node samples[start:end], using sparse features + + Returns -1 in case of failure to alocate memory (and raise MemoryError) + or 0 otherwise. """ # Find the best split cdef SIZE_t* samples = self.samples diff --git a/sklearn/tree/_utils.pyx b/sklearn/tree/_utils.pyx index cd4ecc70ea67d..9f0dbd96dc8a2 100644 --- a/sklearn/tree/_utils.pyx +++ b/sklearn/tree/_utils.pyx @@ -124,7 +124,8 @@ cdef class Stack: SIZE_t n_constant_features) nogil except -1: """Push a new element onto the stack. - Returns 0 if successful; -1 on out of memory error. + Return -1 in case of failure to alocate memory (and raise MemoryError) + or 0 otherwise. 
""" cdef SIZE_t top = self.top cdef StackRecord* stack = NULL @@ -241,7 +242,8 @@ cdef class PriorityHeap: double impurity_right) nogil except -1: """Push record on the priority heap. - Returns 0 if successful; -1 on out of memory error. + Return -1 in case of failure to alocate memory (and raise MemoryError) + or 0 otherwise. """ cdef SIZE_t heap_ptr = self.heap_ptr cdef PriorityHeapRecord* heap = NULL @@ -329,8 +331,11 @@ cdef class WeightedPQueue: free(self.array_) cdef int reset(self) nogil except -1: - """Reset the WeightedPQueue to its state at construction returns 0 - upon success and -1 upon memory error.""" + """Reset the WeightedPQueue to its state at construction + + Return -1 in case of failure to alocate memory (and raise MemoryError) + or 0 otherwise. + """ self.array_ptr = 0 # Since safe_realloc can raise MemoryError, use `except *` safe_realloc(&self.array_, self.capacity) @@ -344,7 +349,9 @@ cdef class WeightedPQueue: cdef int push(self, DOUBLE_t data, DOUBLE_t weight) nogil except -1: """Push record on the array. - Returns 0 if successful; -1 on out of memory error. + + Return -1 in case of failure to alocate memory (and raise MemoryError) + or 0 otherwise. """ cdef SIZE_t array_ptr = self.array_ptr cdef WeightedPQueueRecord* array = NULL @@ -499,7 +506,11 @@ cdef class WeightedMedianCalculator: return self.samples.size() cdef int reset(self) nogil except -1: - """Reset the WeightedMedianCalculator to its state at construction""" + """Reset the WeightedMedianCalculator to its state at construction + + Return -1 in case of failure to alocate memory (and raise MemoryError) + or 0 otherwise. + """ # samples.reset (WeightedPQueue.reset) uses safe_realloc, hence # except -1 self.samples.reset() @@ -509,9 +520,10 @@ cdef class WeightedMedianCalculator: return 0 cdef int push(self, DOUBLE_t data, DOUBLE_t weight) nogil except -1: - """Push a value and its associated weight - to the WeightedMedianCalculator to be considered - in the median calculation. + """Push a value and its associated weight to the WeightedMedianCalculator + + Return -1 in case of failure to alocate memory (and raise MemoryError) + or 0 otherwise. """ cdef int return_value cdef DOUBLE_t original_median From 9b12a69ee072a08f1a34e961e903bcba1e9b260e Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Tue, 17 Jan 2017 04:29:22 +0100 Subject: [PATCH 16/17] TYPO --- sklearn/tree/_criterion.pyx | 16 ++++++++-------- sklearn/tree/_splitter.pyx | 16 ++++++++-------- sklearn/tree/_tree.pyx | 20 +++++++++++++++----- sklearn/tree/_utils.pyx | 12 ++++++------ 4 files changed, 37 insertions(+), 27 deletions(-) diff --git a/sklearn/tree/_criterion.pyx b/sklearn/tree/_criterion.pyx index 18360f65a1ab4..9cebb35e85ad3 100644 --- a/sklearn/tree/_criterion.pyx +++ b/sklearn/tree/_criterion.pyx @@ -56,7 +56,7 @@ cdef class Criterion: SIZE_t end) nogil except -1: """Placeholder for a method which will initialize the criterion. - Returns -1 in case of failure to alocate memory (and raise MemoryError) + Returns -1 in case of failure to allocate memory (and raise MemoryError) or 0 otherwise. Parameters @@ -290,7 +290,7 @@ cdef class ClassificationCriterion(Criterion): """Initialize the criterion at node samples[start:end] and children samples[start:start] and samples[start:end]. - Returns -1 in case of failure to alocate memory (and raise MemoryError) + Returns -1 in case of failure to allocate memory (and raise MemoryError) or 0 otherwise. 
Parameters @@ -358,7 +358,7 @@ cdef class ClassificationCriterion(Criterion): cdef int reset(self) nogil except -1: """Reset the criterion at pos=start - Returns -1 in case of failure to alocate memory (and raise MemoryError) + Returns -1 in case of failure to allocate memory (and raise MemoryError) or 0 otherwise. """ self.pos = self.start @@ -385,7 +385,7 @@ cdef class ClassificationCriterion(Criterion): cdef int reverse_reset(self) nogil except -1: """Reset the criterion at pos=end - Returns -1 in case of failure to alocate memory (and raise MemoryError) + Returns -1 in case of failure to allocate memory (and raise MemoryError) or 0 otherwise. """ self.pos = self.end @@ -412,7 +412,7 @@ cdef class ClassificationCriterion(Criterion): cdef int update(self, SIZE_t new_pos) nogil except -1: """Updated statistics by moving samples[pos:new_pos] to the left child. - Returns -1 in case of failure to alocate memory (and raise MemoryError) + Returns -1 in case of failure to allocate memory (and raise MemoryError) or 0 otherwise. Parameters @@ -1109,7 +1109,7 @@ cdef class MAE(RegressionCriterion): cdef int reset(self) nogil except -1: """Reset the criterion at pos=start - Returns -1 in case of failure to alocate memory (and raise MemoryError) + Returns -1 in case of failure to allocate memory (and raise MemoryError) or 0 otherwise. """ @@ -1141,7 +1141,7 @@ cdef class MAE(RegressionCriterion): cdef int reverse_reset(self) nogil except -1: """Reset the criterion at pos=end - Returns -1 in case of failure to alocate memory (and raise MemoryError) + Returns -1 in case of failure to allocate memory (and raise MemoryError) or 0 otherwise. """ @@ -1170,7 +1170,7 @@ cdef class MAE(RegressionCriterion): cdef int update(self, SIZE_t new_pos) nogil except -1: """Updated statistics by moving samples[pos:new_pos] to the left - Returns -1 in case of failure to alocate memory (and raise MemoryError) + Returns -1 in case of failure to allocate memory (and raise MemoryError) or 0 otherwise. """ diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx index f8bf3db982cd8..06dfab587493c 100644 --- a/sklearn/tree/_splitter.pyx +++ b/sklearn/tree/_splitter.pyx @@ -125,7 +125,7 @@ cdef class Splitter: Take in the input data X, the target Y, and optional sample weights. - Returns -1 in case of failure to alocate memory (and raise MemoryError) + Returns -1 in case of failure to allocate memory (and raise MemoryError) or 0 otherwise. Parameters @@ -189,7 +189,7 @@ cdef class Splitter: double* weighted_n_node_samples) nogil except -1: """Reset splitter on node samples[start:end]. - Returns -1 in case of failure to alocate memory (and raise MemoryError) + Returns -1 in case of failure to allocate memory (and raise MemoryError) or 0 otherwise. Parameters @@ -274,7 +274,7 @@ cdef class BaseDenseSplitter(Splitter): np.ndarray X_idx_sorted=None) except -1: """Initialize the splitter - Returns -1 in case of failure to alocate memory (and raise MemoryError) + Returns -1 in case of failure to allocate memory (and raise MemoryError) or 0 otherwise. """ @@ -315,7 +315,7 @@ cdef class BestSplitter(BaseDenseSplitter): SIZE_t* n_constant_features) nogil except -1: """Find the best split on node samples[start:end] - Returns -1 in case of failure to alocate memory (and raise MemoryError) + Returns -1 in case of failure to allocate memory (and raise MemoryError) or 0 otherwise. 
""" # Find the best split @@ -658,7 +658,7 @@ cdef class RandomSplitter(BaseDenseSplitter): SIZE_t* n_constant_features) nogil except -1: """Find the best random split on node samples[start:end] - Returns -1 in case of failure to alocate memory (and raise MemoryError) + Returns -1 in case of failure to allocate memory (and raise MemoryError) or 0 otherwise. """ # Draw random splits and pick the best @@ -900,7 +900,7 @@ cdef class BaseSparseSplitter(Splitter): np.ndarray X_idx_sorted=None) except -1: """Initialize the splitter - Returns -1 in case of failure to alocate memory (and raise MemoryError) + Returns -1 in case of failure to allocate memory (and raise MemoryError) or 0 otherwise. """ # Call parent init @@ -1202,7 +1202,7 @@ cdef class BestSparseSplitter(BaseSparseSplitter): SIZE_t* n_constant_features) nogil except -1: """Find the best split on node samples[start:end], using sparse features - Returns -1 in case of failure to alocate memory (and raise MemoryError) + Returns -1 in case of failure to allocate memory (and raise MemoryError) or 0 otherwise. """ # Find the best split @@ -1432,7 +1432,7 @@ cdef class RandomSparseSplitter(BaseSparseSplitter): SIZE_t* n_constant_features) nogil except -1: """Find a random split on node samples[start:end], using sparse features - Returns -1 in case of failure to alocate memory (and raise MemoryError) + Returns -1 in case of failure to allocate memory (and raise MemoryError) or 0 otherwise. """ # Find the best split diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx index 26b2cb5c88272..69ab8572d2ae5 100644 --- a/sklearn/tree/_tree.pyx +++ b/sklearn/tree/_tree.pyx @@ -272,8 +272,11 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): cdef inline int _add_to_frontier(PriorityHeapRecord* rec, PriorityHeap frontier) nogil except -1: - """Adds record ``rec`` to the priority queue ``frontier``; returns -1 - on memory-error. """ + """Adds record ``rec`` to the priority queue ``frontier`` + + Returns -1 in case of failure to allocate memory (and raise MemoryError) + or 0 otherwise. + """ return frontier.push(rec.node_id, rec.start, rec.end, rec.pos, rec.depth, rec.is_leaf, rec.improvement, rec.impurity, rec.impurity_left, rec.impurity_right) @@ -658,8 +661,11 @@ cdef class Tree: cdef int _resize(self, SIZE_t capacity) nogil except -1: """Resize all inner arrays to `capacity`, if `capacity` == -1, then - double the size of the inner arrays. Returns -1 if memory - allocation failed.""" + double the size of the inner arrays. + + Returns -1 in case of failure to allocate memory (and raise MemoryError) + or 0 otherwise. + """ if self._resize_c(capacity) != 0: # Acquire gil only if we need to raise with gil: @@ -668,7 +674,11 @@ cdef class Tree: # XXX using (size_t)(-1) is ugly, but SIZE_MAX is not available in C89 # (i.e., older MSVC). cdef int _resize_c(self, SIZE_t capacity=(-1)) nogil except -1: - """Guts of _resize. Returns 0 for success, -1 for error.""" + """Guts of _resize + + Returns -1 in case of failure to allocate memory (and raise MemoryError) + or 0 otherwise. + """ if capacity == self.capacity and self.nodes != NULL: return 0 diff --git a/sklearn/tree/_utils.pyx b/sklearn/tree/_utils.pyx index 9f0dbd96dc8a2..be107ed5487db 100644 --- a/sklearn/tree/_utils.pyx +++ b/sklearn/tree/_utils.pyx @@ -124,7 +124,7 @@ cdef class Stack: SIZE_t n_constant_features) nogil except -1: """Push a new element onto the stack. 
- Return -1 in case of failure to alocate memory (and raise MemoryError) + Return -1 in case of failure to allocate memory (and raise MemoryError) or 0 otherwise. """ cdef SIZE_t top = self.top @@ -242,7 +242,7 @@ cdef class PriorityHeap: double impurity_right) nogil except -1: """Push record on the priority heap. - Return -1 in case of failure to alocate memory (and raise MemoryError) + Return -1 in case of failure to allocate memory (and raise MemoryError) or 0 otherwise. """ cdef SIZE_t heap_ptr = self.heap_ptr @@ -333,7 +333,7 @@ cdef class WeightedPQueue: cdef int reset(self) nogil except -1: """Reset the WeightedPQueue to its state at construction - Return -1 in case of failure to alocate memory (and raise MemoryError) + Return -1 in case of failure to allocate memory (and raise MemoryError) or 0 otherwise. """ self.array_ptr = 0 @@ -350,7 +350,7 @@ cdef class WeightedPQueue: cdef int push(self, DOUBLE_t data, DOUBLE_t weight) nogil except -1: """Push record on the array. - Return -1 in case of failure to alocate memory (and raise MemoryError) + Return -1 in case of failure to allocate memory (and raise MemoryError) or 0 otherwise. """ cdef SIZE_t array_ptr = self.array_ptr @@ -508,7 +508,7 @@ cdef class WeightedMedianCalculator: cdef int reset(self) nogil except -1: """Reset the WeightedMedianCalculator to its state at construction - Return -1 in case of failure to alocate memory (and raise MemoryError) + Return -1 in case of failure to allocate memory (and raise MemoryError) or 0 otherwise. """ # samples.reset (WeightedPQueue.reset) uses safe_realloc, hence @@ -522,7 +522,7 @@ cdef class WeightedMedianCalculator: cdef int push(self, DOUBLE_t data, DOUBLE_t weight) nogil except -1: """Push a value and its associated weight to the WeightedMedianCalculator - Return -1 in case of failure to alocate memory (and raise MemoryError) + Return -1 in case of failure to allocate memory (and raise MemoryError) or 0 otherwise. """ cdef int return_value From c67ebd78aae0a25b6c1f733dba595017f2e11230 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Wed, 18 Jan 2017 18:29:00 +0100 Subject: [PATCH 17/17] REVIEW Remove redundant MemoryError raising calls --- sklearn/tree/_criterion.pyx | 3 --- sklearn/tree/_utils.pyx | 5 ----- 2 files changed, 8 deletions(-) diff --git a/sklearn/tree/_criterion.pyx b/sklearn/tree/_criterion.pyx index 41a850290d592..5187a5066bb2e 100644 --- a/sklearn/tree/_criterion.pyx +++ b/sklearn/tree/_criterion.pyx @@ -1042,9 +1042,6 @@ cdef class MAE(RegressionCriterion): # Allocate memory for the accumulators safe_realloc(&self.node_medians, n_outputs) - if (self.node_medians == NULL): - raise MemoryError() - self.left_child = np.empty(n_outputs, dtype='object') self.right_child = np.empty(n_outputs, dtype='object') # initialize WeightedMedianCalculators diff --git a/sklearn/tree/_utils.pyx b/sklearn/tree/_utils.pyx index 6f860adc3a2f5..faf2e5b777448 100644 --- a/sklearn/tree/_utils.pyx +++ b/sklearn/tree/_utils.pyx @@ -110,8 +110,6 @@ cdef class Stack: self.capacity = capacity self.top = 0 self.stack_ = malloc(capacity * sizeof(StackRecord)) - if self.stack_ == NULL: - raise MemoryError() def __dealloc__(self): free(self.stack_) @@ -322,9 +320,6 @@ cdef class WeightedPQueue: self.array_ptr = 0 safe_realloc(&self.array_, capacity) - if self.array_ == NULL: - raise MemoryError() - def __dealloc__(self): free(self.array_)