scikit-learn · raghavrv · Jan 18, 2017 · Dec 7, 2016 · Dec 7, 2016 · Dec 8, 2016
diff --git a/sklearn/tree/_criterion.pxd b/sklearn/tree/_criterion.pxd
@@ -53,12 +53,12 @@ cdef class Criterion:
     # statistics correspond to samples[start:pos] and samples[pos:end].
 
     # Methods
-    cdef void init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight,
-                   double weighted_n_samples, SIZE_t* samples, SIZE_t start,
-                   SIZE_t end) nogil
-    cdef void reset(self) nogil
-    cdef void reverse_reset(self) nogil
-    cdef void update(self, SIZE_t new_pos) nogil
+    cdef int init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight,
+                  double weighted_n_samples, SIZE_t* samples, SIZE_t start,
+                  SIZE_t end) nogil except -1
+    cdef int reset(self) nogil except -1
+    cdef int reverse_reset(self) nogil except -1
+    cdef int update(self, SIZE_t new_pos) nogil except -1
     cdef double node_impurity(self) nogil
     cdef void children_impurity(self, double* impurity_left,
                                 double* impurity_right) nogil

diff --git a/sklearn/tree/_criterion.pyx b/sklearn/tree/_criterion.pyx
@@ -51,11 +51,14 @@ cdef class Criterion:
     def __setstate__(self, d):
         pass
 
-    cdef void init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight,
-                   double weighted_n_samples, SIZE_t* samples, SIZE_t start,
-                   SIZE_t end) nogil:
+    cdef int init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight,
+                  double weighted_n_samples, SIZE_t* samples, SIZE_t start,
+                  SIZE_t end) nogil except -1:
         """Placeholder for a method which will initialize the criterion.
 
+        Returns -1 in case of failure to allocate memory (and raises
+        MemoryError) or 0 otherwise.
+
         Parameters
         ----------
         y : array-like, dtype=DOUBLE_t
@@ -79,22 +82,22 @@ cdef class Criterion:
 
         pass
 
-    cdef void reset(self) nogil:
+    cdef int reset(self) nogil except -1:
         """Reset the criterion at pos=start.
 
         This method must be implemented by the subclass.
         """
 
         pass
 
-    cdef void reverse_reset(self) nogil:
+    cdef int reverse_reset(self) nogil except -1:
         """Reset the criterion at pos=end.
 
         This method must be implemented by the subclass.
         """
         pass
 
-    cdef void update(self, SIZE_t new_pos) nogil:
+    cdef int update(self, SIZE_t new_pos) nogil except -1:
         """Updated statistics by moving samples[pos:new_pos] to the left child.
 
         This updates the collected statistics by moving samples[pos:new_pos]
@@ -281,12 +284,15 @@ cdef class ClassificationCriterion(Criterion):
                  sizet_ptr_to_ndarray(self.n_classes, self.n_outputs)),
                 self.__getstate__())
 
-    cdef void init(self, DOUBLE_t* y, SIZE_t y_stride,
-                   DOUBLE_t* sample_weight, double weighted_n_samples,
-                   SIZE_t* samples, SIZE_t start, SIZE_t end) nogil:
+    cdef int init(self, DOUBLE_t* y, SIZE_t y_stride,
+                  DOUBLE_t* sample_weight, double weighted_n_samples,
+                  SIZE_t* samples, SIZE_t start, SIZE_t end) nogil except -1:
         """Initialize the criterion at node samples[start:end] and
         children samples[start:start] and samples[start:end].
 
+        Returns -1 in case of failure to allocate memory (and raises
+        MemoryError) or 0 otherwise.
+
         Parameters
         ----------
         y : array-like, dtype=DOUBLE_t
@@ -347,10 +353,14 @@ cdef class ClassificationCriterion(Criterion):
 
         # Reset to pos=start
         self.reset()
+        return 0
 
-    cdef void reset(self) nogil:
-        """Reset the criterion at pos=start."""
+    cdef int reset(self) nogil except -1:
+        """Reset the criterion at pos=start.
 
+        Returns -1 in case of failure to allocate memory (and raises
+        MemoryError) or 0 otherwise.
+        """
         self.pos = self.start
 
         self.weighted_n_left = 0.0
@@ -370,9 +380,14 @@ cdef class ClassificationCriterion(Criterion):
             sum_total += self.sum_stride
             sum_left += self.sum_stride
             sum_right += self.sum_stride
+        return 0
 
-    cdef void reverse_reset(self) nogil:
-        """Reset the criterion at pos=end."""
+    cdef int reverse_reset(self) nogil except -1:
+        """Reset the criterion at pos=end.
+
+        Returns -1 in case of failure to allocate memory (and raises
+        MemoryError) or 0 otherwise.
+        """
         self.pos = self.end
 
         self.weighted_n_left = self.weighted_n_node_samples
@@ -392,10 +407,14 @@ cdef class ClassificationCriterion(Criterion):
             sum_total += self.sum_stride
             sum_left += self.sum_stride
             sum_right += self.sum_stride
+        return 0
 
-    cdef void update(self, SIZE_t new_pos) nogil:
+    cdef int update(self, SIZE_t new_pos) nogil except -1:
         """Updated statistics by moving samples[pos:new_pos] to the left child.
 
+        Returns -1 in case of failure to allocate memory (and raises
+        MemoryError) or 0 otherwise.
+
         Parameters
         ----------
         new_pos : SIZE_t
@@ -470,6 +489,7 @@ cdef class ClassificationCriterion(Criterion):
             sum_total += self.sum_stride
 
         self.pos = new_pos
+        return 0
 
     cdef double node_impurity(self) nogil:
         pass
@@ -736,9 +756,9 @@ cdef class RegressionCriterion(Criterion):
     def __reduce__(self):
         return (type(self), (self.n_outputs, self.n_samples), self.__getstate__())
 
-    cdef void init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight,
-                   double weighted_n_samples, SIZE_t* samples, SIZE_t start,
-                   SIZE_t end) nogil:
+    cdef int init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight,
+                  double weighted_n_samples, SIZE_t* samples, SIZE_t start,
+                  SIZE_t end) nogil except -1:
         """Initialize the criterion at node samples[start:end] and
            children samples[start:start] and samples[start:end]."""
         # Initialize fields
@@ -778,8 +798,9 @@ cdef class RegressionCriterion(Criterion):
 
         # Reset to pos=start
         self.reset()
+        return 0
 
-    cdef void reset(self) nogil:
+    cdef int reset(self) nogil except -1:
         """Reset the criterion at pos=start."""
         cdef SIZE_t n_bytes = self.n_outputs * sizeof(double)
         memset(self.sum_left, 0, n_bytes)
@@ -788,8 +809,9 @@ cdef class RegressionCriterion(Criterion):
         self.weighted_n_left = 0.0
         self.weighted_n_right = self.weighted_n_node_samples
         self.pos = self.start
+        return 0
 
-    cdef void reverse_reset(self) nogil:
+    cdef int reverse_reset(self) nogil except -1:
         """Reset the criterion at pos=end."""
         cdef SIZE_t n_bytes = self.n_outputs * sizeof(double)
         memset(self.sum_right, 0, n_bytes)
@@ -798,8 +820,9 @@ cdef class RegressionCriterion(Criterion):
         self.weighted_n_right = 0.0
         self.weighted_n_left = self.weighted_n_node_samples
         self.pos = self.end
+        return 0
 
-    cdef void update(self, SIZE_t new_pos) nogil:
+    cdef int update(self, SIZE_t new_pos) nogil except -1:
         """Updated statistics by moving samples[pos:new_pos] to the left."""
 
         cdef double* sum_left = self.sum_left
@@ -859,6 +882,7 @@ cdef class RegressionCriterion(Criterion):
             sum_right[k] = sum_total[k] - sum_left[k]
 
         self.pos = new_pos
+        return 0
 
     cdef double node_impurity(self) nogil:
         pass
@@ -1018,19 +1042,16 @@ cdef class MAE(RegressionCriterion):
         # Allocate memory for the accumulators
         safe_realloc(&self.node_medians, n_outputs)
 
-        if (self.node_medians == NULL):
-            raise MemoryError()
-
         self.left_child = np.empty(n_outputs, dtype='object')
         self.right_child = np.empty(n_outputs, dtype='object')
         # initialize WeightedMedianCalculators
         for k in range(n_outputs):
             self.left_child[k] = WeightedMedianCalculator(n_samples)
             self.right_child[k] = WeightedMedianCalculator(n_samples)
 
-    cdef void init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight,
-                   double weighted_n_samples, SIZE_t* samples, SIZE_t start,
-                   SIZE_t end) nogil:
+    cdef int init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight,
+                  double weighted_n_samples, SIZE_t* samples, SIZE_t start,
+                  SIZE_t end) nogil except -1:
         """Initialize the criterion at node samples[start:end] and
            children samples[start:start] and samples[start:end]."""
 
@@ -1068,6 +1089,7 @@ cdef class MAE(RegressionCriterion):
             for k in range(self.n_outputs):
                 y_ik = y[i * y_stride + k]
 
+                # push method ends up calling safe_realloc, hence `except -1`
                 # push all values to the right side,
                 # since pos = start initially anyway
                 (<WeightedMedianCalculator> right_child[k]).push(y_ik, w)
@@ -1079,9 +1101,14 @@ cdef class MAE(RegressionCriterion):
 
         # Reset to pos=start
         self.reset()
+        return 0
 
-    cdef void reset(self) nogil:
-        """Reset the criterion at pos=start."""
+    cdef int reset(self) nogil except -1:
+        """Reset the criterion at pos=start.
+
+        Returns -1 in case of failure to allocate memory (and raises
+        MemoryError) or 0 otherwise.
+        """
 
         cdef SIZE_t i, k
         cdef DOUBLE_t value
@@ -1103,11 +1130,17 @@ cdef class MAE(RegressionCriterion):
                 # remove everything from left and put it into right
                 (<WeightedMedianCalculator> left_child[k]).pop(&value,
                                                                &weight)
+                # push method ends up calling safe_realloc, hence `except -1`
                 (<WeightedMedianCalculator> right_child[k]).push(value,
                                                                  weight)
+        return 0
 
-    cdef void reverse_reset(self) nogil:
-        """Reset the criterion at pos=end."""
+    cdef int reverse_reset(self) nogil except -1:
+        """Reset the criterion at pos=end.
+
+        Returns -1 in case of failure to allocate memory (and raises
+        MemoryError) or 0 otherwise.
+        """
 
         self.weighted_n_right = 0.0
         self.weighted_n_left = self.weighted_n_node_samples
@@ -1126,11 +1159,17 @@ cdef class MAE(RegressionCriterion):
                 # remove everything from right and put it into left
                 (<WeightedMedianCalculator> right_child[k]).pop(&value,
                                                                 &weight)
+                # push method ends up calling safe_realloc, hence `except -1`
                 (<WeightedMedianCalculator> left_child[k]).push(value,
                                                                 weight)
+        return 0
 
-    cdef void update(self, SIZE_t new_pos) nogil:
-        """Updated statistics by moving samples[pos:new_pos] to the left."""
+    cdef int update(self, SIZE_t new_pos) nogil except -1:
+        """Update statistics by moving samples[pos:new_pos] to the left.
+
+        Returns -1 in case of failure to allocate memory (and raises
+        MemoryError) or 0 otherwise.
+        """
 
         cdef DOUBLE_t* sample_weight = self.sample_weight
         cdef SIZE_t* samples = self.samples
@@ -1162,6 +1201,7 @@ cdef class MAE(RegressionCriterion):
                     y_ik = y[i * self.y_stride + k]
                     # remove y_ik and its weight w from right and add to left
                     (<WeightedMedianCalculator> right_child[k]).remove(y_ik, w)
+                    # push method ends up calling safe_realloc, hence except -1
                     (<WeightedMedianCalculator> left_child[k]).push(y_ik, w)
 
                 self.weighted_n_left += w
@@ -1185,6 +1225,7 @@ cdef class MAE(RegressionCriterion):
         self.weighted_n_right = (self.weighted_n_node_samples -
                                  self.weighted_n_left)
         self.pos = new_pos
+        return 0
 
     cdef void node_value(self, double* dest) nogil:
         """Computes the node value of samples[start:end] into dest."""

diff --git a/sklearn/tree/_splitter.pxd b/sklearn/tree/_splitter.pxd
@@ -81,18 +81,18 @@ cdef class Splitter:
     # This allows optimization with depth-based tree building.
 
     # Methods
-    cdef void init(self, object X, np.ndarray y,
-                   DOUBLE_t* sample_weight,
-                   np.ndarray X_idx_sorted=*) except *
+    cdef int init(self, object X, np.ndarray y,
+                  DOUBLE_t* sample_weight,
+                  np.ndarray X_idx_sorted=*) except -1
 
-    cdef void node_reset(self, SIZE_t start, SIZE_t end,
-                         double* weighted_n_node_samples) nogil
+    cdef int node_reset(self, SIZE_t start, SIZE_t end,
+                        double* weighted_n_node_samples) nogil except -1
 
-    cdef void node_split(self,
-                         double impurity,   # Impurity of the node
-                         SplitRecord* split,
-                         SIZE_t* n_constant_features) nogil
+    cdef int node_split(self,
+                        double impurity,   # Impurity of the node
+                        SplitRecord* split,
+                        SIZE_t* n_constant_features) nogil except -1
 
     cdef void node_value(self, double* dest) nogil
 
-    cdef double node_impurity(self) nogil
+    cdef double node_impurity(self) nogil