Skip to content

MAINT Fix ctypedef types in tree submodule #27352

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 20 additions & 22 deletions sklearn/ensemble/_gradient_boosting.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,9 @@ cnp.import_array()

from scipy.sparse import issparse

from ..utils._typedefs cimport float32_t, float64_t, intp_t, int32_t
from ..tree._tree cimport Node
from ..tree._tree cimport Tree
from ..tree._tree cimport DTYPE_t
from ..tree._tree cimport SIZE_t
from ..tree._tree cimport INT32_t
from ..tree._utils cimport safe_realloc


Expand All @@ -24,10 +22,10 @@ from numpy import zeros as np_zeros


# constant to mark tree leafs
cdef SIZE_t TREE_LEAF = -1
cdef intp_t TREE_LEAF = -1

cdef void _predict_regression_tree_inplace_fast_dense(
const DTYPE_t[:, ::1] X,
const float32_t[:, ::1] X,
Node* root_node,
double *value,
double scale,
Expand All @@ -45,7 +43,7 @@ cdef void _predict_regression_tree_inplace_fast_dense(

Parameters
----------
X : DTYPE_t 2d memory view
X : float32_t 2d memory view
The memory view on the data ndarray of the input ``X``.
Assumes that the array is c-continuous.
root_node : tree Node pointer
Expand All @@ -63,7 +61,7 @@ cdef void _predict_regression_tree_inplace_fast_dense(
``out`` is assumed to be a two-dimensional array of
shape ``(n_samples, K)``.
"""
cdef SIZE_t n_samples = X.shape[0]
cdef intp_t n_samples = X.shape[0]
cdef Py_ssize_t i
cdef Node *node
for i in range(n_samples):
Expand All @@ -87,20 +85,20 @@ def _predict_regression_tree_stages_sparse(

The function assumes that the ndarray that wraps ``X`` is csr_matrix.
"""
cdef const DTYPE_t[::1] X_data = X.data
cdef const INT32_t[::1] X_indices = X.indices
cdef const INT32_t[::1] X_indptr = X.indptr
cdef const float32_t[::1] X_data = X.data
cdef const int32_t[::1] X_indices = X.indices
cdef const int32_t[::1] X_indptr = X.indptr

cdef SIZE_t n_samples = X.shape[0]
cdef SIZE_t n_features = X.shape[1]
cdef SIZE_t n_stages = estimators.shape[0]
cdef SIZE_t n_outputs = estimators.shape[1]
cdef intp_t n_samples = X.shape[0]
cdef intp_t n_features = X.shape[1]
cdef intp_t n_stages = estimators.shape[0]
cdef intp_t n_outputs = estimators.shape[1]

# Indices and temporary variables
cdef SIZE_t sample_i
cdef SIZE_t feature_i
cdef SIZE_t stage_i
cdef SIZE_t output_i
cdef intp_t sample_i
cdef intp_t feature_i
cdef intp_t stage_i
cdef intp_t output_i
cdef Node *root_node = NULL
cdef Node *node = NULL
cdef double *value = NULL
Expand All @@ -117,18 +115,18 @@ def _predict_regression_tree_stages_sparse(
values[stage_i * n_outputs + output_i] = tree.value

# Initialize auxiliary data-structure
cdef DTYPE_t feature_value = 0.
cdef DTYPE_t* X_sample = NULL
cdef float32_t feature_value = 0.
cdef float32_t* X_sample = NULL

# feature_to_sample as a data structure records the last seen sample
# for each feature; functionally, it is an efficient way to identify
# which features are nonzero in the present sample.
cdef SIZE_t* feature_to_sample = NULL
cdef intp_t* feature_to_sample = NULL

safe_realloc(&X_sample, n_features)
safe_realloc(&feature_to_sample, n_features)

memset(feature_to_sample, -1, n_features * sizeof(SIZE_t))
memset(feature_to_sample, -1, n_features * sizeof(intp_t))

# Cycle through all samples
for sample_i in range(n_samples):
Expand Down
57 changes: 27 additions & 30 deletions sklearn/tree/_criterion.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -10,55 +10,52 @@
# See _criterion.pyx for implementation details.
cimport numpy as cnp

from ._tree cimport DTYPE_t # Type of X
from ._tree cimport DOUBLE_t # Type of y, sample_weight
from ._tree cimport SIZE_t # Type for indices and counters
from ._tree cimport INT32_t # Signed 32 bit integer
from ._tree cimport UINT32_t # Unsigned 32 bit integer
from ..utils._typedefs cimport float64_t, intp_t


cdef class Criterion:
# The criterion computes the impurity of a node and the reduction of
# impurity of a split on that node. It also computes the output statistics
# such as the mean in regression and class probabilities in classification.

# Internal structures
cdef const DOUBLE_t[:, ::1] y # Values of y
cdef const DOUBLE_t[:] sample_weight # Sample weights
cdef const float64_t[:, ::1] y # Values of y
cdef const float64_t[:] sample_weight # Sample weights

cdef const SIZE_t[:] sample_indices # Sample indices in X, y
cdef SIZE_t start # samples[start:pos] are the samples in the left node
cdef SIZE_t pos # samples[pos:end] are the samples in the right node
cdef SIZE_t end
cdef SIZE_t n_missing # Number of missing values for the feature being evaluated
cdef bint missing_go_to_left # Whether missing values go to the left node
cdef const intp_t[:] sample_indices # Sample indices in X, y
cdef intp_t start # samples[start:pos] are the samples in the left node
cdef intp_t pos # samples[pos:end] are the samples in the right node
cdef intp_t end
cdef intp_t n_missing # Number of missing values for the feature being evaluated
cdef bint missing_go_to_left # Whether missing values go to the left node

cdef SIZE_t n_outputs # Number of outputs
cdef SIZE_t n_samples # Number of samples
cdef SIZE_t n_node_samples # Number of samples in the node (end-start)
cdef double weighted_n_samples # Weighted number of samples (in total)
cdef double weighted_n_node_samples # Weighted number of samples in the node
cdef double weighted_n_left # Weighted number of samples in the left node
cdef double weighted_n_right # Weighted number of samples in the right node
cdef double weighted_n_missing # Weighted number of samples that are missing
cdef intp_t n_outputs # Number of outputs
cdef intp_t n_samples # Number of samples
cdef intp_t n_node_samples # Number of samples in the node (end-start)
cdef double weighted_n_samples # Weighted number of samples (in total)
cdef double weighted_n_node_samples # Weighted number of samples in the node
cdef double weighted_n_left # Weighted number of samples in the left node
cdef double weighted_n_right # Weighted number of samples in the right node
cdef double weighted_n_missing # Weighted number of samples that are missing
Comment on lines +35 to +39
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could double (here and also elsewhere) be changed to float64_t?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I second this suggestion.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I can make the change in this PR, or do it in a follow-on? I could do the int there as well as @lorentzenchr suggested below.

WDYT?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A separate PR would be easier to review.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In that case, I'll leave this as is for now. Would this PR be mergeable? I can help test and make the systematic change in the tree submodule once we resolve this PR?


# The criterion object is maintained such that left and right collected
# statistics correspond to samples[start:pos] and samples[pos:end].

# Methods
cdef int init(
self,
const DOUBLE_t[:, ::1] y,
const DOUBLE_t[:] sample_weight,
const float64_t[:, ::1] y,
const float64_t[:] sample_weight,
double weighted_n_samples,
const SIZE_t[:] sample_indices,
SIZE_t start,
SIZE_t end
const intp_t[:] sample_indices,
intp_t start,
intp_t end
) except -1 nogil
cdef void init_sum_missing(self)
cdef void init_missing(self, SIZE_t n_missing) noexcept nogil
cdef void init_missing(self, intp_t n_missing) noexcept nogil
cdef int reset(self) except -1 nogil
cdef int reverse_reset(self) except -1 nogil
cdef int update(self, SIZE_t new_pos) except -1 nogil
cdef int update(self, intp_t new_pos) except -1 nogil
cdef double node_impurity(self) noexcept nogil
cdef void children_impurity(
self,
Expand Down Expand Up @@ -101,8 +98,8 @@ cdef class Criterion:
cdef class ClassificationCriterion(Criterion):
"""Abstract criterion for classification."""

cdef SIZE_t[::1] n_classes
cdef SIZE_t max_n_classes
cdef intp_t[::1] n_classes
cdef intp_t max_n_classes

cdef double[:, ::1] sum_total # The sum of the weighted count of each label.
cdef double[:, ::1] sum_left # Same as above, but for the left side of the split
Expand Down
Loading