From bf9e2641c811e177870880a28b8b3a1e0b19e6a9 Mon Sep 17 00:00:00 2001
From: OmarManzoor <omar.salman@arbisoft.com>
Date: Sat, 4 Feb 2023 19:21:45 +0500
Subject: [PATCH 01/14] MAINT Remove -Wcpp warnings when compiling
 sklearn.tree._tree

---
 sklearn/tree/_tree.pxd |  8 ++++----
 sklearn/tree/_tree.pyx | 34 +++++++++++++++++-----------------
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/sklearn/tree/_tree.pxd b/sklearn/tree/_tree.pxd
index 11c848881f6d3..f6bb354ae0cc2 100644
--- a/sklearn/tree/_tree.pxd
+++ b/sklearn/tree/_tree.pxd
@@ -12,7 +12,7 @@
 
 import numpy as np
 cimport numpy as cnp
-
+from cython cimport floating
 ctypedef cnp.npy_float32 DTYPE_t          # Type of X
 ctypedef cnp.npy_float64 DOUBLE_t         # Type of y, sample_weight
 ctypedef cnp.npy_intp SIZE_t              # Type for indices and counters
@@ -99,6 +99,6 @@ cdef class TreeBuilder:
     cdef SIZE_t max_depth               # Maximal tree depth
     cdef double min_impurity_decrease   # Impurity threshold for early stopping
 
-    cpdef build(self, Tree tree, object X, cnp.ndarray y,
-                cnp.ndarray sample_weight=*)
-    cdef _check_input(self, object X, cnp.ndarray y, cnp.ndarray sample_weight)
+    cpdef build(self, Tree tree, object X, const floating[:] y,
+                const floating[:] sample_weight=*)
+    cdef _check_input(self, object X, const floating[:] y, const floating[:] sample_weight)
diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx
index 730b2c1c3c9c6..f51f740b1a47a 100644
--- a/sklearn/tree/_tree.pyx
+++ b/sklearn/tree/_tree.pyx
@@ -13,7 +13,7 @@
 # License: BSD 3 clause
 
 from cpython cimport Py_INCREF, PyObject, PyTypeObject
-
+from cython cimport floating
 from libc.stdlib cimport free
 from libc.string cimport memcpy
 from libc.string cimport memset
@@ -86,13 +86,13 @@ NODE_DTYPE = np.asarray(<Node[:1]>(&dummy)).dtype
 cdef class TreeBuilder:
     """Interface for different tree building strategies."""
 
-    cpdef build(self, Tree tree, object X, cnp.ndarray y,
-                cnp.ndarray sample_weight=None):
+    cpdef build(self, Tree tree, object X, const floating[:] y,
+                const floating[:] sample_weight=None):
         """Build a decision tree from the training set (X, y)."""
         pass
 
-    cdef inline _check_input(self, object X, cnp.ndarray y,
-                             cnp.ndarray sample_weight):
+    cdef inline _check_input(self, object X, const floating[:] y,
+                             const floating[:] sample_weight):
         """Check input dtype, layout and format"""
         if issparse(X):
             X = X.tocsc()
@@ -109,16 +109,16 @@ cdef class TreeBuilder:
             # since we have to copy we will make it fortran for efficiency
             X = np.asfortranarray(X, dtype=DTYPE)
 
-        if y.dtype != DOUBLE or not y.flags.contiguous:
+        if y.base.dtype != DOUBLE or not y.base.flags.contiguous:
             y = np.ascontiguousarray(y, dtype=DOUBLE)
 
         if (sample_weight is not None and
-            (sample_weight.dtype != DOUBLE or
-            not sample_weight.flags.contiguous)):
+            (sample_weight.base.dtype != DOUBLE or
+            not sample_weight.base.flags.contiguous)):
                 sample_weight = np.asarray(sample_weight, dtype=DOUBLE,
                                            order="C")
 
-        return X, y, sample_weight
+        return X, y.base, sample_weight.base
 
 # Depth first builder ---------------------------------------------------------
 # A record on the stack for depth-first tree growing
@@ -144,12 +144,12 @@ cdef class DepthFirstTreeBuilder(TreeBuilder):
         self.max_depth = max_depth
         self.min_impurity_decrease = min_impurity_decrease
 
-    cpdef build(self, Tree tree, object X, cnp.ndarray y,
-                cnp.ndarray sample_weight=None):
+    cpdef build(self, Tree tree, object X, const floating[:] y,
+                const floating[:] sample_weight=None):
         """Build a decision tree from the training set (X, y)."""
 
         # check input
-        X, y, sample_weight = self._check_input(X, y, sample_weight)
+        X, _y, _sample_weight = self._check_input(X, y, sample_weight)
 
         # Initial capacity
         cdef int init_capacity
@@ -170,7 +170,7 @@ cdef class DepthFirstTreeBuilder(TreeBuilder):
         cdef double min_impurity_decrease = self.min_impurity_decrease
 
         # Recursive partition (without actual recursion)
-        splitter.init(X, y, sample_weight)
+        splitter.init(X, _y, _sample_weight)
 
         cdef SIZE_t start
         cdef SIZE_t end
@@ -335,19 +335,19 @@ cdef class BestFirstTreeBuilder(TreeBuilder):
         self.max_leaf_nodes = max_leaf_nodes
         self.min_impurity_decrease = min_impurity_decrease
 
-    cpdef build(self, Tree tree, object X, cnp.ndarray y,
-                cnp.ndarray sample_weight=None):
+    cpdef build(self, Tree tree, object X, const floating[:] y,
+                const floating[:] sample_weight=None):
         """Build a decision tree from the training set (X, y)."""
 
         # check input
-        X, y, sample_weight = self._check_input(X, y, sample_weight)
+        X, _y, _sample_weight = self._check_input(X, y, sample_weight)
 
         # Parameters
         cdef Splitter splitter = self.splitter
         cdef SIZE_t max_leaf_nodes = self.max_leaf_nodes
 
         # Recursive partition (without actual recursion)
-        splitter.init(X, y, sample_weight)
+        splitter.init(X, _y, _sample_weight)
 
         cdef vector[FrontierRecord] frontier
         cdef FrontierRecord record

From eb6a658b0ada3464e0fa79da716f57af053141fe Mon Sep 17 00:00:00 2001
From: OmarManzoor <omar.salman@arbisoft.com>
Date: Mon, 6 Feb 2023 12:55:24 +0500
Subject: [PATCH 02/14] Set y and sample_weight as DOUBLE_t memory views in
 build; remove condition for checking y in _check_input as it seems redundant

---
 sklearn/tree/_tree.pxd |  8 ++++----
 sklearn/tree/_tree.pyx | 31 ++++++++++++++-----------------
 2 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/sklearn/tree/_tree.pxd b/sklearn/tree/_tree.pxd
index f6bb354ae0cc2..986c52aacd17e 100644
--- a/sklearn/tree/_tree.pxd
+++ b/sklearn/tree/_tree.pxd
@@ -12,7 +12,7 @@
 
 import numpy as np
 cimport numpy as cnp
-from cython cimport floating
+
 ctypedef cnp.npy_float32 DTYPE_t          # Type of X
 ctypedef cnp.npy_float64 DOUBLE_t         # Type of y, sample_weight
 ctypedef cnp.npy_intp SIZE_t              # Type for indices and counters
@@ -99,6 +99,6 @@ cdef class TreeBuilder:
     cdef SIZE_t max_depth               # Maximal tree depth
     cdef double min_impurity_decrease   # Impurity threshold for early stopping
 
-    cpdef build(self, Tree tree, object X, const floating[:] y,
-                const floating[:] sample_weight=*)
-    cdef _check_input(self, object X, const floating[:] y, const floating[:] sample_weight)
+    cpdef build(self, Tree tree, object X, const DOUBLE_t[:, ::1] y,
+                const DOUBLE_t[:] sample_weight=*)
+    cdef _check_input(self, object X, const DOUBLE_t[:] sample_weight)
diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx
index f51f740b1a47a..335499cd3e9a2 100644
--- a/sklearn/tree/_tree.pyx
+++ b/sklearn/tree/_tree.pyx
@@ -13,7 +13,7 @@
 # License: BSD 3 clause
 
 from cpython cimport Py_INCREF, PyObject, PyTypeObject
-from cython cimport floating
+
 from libc.stdlib cimport free
 from libc.string cimport memcpy
 from libc.string cimport memset
@@ -86,13 +86,12 @@ NODE_DTYPE = np.asarray(<Node[:1]>(&dummy)).dtype
 cdef class TreeBuilder:
     """Interface for different tree building strategies."""
 
-    cpdef build(self, Tree tree, object X, const floating[:] y,
-                const floating[:] sample_weight=None):
+    cpdef build(self, Tree tree, object X, const DOUBLE_t[:, ::1] y,
+                const DOUBLE_t[:] sample_weight=None):
         """Build a decision tree from the training set (X, y)."""
         pass
 
-    cdef inline _check_input(self, object X, const floating[:] y,
-                             const floating[:] sample_weight):
+    cdef inline _check_input(self, object X, const DOUBLE_t[:] sample_weight):
         """Check input dtype, layout and format"""
         if issparse(X):
             X = X.tocsc()
@@ -109,16 +108,14 @@ cdef class TreeBuilder:
             # since we have to copy we will make it fortran for efficiency
             X = np.asfortranarray(X, dtype=DTYPE)
 
-        if y.base.dtype != DOUBLE or not y.base.flags.contiguous:
-            y = np.ascontiguousarray(y, dtype=DOUBLE)
-
         if (sample_weight is not None and
             (sample_weight.base.dtype != DOUBLE or
             not sample_weight.base.flags.contiguous)):
                 sample_weight = np.asarray(sample_weight, dtype=DOUBLE,
                                            order="C")
 
-        return X, y.base, sample_weight.base
+        sample_weight_exists = sample_weight is not None and sample_weight.size > 0
+        return X, sample_weight.base if sample_weight_exists else None
 
 # Depth first builder ---------------------------------------------------------
 # A record on the stack for depth-first tree growing
@@ -144,12 +141,12 @@ cdef class DepthFirstTreeBuilder(TreeBuilder):
         self.max_depth = max_depth
         self.min_impurity_decrease = min_impurity_decrease
 
-    cpdef build(self, Tree tree, object X, const floating[:] y,
-                const floating[:] sample_weight=None):
+    cpdef build(self, Tree tree, object X, const DOUBLE_t[:, ::1] y,
+                const DOUBLE_t[:] sample_weight=None):
         """Build a decision tree from the training set (X, y)."""
 
         # check input
-        X, _y, _sample_weight = self._check_input(X, y, sample_weight)
+        X, sample_weight = self._check_input(X, sample_weight)
 
         # Initial capacity
         cdef int init_capacity
@@ -170,7 +167,7 @@ cdef class DepthFirstTreeBuilder(TreeBuilder):
         cdef double min_impurity_decrease = self.min_impurity_decrease
 
         # Recursive partition (without actual recursion)
-        splitter.init(X, _y, _sample_weight)
+        splitter.init(X, y, sample_weight)
 
         cdef SIZE_t start
         cdef SIZE_t end
@@ -335,19 +332,19 @@ cdef class BestFirstTreeBuilder(TreeBuilder):
         self.max_leaf_nodes = max_leaf_nodes
         self.min_impurity_decrease = min_impurity_decrease
 
-    cpdef build(self, Tree tree, object X, const floating[:] y,
-                const floating[:] sample_weight=None):
+    cpdef build(self, Tree tree, object X, const DOUBLE_t[:, ::1] y,
+                const DOUBLE_t[:] sample_weight=None):
         """Build a decision tree from the training set (X, y)."""
 
         # check input
-        X, _y, _sample_weight = self._check_input(X, y, sample_weight)
+        X, sample_weight = self._check_input(X, sample_weight)
 
         # Parameters
         cdef Splitter splitter = self.splitter
         cdef SIZE_t max_leaf_nodes = self.max_leaf_nodes
 
         # Recursive partition (without actual recursion)
-        splitter.init(X, _y, _sample_weight)
+        splitter.init(X, y, sample_weight)
 
         cdef vector[FrontierRecord] frontier
         cdef FrontierRecord record

From 73d542d979ff3544b4770403ec09da9a5f535b74 Mon Sep 17 00:00:00 2001
From: OmarManzoor <omar.salman@arbisoft.com>
Date: Mon, 6 Feb 2023 16:00:44 +0500
Subject: [PATCH 03/14] Use SIZE_t memory view for n_classes in Tree
 __cinit__

---
 sklearn/tree/_tree.pxd |  2 +-
 sklearn/tree/_tree.pyx | 16 ++++++++++------
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/sklearn/tree/_tree.pxd b/sklearn/tree/_tree.pxd
index 986c52aacd17e..9c6d8213b0e13 100644
--- a/sklearn/tree/_tree.pxd
+++ b/sklearn/tree/_tree.pxd
@@ -101,4 +101,4 @@ cdef class TreeBuilder:
 
     cpdef build(self, Tree tree, object X, const DOUBLE_t[:, ::1] y,
                 const DOUBLE_t[:] sample_weight=*)
-    cdef _check_input(self, object X, const DOUBLE_t[:] sample_weight)
+    cdef _check_input(self, object X, const DOUBLE_t[:, ::1] y, const DOUBLE_t[:] sample_weight)
diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx
index 335499cd3e9a2..536d1c8949d29 100644
--- a/sklearn/tree/_tree.pyx
+++ b/sklearn/tree/_tree.pyx
@@ -91,7 +91,8 @@ cdef class TreeBuilder:
         """Build a decision tree from the training set (X, y)."""
         pass
 
-    cdef inline _check_input(self, object X, const DOUBLE_t[:] sample_weight):
+    cdef inline _check_input(self, object X, const DOUBLE_t[:, ::1] y,
+                             const DOUBLE_t[:] sample_weight):
         """Check input dtype, layout and format"""
         if issparse(X):
             X = X.tocsc()
@@ -108,6 +109,9 @@ cdef class TreeBuilder:
             # since we have to copy we will make it fortran for efficiency
             X = np.asfortranarray(X, dtype=DTYPE)
 
+        if y.dtype != DOUBLE or not y.flags.contiguous:
+            y = np.ascontiguousarray(y, dtype=DOUBLE)
+
         if (sample_weight is not None and
             (sample_weight.base.dtype != DOUBLE or
             not sample_weight.base.flags.contiguous)):
@@ -115,7 +119,7 @@ cdef class TreeBuilder:
                                            order="C")
 
         sample_weight_exists = sample_weight is not None and sample_weight.size > 0
-        return X, sample_weight.base if sample_weight_exists else None
+        return X, y.base, sample_weight.base if sample_weight_exists else None
 
 # Depth first builder ---------------------------------------------------------
 # A record on the stack for depth-first tree growing
@@ -146,7 +150,7 @@ cdef class DepthFirstTreeBuilder(TreeBuilder):
         """Build a decision tree from the training set (X, y)."""
 
         # check input
-        X, sample_weight = self._check_input(X, sample_weight)
+        X, y, sample_weight = self._check_input(X, y, sample_weight)
 
         # Initial capacity
         cdef int init_capacity
@@ -337,7 +341,7 @@ cdef class BestFirstTreeBuilder(TreeBuilder):
         """Build a decision tree from the training set (X, y)."""
 
         # check input
-        X, sample_weight = self._check_input(X, sample_weight)
+        X, y, sample_weight = self._check_input(X, y, sample_weight)
 
         # Parameters
         cdef Splitter splitter = self.splitter
@@ -605,12 +609,12 @@ cdef class Tree:
         def __get__(self):
             return self._get_value_ndarray()[:self.node_count]
 
-    def __cinit__(self, int n_features, cnp.ndarray n_classes, int n_outputs):
+    def __cinit__(self, int n_features, SIZE_t[:] n_classes, int n_outputs):
         """Constructor."""
         cdef SIZE_t dummy = 0
         size_t_dtype = np.array(dummy).dtype
 
-        n_classes = _check_n_classes(n_classes, size_t_dtype)
+        n_classes = _check_n_classes(n_classes.base, size_t_dtype)
 
         # Input/Output layout
         self.n_features = n_features

From bb9b1f767b5f4b14c063933561e326241311788e Mon Sep 17 00:00:00 2001
From: OmarManzoor <omar.salman@arbisoft.com>
Date: Mon, 6 Feb 2023 17:18:38 +0500
Subject: [PATCH 04/14] Use y.base for the dtype and contiguity check in _check_input

---
 sklearn/tree/_tree.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx
index 536d1c8949d29..bae2058323883 100644
--- a/sklearn/tree/_tree.pyx
+++ b/sklearn/tree/_tree.pyx
@@ -109,7 +109,7 @@ cdef class TreeBuilder:
             # since we have to copy we will make it fortran for efficiency
             X = np.asfortranarray(X, dtype=DTYPE)
 
-        if y.dtype != DOUBLE or not y.flags.contiguous:
+        if y.base.dtype != DOUBLE or not y.base.flags.contiguous:
             y = np.ascontiguousarray(y, dtype=DOUBLE)
 
         if (sample_weight is not None and

From a1dbdc1d8aa1060f60bae2ddd8b7423e0288d33f Mon Sep 17 00:00:00 2001
From: OmarManzoor <omar.salman@arbisoft.com>
Date: Mon, 6 Feb 2023 19:19:22 +0500
Subject: [PATCH 05/14] Fix the PR by keeping cnp.ndarray for n_classes in Tree
 __cinit__

---
 sklearn/tree/_tree.pyx | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx
index bae2058323883..72429eb5753a2 100644
--- a/sklearn/tree/_tree.pyx
+++ b/sklearn/tree/_tree.pyx
@@ -118,8 +118,7 @@ cdef class TreeBuilder:
                 sample_weight = np.asarray(sample_weight, dtype=DOUBLE,
                                            order="C")
 
-        sample_weight_exists = sample_weight is not None and sample_weight.size > 0
-        return X, y.base, sample_weight.base if sample_weight_exists else None
+        return X, y, sample_weight
 
 # Depth first builder ---------------------------------------------------------
 # A record on the stack for depth-first tree growing
@@ -609,12 +608,12 @@ cdef class Tree:
         def __get__(self):
             return self._get_value_ndarray()[:self.node_count]
 
-    def __cinit__(self, int n_features, SIZE_t[:] n_classes, int n_outputs):
+    def __cinit__(self, int n_features, cnp.ndarray n_classes, int n_outputs):
         """Constructor."""
         cdef SIZE_t dummy = 0
         size_t_dtype = np.array(dummy).dtype
 
-        n_classes = _check_n_classes(n_classes.base, size_t_dtype)
+        n_classes = _check_n_classes(n_classes, size_t_dtype)
 
         # Input/Output layout
         self.n_features = n_features

From 9401757727d056364b6d56f3d83dbf69dafd6c44 Mon Sep 17 00:00:00 2001
From: OmarManzoor <omar.salman@arbisoft.com>
Date: Mon, 6 Feb 2023 20:01:57 +0500
Subject: [PATCH 06/14] Add TODO to convert n_classes to a cython.integral memory view

---
 sklearn/tree/_tree.pyx | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx
index 72429eb5753a2..249452b1e12a9 100644
--- a/sklearn/tree/_tree.pyx
+++ b/sklearn/tree/_tree.pyx
@@ -608,6 +608,8 @@ cdef class Tree:
         def __get__(self):
             return self._get_value_ndarray()[:self.node_count]
 
+    # TODO: Convert n_classes to cython.integral memory view once
+    #  https://github.com/cython/cython/issues/5243 is fixed
     def __cinit__(self, int n_features, cnp.ndarray n_classes, int n_outputs):
         """Constructor."""
         cdef SIZE_t dummy = 0

From ae7a7d6b2e8502aa9e7ca308f18e2e901449e66b Mon Sep 17 00:00:00 2001
From: OmarManzoor <omar.salman@arbisoft.com>
Date: Tue, 7 Feb 2023 16:53:27 +0500
Subject: [PATCH 07/14] Replace cnp.ndarray with memory views

---
 sklearn/tree/_tree.pxd |  12 ++--
 sklearn/tree/_tree.pyx | 159 ++++++++++++++++++-----------------------
 2 files changed, 77 insertions(+), 94 deletions(-)

diff --git a/sklearn/tree/_tree.pxd b/sklearn/tree/_tree.pxd
index 9c6d8213b0e13..20bc25bc0adbd 100644
--- a/sklearn/tree/_tree.pxd
+++ b/sklearn/tree/_tree.pxd
@@ -62,14 +62,14 @@ cdef class Tree:
     cdef int _resize(self, SIZE_t capacity) nogil except -1
     cdef int _resize_c(self, SIZE_t capacity=*) nogil except -1
 
-    cdef cnp.ndarray _get_value_ndarray(self)
-    cdef cnp.ndarray _get_node_ndarray(self)
+    cdef DOUBLE_t[:, :, ::1] _get_value_ndarray(self)
+    cdef Node[::1] _get_node_ndarray(self)
 
-    cpdef cnp.ndarray predict(self, object X)
+    cpdef predict(self, object X)
 
-    cpdef cnp.ndarray apply(self, object X)
-    cdef cnp.ndarray _apply_dense(self, object X)
-    cdef cnp.ndarray _apply_sparse_csr(self, object X)
+    cpdef apply(self, object X)
+    cdef SIZE_t[:] _apply_dense(self, object X)
+    cdef SIZE_t[:] _apply_sparse_csr(self, object X)
 
     cpdef object decision_path(self, object X)
     cdef object _decision_path_dense(self, object X)
diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx
index 249452b1e12a9..de11145a241b4 100644
--- a/sklearn/tree/_tree.pyx
+++ b/sklearn/tree/_tree.pyx
@@ -572,11 +572,11 @@ cdef class Tree:
 
     property children_left:
         def __get__(self):
-            return self._get_node_ndarray()['left_child'][:self.node_count]
+            return np.asarray(self._get_node_ndarray())['left_child'][:self.node_count]
 
     property children_right:
         def __get__(self):
-            return self._get_node_ndarray()['right_child'][:self.node_count]
+            return np.asarray(self._get_node_ndarray())['right_child'][:self.node_count]
 
     property n_leaves:
         def __get__(self):
@@ -586,27 +586,27 @@ cdef class Tree:
 
     property feature:
         def __get__(self):
-            return self._get_node_ndarray()['feature'][:self.node_count]
+            return np.asarray(self._get_node_ndarray())['feature'][:self.node_count]
 
     property threshold:
         def __get__(self):
-            return self._get_node_ndarray()['threshold'][:self.node_count]
+            return np.asarray(self._get_node_ndarray())['threshold'][:self.node_count]
 
     property impurity:
         def __get__(self):
-            return self._get_node_ndarray()['impurity'][:self.node_count]
+            return np.asarray(self._get_node_ndarray())['impurity'][:self.node_count]
 
     property n_node_samples:
         def __get__(self):
-            return self._get_node_ndarray()['n_node_samples'][:self.node_count]
+            return np.asarray(self._get_node_ndarray())['n_node_samples'][:self.node_count]
 
     property weighted_n_node_samples:
         def __get__(self):
-            return self._get_node_ndarray()['weighted_n_node_samples'][:self.node_count]
+            return np.asarray(self._get_node_ndarray())['weighted_n_node_samples'][:self.node_count]
 
     property value:
         def __get__(self):
-            return self._get_value_ndarray()[:self.node_count]
+            return np.asarray(self._get_value_ndarray())[:self.node_count]
 
     # TODO: Convert n_classes to cython.integral memory view once
     #  https://github.com/cython/cython/issues/5243 is fixed
@@ -656,8 +656,8 @@ cdef class Tree:
         # capacity is inferred during the __setstate__ using nodes
         d["max_depth"] = self.max_depth
         d["node_count"] = self.node_count
-        d["nodes"] = self._get_node_ndarray()
-        d["values"] = self._get_value_ndarray()
+        d["nodes"] = np.asarray(self._get_node_ndarray())
+        d["values"] = np.asarray(self._get_value_ndarray())
         return d
 
     def __setstate__(self, d):
@@ -685,9 +685,12 @@ cdef class Tree:
         self.capacity = node_ndarray.shape[0]
         if self._resize_c(self.capacity) != 0:
             raise MemoryError("resizing tree to %d" % self.capacity)
-        nodes = memcpy(self.nodes, (<cnp.ndarray> node_ndarray).data,
+
+        cdef Node[::1] node_memory_view = node_ndarray
+        cdef DOUBLE_t[:, :, ::1] value_memory_view = value_ndarray
+        nodes = memcpy(self.nodes, &node_memory_view[0],
                        self.capacity * sizeof(Node))
-        value = memcpy(self.value, (<cnp.ndarray> value_ndarray).data,
+        value = memcpy(self.value, &value_memory_view[0, 0, 0],
                        self.capacity * self.value_stride * sizeof(double))
 
     cdef int _resize(self, SIZE_t capacity) nogil except -1:
@@ -775,22 +778,22 @@ cdef class Tree:
 
         return node_id
 
-    cpdef cnp.ndarray predict(self, object X):
+    cpdef predict(self, object X):
         """Predict target for X."""
-        out = self._get_value_ndarray().take(self.apply(X), axis=0,
-                                             mode='clip')
+        out_memory_view = np.asarray(self._get_value_ndarray())
+        out = out_memory_view.take(self.apply(X), axis=0, mode='clip')
         if self.n_outputs == 1:
             out = out.reshape(X.shape[0], self.max_n_classes)
         return out
 
-    cpdef cnp.ndarray apply(self, object X):
+    cpdef apply(self, object X):
         """Finds the terminal region (=leaf node) for each sample in X."""
         if issparse(X):
-            return self._apply_sparse_csr(X)
+            return np.asarray(self._apply_sparse_csr(X))
         else:
-            return self._apply_dense(X)
+            return np.asarray(self._apply_dense(X))
 
-    cdef inline cnp.ndarray _apply_dense(self, object X):
+    cdef inline SIZE_t[:] _apply_dense(self, object X):
         """Finds the terminal region (=leaf node) for each sample in X."""
 
         # Check input
@@ -806,8 +809,7 @@ cdef class Tree:
         cdef SIZE_t n_samples = X.shape[0]
 
         # Initialize output
-        cdef cnp.ndarray[SIZE_t] out = np.zeros((n_samples,), dtype=np.intp)
-        cdef SIZE_t* out_ptr = <SIZE_t*> out.data
+        cdef SIZE_t[:] out = np.zeros((n_samples,), dtype=np.intp)
 
         # Initialize auxiliary data-structure
         cdef Node* node = NULL
@@ -824,11 +826,11 @@ cdef class Tree:
                     else:
                         node = &self.nodes[node.right_child]
 
-                out_ptr[i] = <SIZE_t>(node - self.nodes)  # node offset
+                out[i] = <SIZE_t>(node - self.nodes)  # node offset
 
         return out
 
-    cdef inline cnp.ndarray _apply_sparse_csr(self, object X):
+    cdef inline SIZE_t[:] _apply_sparse_csr(self, object X):
         """Finds the terminal region (=leaf node) for each sample in sparse X.
         """
         # Check input
@@ -840,21 +842,15 @@ cdef class Tree:
             raise ValueError("X.dtype should be np.float32, got %s" % X.dtype)
 
         # Extract input
-        cdef cnp.ndarray[ndim=1, dtype=DTYPE_t] X_data_ndarray = X.data
-        cdef cnp.ndarray[ndim=1, dtype=INT32_t] X_indices_ndarray  = X.indices
-        cdef cnp.ndarray[ndim=1, dtype=INT32_t] X_indptr_ndarray  = X.indptr
-
-        cdef DTYPE_t* X_data = <DTYPE_t*>X_data_ndarray.data
-        cdef INT32_t* X_indices = <INT32_t*>X_indices_ndarray.data
-        cdef INT32_t* X_indptr = <INT32_t*>X_indptr_ndarray.data
+        cdef const DTYPE_t[:] X_data = X.data
+        cdef const INT32_t[:] X_indices  = X.indices
+        cdef const INT32_t[:] X_indptr  = X.indptr
 
         cdef SIZE_t n_samples = X.shape[0]
         cdef SIZE_t n_features = X.shape[1]
 
         # Initialize output
-        cdef cnp.ndarray[SIZE_t, ndim=1] out = np.zeros((n_samples,),
-                                                        dtype=np.intp)
-        cdef SIZE_t* out_ptr = <SIZE_t*> out.data
+        cdef SIZE_t[:] out = np.zeros((n_samples,), dtype=np.intp)
 
         # Initialize auxiliary data-structure
         cdef DTYPE_t feature_value = 0.
@@ -895,7 +891,7 @@ cdef class Tree:
                     else:
                         node = &self.nodes[node.right_child]
 
-                out_ptr[i] = <SIZE_t>(node - self.nodes)  # node offset
+                out[i] = <SIZE_t>(node - self.nodes)  # node offset
 
             # Free auxiliary arrays
             free(X_sample)
@@ -926,13 +922,10 @@ cdef class Tree:
         cdef SIZE_t n_samples = X.shape[0]
 
         # Initialize output
-        cdef cnp.ndarray[SIZE_t] indptr = np.zeros(n_samples + 1, dtype=np.intp)
-        cdef SIZE_t* indptr_ptr = <SIZE_t*> indptr.data
-
-        cdef cnp.ndarray[SIZE_t] indices = np.zeros(n_samples *
-                                                    (1 + self.max_depth),
-                                                    dtype=np.intp)
-        cdef SIZE_t* indices_ptr = <SIZE_t*> indices.data
+        cdef SIZE_t[:] indptr = np.zeros(n_samples + 1, dtype=np.intp)
+        cdef SIZE_t[:] indices = np.zeros(
+            n_samples * (1 + self.max_depth), dtype=np.intp
+        )
 
         # Initialize auxiliary data-structure
         cdef Node* node = NULL
@@ -941,13 +934,13 @@ cdef class Tree:
         with nogil:
             for i in range(n_samples):
                 node = self.nodes
-                indptr_ptr[i + 1] = indptr_ptr[i]
+                indptr[i + 1] = indptr[i]
 
                 # Add all external nodes
                 while node.left_child != _TREE_LEAF:
                     # ... and node.right_child != _TREE_LEAF:
-                    indices_ptr[indptr_ptr[i + 1]] = <SIZE_t>(node - self.nodes)
-                    indptr_ptr[i + 1] += 1
+                    indices[indptr[i + 1]] = <SIZE_t>(node - self.nodes)
+                    indptr[i + 1] += 1
 
                     if X_ndarray[i, node.feature] <= node.threshold:
                         node = &self.nodes[node.left_child]
@@ -955,12 +948,11 @@ cdef class Tree:
                         node = &self.nodes[node.right_child]
 
                 # Add the leave node
-                indices_ptr[indptr_ptr[i + 1]] = <SIZE_t>(node - self.nodes)
-                indptr_ptr[i + 1] += 1
+                indices[indptr[i + 1]] = <SIZE_t>(node - self.nodes)
+                indptr[i + 1] += 1
 
         indices = indices[:indptr[n_samples]]
-        cdef cnp.ndarray[SIZE_t] data = np.ones(shape=len(indices),
-                                                dtype=np.intp)
+        cdef SIZE_t[:] data = np.ones(shape=len(indices), dtype=np.intp)
         out = csr_matrix((data, indices, indptr),
                          shape=(n_samples, self.node_count))
 
@@ -978,25 +970,18 @@ cdef class Tree:
             raise ValueError("X.dtype should be np.float32, got %s" % X.dtype)
 
         # Extract input
-        cdef cnp.ndarray[ndim=1, dtype=DTYPE_t] X_data_ndarray = X.data
-        cdef cnp.ndarray[ndim=1, dtype=INT32_t] X_indices_ndarray  = X.indices
-        cdef cnp.ndarray[ndim=1, dtype=INT32_t] X_indptr_ndarray  = X.indptr
-
-        cdef DTYPE_t* X_data = <DTYPE_t*>X_data_ndarray.data
-        cdef INT32_t* X_indices = <INT32_t*>X_indices_ndarray.data
-        cdef INT32_t* X_indptr = <INT32_t*>X_indptr_ndarray.data
+        cdef const DTYPE_t[:] X_data = X.data
+        cdef const INT32_t[:] X_indices  = X.indices
+        cdef const INT32_t[:] X_indptr  = X.indptr
 
         cdef SIZE_t n_samples = X.shape[0]
         cdef SIZE_t n_features = X.shape[1]
 
         # Initialize output
-        cdef cnp.ndarray[SIZE_t] indptr = np.zeros(n_samples + 1, dtype=np.intp)
-        cdef SIZE_t* indptr_ptr = <SIZE_t*> indptr.data
-
-        cdef cnp.ndarray[SIZE_t] indices = np.zeros(n_samples *
-                                                    (1 + self.max_depth),
-                                                    dtype=np.intp)
-        cdef SIZE_t* indices_ptr = <SIZE_t*> indices.data
+        cdef SIZE_t[:] indptr = np.zeros(n_samples + 1, dtype=np.intp)
+        cdef SIZE_t[:] indices = np.zeros(
+            n_samples * (1 + self.max_depth), dtype=np.intp
+        )
 
         # Initialize auxiliary data-structure
         cdef DTYPE_t feature_value = 0.
@@ -1018,7 +1003,7 @@ cdef class Tree:
 
             for i in range(n_samples):
                 node = self.nodes
-                indptr_ptr[i + 1] = indptr_ptr[i]
+                indptr[i + 1] = indptr[i]
 
                 for k in range(X_indptr[i], X_indptr[i + 1]):
                     feature_to_sample[X_indices[k]] = i
@@ -1028,8 +1013,8 @@ cdef class Tree:
                 while node.left_child != _TREE_LEAF:
                     # ... and node.right_child != _TREE_LEAF:
 
-                    indices_ptr[indptr_ptr[i + 1]] = <SIZE_t>(node - self.nodes)
-                    indptr_ptr[i + 1] += 1
+                    indices[indptr[i + 1]] = <SIZE_t>(node - self.nodes)
+                    indptr[i + 1] += 1
 
                     if feature_to_sample[node.feature] == i:
                         feature_value = X_sample[node.feature]
@@ -1043,16 +1028,15 @@ cdef class Tree:
                         node = &self.nodes[node.right_child]
 
                 # Add the leave node
-                indices_ptr[indptr_ptr[i + 1]] = <SIZE_t>(node - self.nodes)
-                indptr_ptr[i + 1] += 1
+                indices[indptr[i + 1]] = <SIZE_t>(node - self.nodes)
+                indptr[i + 1] += 1
 
             # Free auxiliary arrays
             free(X_sample)
             free(feature_to_sample)
 
         indices = indices[:indptr[n_samples]]
-        cdef cnp.ndarray[SIZE_t] data = np.ones(shape=len(indices),
-                                                dtype=np.intp)
+        cdef SIZE_t[:] data = np.ones(shape=len(indices), dtype=np.intp)
         out = csr_matrix((data, indices, indptr),
                          shape=(n_samples, self.node_count))
 
@@ -1095,9 +1079,7 @@ cdef class Tree:
 
         cdef double normalizer = 0.
 
-        cdef cnp.ndarray[cnp.float64_t, ndim=1] importances
-        importances = np.zeros((self.n_features,))
-        cdef DOUBLE_t* importance_data = <DOUBLE_t*>importances.data
+        cdef cnp.float64_t[:] importances = np.zeros((self.n_features,))
 
         with nogil:
             while node != end_node:
@@ -1106,24 +1088,24 @@ cdef class Tree:
                     left = &nodes[node.left_child]
                     right = &nodes[node.right_child]
 
-                    importance_data[node.feature] += (
+                    importances[node.feature] += (
                         node.weighted_n_node_samples * node.impurity -
                         left.weighted_n_node_samples * left.impurity -
                         right.weighted_n_node_samples * right.impurity)
                 node += 1
 
-        importances /= nodes[0].weighted_n_node_samples
+        importances = np.divide(importances, nodes[0].weighted_n_node_samples)
 
         if normalize:
             normalizer = np.sum(importances)
 
             if normalizer > 0.0:
                 # Avoid dividing by zero (e.g., when root is pure)
-                importances /= normalizer
+                importances = np.divide(importances, normalizer)
 
-        return importances
+        return np.asarray(importances)
 
-    cdef cnp.ndarray _get_value_ndarray(self):
+    cdef DOUBLE_t[:, :, ::1] _get_value_ndarray(self):
         """Wraps value as a 3-d NumPy array.
 
         The array keeps a reference to this Tree, which manages the underlying
@@ -1133,14 +1115,14 @@ cdef class Tree:
         shape[0] = <cnp.npy_intp> self.node_count
         shape[1] = <cnp.npy_intp> self.n_outputs
         shape[2] = <cnp.npy_intp> self.max_n_classes
-        cdef cnp.ndarray arr
+        cdef DOUBLE_t[:, :, ::1] arr
         arr = cnp.PyArray_SimpleNewFromData(3, shape, cnp.NPY_DOUBLE, self.value)
         Py_INCREF(self)
-        if PyArray_SetBaseObject(arr, <PyObject*> self) < 0:
+        if PyArray_SetBaseObject(arr.base, <PyObject*> self) < 0:
             raise ValueError("Can't initialize array.")
         return arr
 
-    cdef cnp.ndarray _get_node_ndarray(self):
+    cdef Node[::1] _get_node_ndarray(self):
         """Wraps nodes as a NumPy struct array.
 
         The array keeps a reference to this Tree, which manages the underlying
@@ -1151,14 +1133,14 @@ cdef class Tree:
         shape[0] = <cnp.npy_intp> self.node_count
         cdef cnp.npy_intp strides[1]
         strides[0] = sizeof(Node)
-        cdef cnp.ndarray arr
+        cdef Node[::1] arr
         Py_INCREF(NODE_DTYPE)
-        arr = PyArray_NewFromDescr(<PyTypeObject *> cnp.ndarray,
+        arr = PyArray_NewFromDescr(<PyTypeObject *> np.ndarray,
                                    <cnp.dtype> NODE_DTYPE, 1, shape,
                                    strides, <void*> self.nodes,
                                    cnp.NPY_DEFAULT, None)
         Py_INCREF(self)
-        if PyArray_SetBaseObject(arr, <PyObject*> self) < 0:
+        if PyArray_SetBaseObject(arr.base, <PyObject*> self) < 0:
             raise ValueError("Can't initialize array.")
         return arr
 
@@ -1688,10 +1670,8 @@ def ccp_pruning_path(Tree orig_tree):
 
     cdef:
         UINT32_t total_items = path_finder.count
-        cnp.ndarray ccp_alphas = np.empty(shape=total_items,
-                                          dtype=np.float64)
-        cnp.ndarray impurities = np.empty(shape=total_items,
-                                          dtype=np.float64)
+        DOUBLE_t[:] ccp_alphas = np.empty(shape=total_items, dtype=np.float64)
+        DOUBLE_t[:] impurities = np.empty(shape=total_items, dtype=np.float64)
         UINT32_t count = 0
 
     while count < total_items:
@@ -1699,7 +1679,10 @@ def ccp_pruning_path(Tree orig_tree):
         impurities[count] = path_finder.impurities[count]
         count += 1
 
-    return {'ccp_alphas': ccp_alphas, 'impurities': impurities}
+    return {
+        'ccp_alphas': np.asarray(ccp_alphas),
+        'impurities': np.asarray(impurities),
+    }
 
 
 cdef struct BuildPrunedRecord:

From 21cf642c03e87f4c688558ae53359b713a29b9e0 Mon Sep 17 00:00:00 2001
From: OmarManzoor <omar.salman@arbisoft.com>
Date: Tue, 7 Feb 2023 17:26:05 +0500
Subject: [PATCH 08/14] Add TODO to remove redundant y check in _check_input

---
 sklearn/tree/_tree.pyx | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx
index de11145a241b4..adcd2277b230d 100644
--- a/sklearn/tree/_tree.pyx
+++ b/sklearn/tree/_tree.pyx
@@ -109,6 +109,9 @@ cdef class TreeBuilder:
             # since we have to copy we will make it fortran for efficiency
             X = np.asfortranarray(X, dtype=DTYPE)
 
+        # TODO: This check on y appears to be redundant, since the same
+        #  check is already performed in BaseDecisionTree.fit; it can
+        #  probably be removed.
         if y.base.dtype != DOUBLE or not y.base.flags.contiguous:
             y = np.ascontiguousarray(y, dtype=DOUBLE)
 

From f5daddcb6fd8af1009271b678e904acc48cb33d7 Mon Sep 17 00:00:00 2001
From: OmarManzoor <omar.salman@arbisoft.com>
Date: Tue, 7 Feb 2023 21:30:39 +0500
Subject: [PATCH 09/14] Applied suggestions provided on PR

---
 setup.py               | 1 +
 sklearn/tree/_tree.pyx | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index f252e1ec03ad8..dc2edc6a8127b 100755
--- a/setup.py
+++ b/setup.py
@@ -111,6 +111,7 @@
     "sklearn.svm._libsvm_sparse",
     "sklearn.svm._newrand",
     "sklearn.tree._splitter",
+    "sklearn.tree._tree",
     "sklearn.tree._utils",
     "sklearn.utils._cython_blas",
     "sklearn.utils._fast_dict",
diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx
index adcd2277b230d..52a8f996b273f 100644
--- a/sklearn/tree/_tree.pyx
+++ b/sklearn/tree/_tree.pyx
@@ -1141,7 +1141,7 @@ cdef class Tree:
         arr = PyArray_NewFromDescr(<PyTypeObject *> np.ndarray,
                                    <cnp.dtype> NODE_DTYPE, 1, shape,
                                    strides, <void*> self.nodes,
-                                   cnp.NPY_DEFAULT, None)
+                                   cnp.NPY_ARRAY_DEFAULT, None)
         Py_INCREF(self)
         if PyArray_SetBaseObject(arr.base, <PyObject*> self) < 0:
             raise ValueError("Can't initialize array.")

From 0496453d54c2c81e7ed1a9137bd326d4fd8037c3 Mon Sep 17 00:00:00 2001
From: OmarManzoor <omar.salman@arbisoft.com>
Date: Wed, 8 Feb 2023 11:49:18 +0500
Subject: [PATCH 10/14] Applied further suggestions

---
 sklearn/tree/_tree.pxd |  8 +++----
 sklearn/tree/_tree.pyx | 52 ++++++++++++++++++++++--------------------
 2 files changed, 31 insertions(+), 29 deletions(-)

diff --git a/sklearn/tree/_tree.pxd b/sklearn/tree/_tree.pxd
index 20bc25bc0adbd..62338ddbbc20e 100644
--- a/sklearn/tree/_tree.pxd
+++ b/sklearn/tree/_tree.pxd
@@ -62,14 +62,14 @@ cdef class Tree:
     cdef int _resize(self, SIZE_t capacity) nogil except -1
     cdef int _resize_c(self, SIZE_t capacity=*) nogil except -1
 
-    cdef DOUBLE_t[:, :, ::1] _get_value_ndarray(self)
-    cdef Node[::1] _get_node_ndarray(self)
+    cdef cnp.ndarray _get_value_ndarray(self)
+    cdef cnp.ndarray _get_node_ndarray(self)
 
     cpdef predict(self, object X)
 
     cpdef apply(self, object X)
-    cdef SIZE_t[:] _apply_dense(self, object X)
-    cdef SIZE_t[:] _apply_sparse_csr(self, object X)
+    cdef cnp.ndarray _apply_dense(self, object X)
+    cdef cnp.ndarray _apply_sparse_csr(self, object X)
 
     cpdef object decision_path(self, object X)
     cdef object _decision_path_dense(self, object X)
diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx
index 52a8f996b273f..0e7cf6ecef6e0 100644
--- a/sklearn/tree/_tree.pyx
+++ b/sklearn/tree/_tree.pyx
@@ -575,11 +575,11 @@ cdef class Tree:
 
     property children_left:
         def __get__(self):
-            return np.asarray(self._get_node_ndarray())['left_child'][:self.node_count]
+            return self._get_node_ndarray()['left_child'][:self.node_count]
 
     property children_right:
         def __get__(self):
-            return np.asarray(self._get_node_ndarray())['right_child'][:self.node_count]
+            return self._get_node_ndarray()['right_child'][:self.node_count]
 
     property n_leaves:
         def __get__(self):
@@ -589,27 +589,27 @@ cdef class Tree:
 
     property feature:
         def __get__(self):
-            return np.asarray(self._get_node_ndarray())['feature'][:self.node_count]
+            return self._get_node_ndarray()['feature'][:self.node_count]
 
     property threshold:
         def __get__(self):
-            return np.asarray(self._get_node_ndarray())['threshold'][:self.node_count]
+            return self._get_node_ndarray()['threshold'][:self.node_count]
 
     property impurity:
         def __get__(self):
-            return np.asarray(self._get_node_ndarray())['impurity'][:self.node_count]
+            return self._get_node_ndarray()['impurity'][:self.node_count]
 
     property n_node_samples:
         def __get__(self):
-            return np.asarray(self._get_node_ndarray())['n_node_samples'][:self.node_count]
+            return self._get_node_ndarray()['n_node_samples'][:self.node_count]
 
     property weighted_n_node_samples:
         def __get__(self):
-            return np.asarray(self._get_node_ndarray())['weighted_n_node_samples'][:self.node_count]
+            return self._get_node_ndarray()['weighted_n_node_samples'][:self.node_count]
 
     property value:
         def __get__(self):
-            return np.asarray(self._get_value_ndarray())[:self.node_count]
+            return self._get_value_ndarray()[:self.node_count]
 
     # TODO: Convert n_classes to cython.integral memory view once
     #  https://github.com/cython/cython/issues/5243 is fixed
@@ -659,8 +659,8 @@ cdef class Tree:
         # capacity is inferred during the __setstate__ using nodes
         d["max_depth"] = self.max_depth
         d["node_count"] = self.node_count
-        d["nodes"] = np.asarray(self._get_node_ndarray())
-        d["values"] = np.asarray(self._get_value_ndarray())
+        d["nodes"] = self._get_node_ndarray()
+        d["values"] = self._get_value_ndarray()
         return d
 
     def __setstate__(self, d):
@@ -783,8 +783,8 @@ cdef class Tree:
 
     cpdef predict(self, object X):
         """Predict target for X."""
-        out_memory_view = np.asarray(self._get_value_ndarray())
-        out = out_memory_view.take(self.apply(X), axis=0, mode='clip')
+        out = self._get_value_ndarray().take(self.apply(X), axis=0,
+                                             mode='clip')
         if self.n_outputs == 1:
             out = out.reshape(X.shape[0], self.max_n_classes)
         return out
@@ -792,11 +792,11 @@ cdef class Tree:
     cpdef apply(self, object X):
         """Finds the terminal region (=leaf node) for each sample in X."""
         if issparse(X):
-            return np.asarray(self._apply_sparse_csr(X))
+            return self._apply_sparse_csr(X)
         else:
-            return np.asarray(self._apply_dense(X))
+            return self._apply_dense(X)
 
-    cdef inline SIZE_t[:] _apply_dense(self, object X):
+    cdef inline cnp.ndarray _apply_dense(self, object X):
         """Finds the terminal region (=leaf node) for each sample in X."""
 
         # Check input
@@ -831,9 +831,9 @@ cdef class Tree:
 
                 out[i] = <SIZE_t>(node - self.nodes)  # node offset
 
-        return out
+        return np.asarray(out)
 
-    cdef inline SIZE_t[:] _apply_sparse_csr(self, object X):
+    cdef inline cnp.ndarray _apply_sparse_csr(self, object X):
         """Finds the terminal region (=leaf node) for each sample in sparse X.
         """
         # Check input
@@ -900,7 +900,7 @@ cdef class Tree:
             free(X_sample)
             free(feature_to_sample)
 
-        return out
+        return np.asarray(out)
 
     cpdef object decision_path(self, object X):
         """Finds the decision path (=node) for each sample in X."""
@@ -1082,7 +1082,7 @@ cdef class Tree:
 
         cdef double normalizer = 0.
 
-        cdef cnp.float64_t[:] importances = np.zeros((self.n_features,))
+        cdef cnp.float64_t[:] importances = np.zeros(self.n_features)
 
         with nogil:
             while node != end_node:
@@ -1097,18 +1097,20 @@ cdef class Tree:
                         right.weighted_n_node_samples * right.impurity)
                 node += 1
 
-        importances = np.divide(importances, nodes[0].weighted_n_node_samples)
+        for i in range(self.n_features):
+            importances[i] /= nodes[0].weighted_n_node_samples
 
         if normalize:
             normalizer = np.sum(importances)
 
             if normalizer > 0.0:
                 # Avoid dividing by zero (e.g., when root is pure)
-                importances = np.divide(importances, normalizer)
+                for i in range(self.n_features):
+                    importances[i] /= normalizer
 
         return np.asarray(importances)
 
-    cdef DOUBLE_t[:, :, ::1] _get_value_ndarray(self):
+    cdef cnp.ndarray _get_value_ndarray(self):
         """Wraps value as a 3-d NumPy array.
 
         The array keeps a reference to this Tree, which manages the underlying
@@ -1123,9 +1125,9 @@ cdef class Tree:
         Py_INCREF(self)
         if PyArray_SetBaseObject(arr.base, <PyObject*> self) < 0:
             raise ValueError("Can't initialize array.")
-        return arr
+        return np.asarray(arr)
 
-    cdef Node[::1] _get_node_ndarray(self):
+    cdef cnp.ndarray _get_node_ndarray(self):
         """Wraps nodes as a NumPy struct array.
 
         The array keeps a reference to this Tree, which manages the underlying
@@ -1145,7 +1147,7 @@ cdef class Tree:
         Py_INCREF(self)
         if PyArray_SetBaseObject(arr.base, <PyObject*> self) < 0:
             raise ValueError("Can't initialize array.")
-        return arr
+        return np.asarray(arr)
 
     def compute_partial_dependence(self, DTYPE_t[:, ::1] X,
                                    int[::1] target_features,

From c18c3cb72996fdc940f615c844bc94d53d4b6ac3 Mon Sep 17 00:00:00 2001
From: OmarManzoor <omar.salman@arbisoft.com>
Date: Wed, 8 Feb 2023 18:33:08 +0500
Subject: [PATCH 11/14] Reverted some lines

---
 sklearn/tree/_tree.pxd | 4 ++--
 sklearn/tree/_tree.pyx | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/sklearn/tree/_tree.pxd b/sklearn/tree/_tree.pxd
index 62338ddbbc20e..9c6d8213b0e13 100644
--- a/sklearn/tree/_tree.pxd
+++ b/sklearn/tree/_tree.pxd
@@ -65,9 +65,9 @@ cdef class Tree:
     cdef cnp.ndarray _get_value_ndarray(self)
     cdef cnp.ndarray _get_node_ndarray(self)
 
-    cpdef predict(self, object X)
+    cpdef cnp.ndarray predict(self, object X)
 
-    cpdef apply(self, object X)
+    cpdef cnp.ndarray apply(self, object X)
     cdef cnp.ndarray _apply_dense(self, object X)
     cdef cnp.ndarray _apply_sparse_csr(self, object X)
 
diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx
index 0e7cf6ecef6e0..be66c03239709 100644
--- a/sklearn/tree/_tree.pyx
+++ b/sklearn/tree/_tree.pyx
@@ -781,7 +781,7 @@ cdef class Tree:
 
         return node_id
 
-    cpdef predict(self, object X):
+    cpdef cnp.ndarray predict(self, object X):
         """Predict target for X."""
         out = self._get_value_ndarray().take(self.apply(X), axis=0,
                                              mode='clip')
@@ -789,7 +789,7 @@ cdef class Tree:
             out = out.reshape(X.shape[0], self.max_n_classes)
         return out
 
-    cpdef apply(self, object X):
+    cpdef cnp.ndarray apply(self, object X):
         """Finds the terminal region (=leaf node) for each sample in X."""
         if issparse(X):
             return self._apply_sparse_csr(X)
@@ -1140,7 +1140,7 @@ cdef class Tree:
         strides[0] = sizeof(Node)
         cdef Node[::1] arr
         Py_INCREF(NODE_DTYPE)
-        arr = PyArray_NewFromDescr(<PyTypeObject *> np.ndarray,
+        arr = PyArray_NewFromDescr(<PyTypeObject *> cnp.ndarray,
                                    <cnp.dtype> NODE_DTYPE, 1, shape,
                                    strides, <void*> self.nodes,
                                    cnp.NPY_ARRAY_DEFAULT, None)

From 317abb38e192540be491c4589bbf77c00c0d4918 Mon Sep 17 00:00:00 2001
From: OmarManzoor <omar.salman@arbisoft.com>
Date: Wed, 8 Feb 2023 20:02:34 +0500
Subject: [PATCH 12/14] PR suggestions

---
 sklearn/tree/_tree.pyx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx
index be66c03239709..1be17659bd67c 100644
--- a/sklearn/tree/_tree.pyx
+++ b/sklearn/tree/_tree.pyx
@@ -812,7 +812,7 @@ cdef class Tree:
         cdef SIZE_t n_samples = X.shape[0]
 
         # Initialize output
-        cdef SIZE_t[:] out = np.zeros((n_samples,), dtype=np.intp)
+        cdef SIZE_t[:] out = np.zeros(n_samples, dtype=np.intp)
 
         # Initialize auxiliary data-structure
         cdef Node* node = NULL
@@ -853,7 +853,7 @@ cdef class Tree:
         cdef SIZE_t n_features = X.shape[1]
 
         # Initialize output
-        cdef SIZE_t[:] out = np.zeros((n_samples,), dtype=np.intp)
+        cdef SIZE_t[:] out = np.zeros(n_samples, dtype=np.intp)
 
         # Initialize auxiliary data-structure
         cdef DTYPE_t feature_value = 0.

From e7e806bfe356d1ca62e26735b9136571efb318c4 Mon Sep 17 00:00:00 2001
From: OmarManzoor <omar.salman@arbisoft.com>
Date: Wed, 8 Feb 2023 22:32:28 +0500
Subject: [PATCH 13/14] Replace cnp.ndarray back in _get_value_ndarray and
 _get_node_ndarray

---
 sklearn/tree/_tree.pyx | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx
index 1be17659bd67c..21d58e58dd475 100644
--- a/sklearn/tree/_tree.pyx
+++ b/sklearn/tree/_tree.pyx
@@ -1120,12 +1120,12 @@ cdef class Tree:
         shape[0] = <cnp.npy_intp> self.node_count
         shape[1] = <cnp.npy_intp> self.n_outputs
         shape[2] = <cnp.npy_intp> self.max_n_classes
-        cdef DOUBLE_t[:, :, ::1] arr
+        cdef cnp.ndarray arr
         arr = cnp.PyArray_SimpleNewFromData(3, shape, cnp.NPY_DOUBLE, self.value)
         Py_INCREF(self)
-        if PyArray_SetBaseObject(arr.base, <PyObject*> self) < 0:
+        if PyArray_SetBaseObject(arr, <PyObject*> self) < 0:
             raise ValueError("Can't initialize array.")
-        return np.asarray(arr)
+        return arr
 
     cdef cnp.ndarray _get_node_ndarray(self):
         """Wraps nodes as a NumPy struct array.
@@ -1138,16 +1138,16 @@ cdef class Tree:
         shape[0] = <cnp.npy_intp> self.node_count
         cdef cnp.npy_intp strides[1]
         strides[0] = sizeof(Node)
-        cdef Node[::1] arr
+        cdef cnp.ndarray arr
         Py_INCREF(NODE_DTYPE)
         arr = PyArray_NewFromDescr(<PyTypeObject *> cnp.ndarray,
                                    <cnp.dtype> NODE_DTYPE, 1, shape,
                                    strides, <void*> self.nodes,
                                    cnp.NPY_ARRAY_DEFAULT, None)
         Py_INCREF(self)
-        if PyArray_SetBaseObject(arr.base, <PyObject*> self) < 0:
+        if PyArray_SetBaseObject(arr, <PyObject*> self) < 0:
             raise ValueError("Can't initialize array.")
-        return np.asarray(arr)
+        return arr
 
     def compute_partial_dependence(self, DTYPE_t[:, ::1] X,
                                    int[::1] target_features,

From 6d6dc0475a0105b4c0fe244b4099a3c67706d406 Mon Sep 17 00:00:00 2001
From: OmarManzoor <omar.salman@arbisoft.com>
Date: Thu, 9 Feb 2023 12:12:22 +0500
Subject: [PATCH 14/14] Add black formatting in modified method's signatures

---
 sklearn/tree/_tree.pxd | 17 ++++++++++++++---
 sklearn/tree/_tree.pyx | 35 +++++++++++++++++++++++++++--------
 2 files changed, 41 insertions(+), 11 deletions(-)

diff --git a/sklearn/tree/_tree.pxd b/sklearn/tree/_tree.pxd
index 9c6d8213b0e13..3e60e91d6940a 100644
--- a/sklearn/tree/_tree.pxd
+++ b/sklearn/tree/_tree.pxd
@@ -99,6 +99,17 @@ cdef class TreeBuilder:
     cdef SIZE_t max_depth               # Maximal tree depth
     cdef double min_impurity_decrease   # Impurity threshold for early stopping
 
-    cpdef build(self, Tree tree, object X, const DOUBLE_t[:, ::1] y,
-                const DOUBLE_t[:] sample_weight=*)
-    cdef _check_input(self, object X, const DOUBLE_t[:, ::1] y, const DOUBLE_t[:] sample_weight)
+    cpdef build(
+        self,
+        Tree tree,
+        object X,
+        const DOUBLE_t[:, ::1] y,
+        const DOUBLE_t[:] sample_weight=*,
+    )
+
+    cdef _check_input(
+        self,
+        object X,
+        const DOUBLE_t[:, ::1] y,
+        const DOUBLE_t[:] sample_weight,
+    )
diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx
index 21d58e58dd475..72e98b33b847f 100644
--- a/sklearn/tree/_tree.pyx
+++ b/sklearn/tree/_tree.pyx
@@ -86,13 +86,22 @@ NODE_DTYPE = np.asarray(<Node[:1]>(&dummy)).dtype
 cdef class TreeBuilder:
     """Interface for different tree building strategies."""
 
-    cpdef build(self, Tree tree, object X, const DOUBLE_t[:, ::1] y,
-                const DOUBLE_t[:] sample_weight=None):
+    cpdef build(
+        self,
+        Tree tree,
+        object X,
+        const DOUBLE_t[:, ::1] y,
+        const DOUBLE_t[:] sample_weight=None,
+    ):
         """Build a decision tree from the training set (X, y)."""
         pass
 
-    cdef inline _check_input(self, object X, const DOUBLE_t[:, ::1] y,
-                             const DOUBLE_t[:] sample_weight):
+    cdef inline _check_input(
+        self,
+        object X,
+        const DOUBLE_t[:, ::1] y,
+        const DOUBLE_t[:] sample_weight,
+    ):
         """Check input dtype, layout and format"""
         if issparse(X):
             X = X.tocsc()
@@ -147,8 +156,13 @@ cdef class DepthFirstTreeBuilder(TreeBuilder):
         self.max_depth = max_depth
         self.min_impurity_decrease = min_impurity_decrease
 
-    cpdef build(self, Tree tree, object X, const DOUBLE_t[:, ::1] y,
-                const DOUBLE_t[:] sample_weight=None):
+    cpdef build(
+        self,
+        Tree tree,
+        object X,
+        const DOUBLE_t[:, ::1] y,
+        const DOUBLE_t[:] sample_weight=None,
+    ):
         """Build a decision tree from the training set (X, y)."""
 
         # check input
@@ -338,8 +352,13 @@ cdef class BestFirstTreeBuilder(TreeBuilder):
         self.max_leaf_nodes = max_leaf_nodes
         self.min_impurity_decrease = min_impurity_decrease
 
-    cpdef build(self, Tree tree, object X, const DOUBLE_t[:, ::1] y,
-                const DOUBLE_t[:] sample_weight=None):
+    cpdef build(
+        self,
+        Tree tree,
+        object X,
+        const DOUBLE_t[:, ::1] y,
+        const DOUBLE_t[:] sample_weight=None,
+    ):
         """Build a decision tree from the training set (X, y)."""
 
         # check input