scikit-learn · jeremiedbb · Nov 3, 2021 · Oct 18, 2021 · Oct 29, 2021 · Oct 30, 2021
diff --git a/sklearn/_build_utils/__init__.py b/sklearn/_build_utils/__init__.py
@@ -76,7 +76,14 @@ def cythonize_extensions(top_path, config):
         compile_time_env={
             "SKLEARN_OPENMP_PARALLELISM_ENABLED": sklearn._OPENMP_SUPPORTED
         },
-        compiler_directives={"language_level": 3},
+        compiler_directives={
+            "language_level": 3,  # Python 3 source semantics
+            "boundscheck": False,  # no IndexError checks on indexing
+            "wraparound": False,  # opt back in with @cython.wraparound(True)
+            "initializedcheck": False,  # no memoryview initialization checks
+            "nonecheck": False,  # no None checks on typed attribute access
+            "cdivision": True,  # C division/modulo, no ZeroDivisionError
+        },
     )
 
 

diff --git a/sklearn/_isotonic.pyx b/sklearn/_isotonic.pyx
@@ -3,8 +3,6 @@
 # Uses the pool adjacent violators algorithm (PAVA), with the
 # enhancement of searching for the longest decreasing subsequence to
 # pool at each step.
-#
-# cython: boundscheck=False, wraparound=False, cdivision=True
 
 import numpy as np
 cimport numpy as np

diff --git a/sklearn/cluster/_dbscan_inner.pyx b/sklearn/cluster/_dbscan_inner.pyx
@@ -1,8 +1,6 @@
 # Fast inner loop for DBSCAN.
 # Author: Lars Buitinck
 # License: 3-clause BSD
-#
-# cython: boundscheck=False, wraparound=False
 
 cimport cython
 from libcpp.vector cimport vector

diff --git a/sklearn/cluster/_hierarchical_fast.pyx b/sklearn/cluster/_hierarchical_fast.pyx
@@ -8,9 +8,6 @@ ctypedef np.float64_t DOUBLE
 ctypedef np.npy_intp INTP
 ctypedef np.int8_t INT8
 
-# Numpy must be initialized. When using numpy from C or Cython you must
-# _always_ do that, or you will have segfaults
-
 np.import_array()
 
 from ..metrics._dist_metrics cimport DistanceMetric
@@ -32,9 +29,6 @@ from numpy.math cimport INFINITY
 ###############################################################################
 # Utilities for computing the ward momentum
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
-@cython.cdivision(True)
 def compute_ward_dist(np.ndarray[DOUBLE, ndim=1, mode='c'] m_1,
                       np.ndarray[DOUBLE, ndim=2, mode='c'] m_2,
                       np.ndarray[INTP, ndim=1, mode='c'] coord_row,
@@ -101,8 +95,6 @@ def _hc_get_descendent(INTP node, children, INTP n_leaves):
     return descendent
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
 def hc_get_heads(np.ndarray[INTP, ndim=1] parents, copy=True):
     """Returns the heads of the forest, as defined by parents.
 
@@ -135,8 +127,6 @@ def hc_get_heads(np.ndarray[INTP, ndim=1] parents, copy=True):
     return parents
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
 def _get_parents(nodes, heads, np.ndarray[INTP, ndim=1] parents,
                  np.ndarray[INT8, ndim=1, mode='c'] not_visited):
     """Returns the heads of the given nodes, as defined by parents.
@@ -176,8 +166,6 @@ def _get_parents(nodes, heads, np.ndarray[INTP, ndim=1] parents,
 # as keys and edge weights as values.
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
 def max_merge(IntFloatDict a, IntFloatDict b,
               np.ndarray[ITYPE_t, ndim=1] mask,
               ITYPE_t n_a, ITYPE_t n_b):
@@ -231,8 +219,6 @@ def max_merge(IntFloatDict a, IntFloatDict b,
     return out_obj
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
 def average_merge(IntFloatDict a, IntFloatDict b,
               np.ndarray[ITYPE_t, ndim=1] mask,
               ITYPE_t n_a, ITYPE_t n_b):
@@ -302,7 +288,6 @@ cdef class WeightedEdge:
         self.a = a
         self.b = b
 
-    @cython.nonecheck(False)
     def __richcmp__(self, WeightedEdge other, int op):
         """Cython-specific comparison method.
 
@@ -348,8 +333,6 @@ cdef class UnionFind(object):
         self.size = np.hstack((np.ones(N, dtype=ITYPE),
                                np.zeros(N - 1, dtype=ITYPE)))
 
-    @cython.boundscheck(False)
-    @cython.nonecheck(False)
     cdef void union(self, ITYPE_t m, ITYPE_t n):
         self.parent[m] = self.next_label
         self.parent[n] = self.next_label
@@ -358,8 +341,7 @@ cdef class UnionFind(object):
 
         return
 
-    @cython.boundscheck(False)
-    @cython.nonecheck(False)
+    @cython.wraparound(True)
     cdef ITYPE_t fast_find(self, ITYPE_t n):
         cdef ITYPE_t p
         p = n
@@ -371,8 +353,7 @@ cdef class UnionFind(object):
             p, self.parent[p] = self.parent[p], n
         return n
 
-@cython.boundscheck(False)
-@cython.nonecheck(False)
+
 cpdef np.ndarray[DTYPE_t, ndim=2] _single_linkage_label(
     np.ndarray[DTYPE_t, ndim=2] L):
     """
@@ -423,6 +404,7 @@ cpdef np.ndarray[DTYPE_t, ndim=2] _single_linkage_label(
     return result_arr
 
 
+@cython.wraparound(True)
 def single_linkage_label(L):
     """
     Convert an linkage array or MST to a tree by labelling clusters at merges.
@@ -452,8 +434,6 @@ def single_linkage_label(L):
 
 
 # Implements MST-LINKAGE-CORE from https://arxiv.org/abs/1109.2378
-@cython.boundscheck(False)
-@cython.nonecheck(False)
 def mst_linkage_core(
         const DTYPE_t [:, ::1] raw_data,
         DistanceMetric dist_metric):

diff --git a/sklearn/cluster/_k_means_common.pxd b/sklearn/cluster/_k_means_common.pxd
@@ -1,6 +1,3 @@
-# cython: language_level=3
-
-
 from cython cimport floating
 cimport numpy as np
 

diff --git a/sklearn/cluster/_k_means_common.pyx b/sklearn/cluster/_k_means_common.pyx
@@ -1,7 +1,3 @@
-# cython: profile=True, boundscheck=False, wraparound=False, cdivision=True
-# Profiling is enabled by default as the overhead does not seem to be
-# measurable on this specific use case.
-
 # Author: Peter Prettenhofer <peter.prettenhofer@gmail.com>
 #         Olivier Grisel <olivier.grisel@ensta.org>
 #         Lars Buitinck

diff --git a/sklearn/cluster/_k_means_elkan.pyx b/sklearn/cluster/_k_means_elkan.pyx
@@ -1,5 +1,3 @@
-# cython: profile=True, boundscheck=False, wraparound=False, cdivision=True
-#
 # Author: Andreas Mueller
 #
 # Licence: BSD 3 clause

diff --git a/sklearn/cluster/_k_means_lloyd.pyx b/sklearn/cluster/_k_means_lloyd.pyx
@@ -1,5 +1,3 @@
-# cython: profile=True, boundscheck=False, wraparound=False, cdivision=True
-#
 # Licence: BSD 3 clause
 
 # TODO: We still need to use ndarrays instead of typed memoryviews when using

diff --git a/sklearn/cluster/_k_means_minibatch.pyx b/sklearn/cluster/_k_means_minibatch.pyx
@@ -1,5 +1,3 @@
-# cython: profile=True, boundscheck=False, wraparound=False, cdivision=True
-
 # TODO: We still need to use ndarrays instead of typed memoryviews when using
 # fused types and when the array may be read-only (for instance when it's
 # provided by the user). This will be fixed in cython >= 0.3.

diff --git a/sklearn/datasets/_svmlight_format_fast.pyx b/sklearn/datasets/_svmlight_format_fast.pyx
@@ -4,8 +4,6 @@
 #          Lars Buitinck
 #          Olivier Grisel <olivier.grisel@ensta.org>
 # License: BSD 3 clause
-#
-# cython: boundscheck=False, wraparound=False
 
 import array
 from cpython cimport array

diff --git a/sklearn/decomposition/_cdnmf_fast.pyx b/sklearn/decomposition/_cdnmf_fast.pyx
@@ -1,7 +1,3 @@
-# cython: cdivision=True
-# cython: boundscheck=False
-# cython: wraparound=False
-
 # Author: Mathieu Blondel, Tom Dupre la Tour
 # License: BSD 3 clause
 

diff --git a/sklearn/decomposition/_online_lda_fast.pyx b/sklearn/decomposition/_online_lda_fast.pyx
@@ -1,6 +1,3 @@
-#
-# cython: boundscheck=False, wraparound=False
-
 cimport cython
 cimport numpy as np
 import numpy as np
@@ -91,7 +88,6 @@ def _dirichlet_expectation_2d(np.ndarray[ndim=2, dtype=np.float64_t] arr):
 #
 # After: J. Bernardo (1976). Algorithm AS 103: Psi (Digamma) Function.
 # https://www.uv.es/~bernardo/1976AppStatist.pdf
-@cython.cdivision(True)
 cdef double psi(double x) nogil:
     if x <= 1e-6:
         # psi(x) = -EULER - 1/x + O(x)

diff --git a/sklearn/ensemble/_gradient_boosting.pyx b/sklearn/ensemble/_gradient_boosting.pyx
@@ -1,7 +1,3 @@
-# cython: cdivision=True
-# cython: boundscheck=False
-# cython: wraparound=False
-#
 # Author: Peter Prettenhofer
 #
 # License: BSD 3 clause

diff --git a/sklearn/ensemble/_hist_gradient_boosting/_binning.pyx b/sklearn/ensemble/_hist_gradient_boosting/_binning.pyx
@@ -1,9 +1,3 @@
-# cython: cdivision=True
-# cython: boundscheck=False
-# cython: wraparound=False
-# cython: nonecheck=False
-# cython: language_level=3
-
 # Author: Nicolas Hug
 
 cimport cython

diff --git a/sklearn/ensemble/_hist_gradient_boosting/_bitset.pxd b/sklearn/ensemble/_hist_gradient_boosting/_bitset.pxd
@@ -1,4 +1,3 @@
-# cython: language_level=3
 from .common cimport X_BINNED_DTYPE_C
 from .common cimport BITSET_DTYPE_C
 from .common cimport BITSET_INNER_DTYPE_C

diff --git a/sklearn/ensemble/_hist_gradient_boosting/_bitset.pyx b/sklearn/ensemble/_hist_gradient_boosting/_bitset.pyx
@@ -1,7 +1,3 @@
-# cython: cdivision=True
-# cython: boundscheck=False
-# cython: wraparound=False
-# cython: language_level=3
 from .common cimport BITSET_INNER_DTYPE_C
 from .common cimport BITSET_DTYPE_C
 from .common cimport X_DTYPE_C

diff --git a/sklearn/ensemble/_hist_gradient_boosting/_gradient_boosting.pyx b/sklearn/ensemble/_hist_gradient_boosting/_gradient_boosting.pyx
@@ -1,8 +1,3 @@
-# cython: cdivision=True
-# cython: boundscheck=False
-# cython: wraparound=False
-# cython: language_level=3
-
 # Author: Nicolas Hug
 
 cimport cython

diff --git a/sklearn/ensemble/_hist_gradient_boosting/_loss.pyx b/sklearn/ensemble/_hist_gradient_boosting/_loss.pyx
@@ -1,8 +1,3 @@
-# cython: cdivision=True
-# cython: boundscheck=False
-# cython: wraparound=False
-# cython: language_level=3
-
 # Author: Nicolas Hug
 
 cimport cython

diff --git a/sklearn/ensemble/_hist_gradient_boosting/_predictor.pyx b/sklearn/ensemble/_hist_gradient_boosting/_predictor.pyx
@@ -1,8 +1,3 @@
-# cython: cdivision=True
-# cython: boundscheck=False
-# cython: wraparound=False
-# cython: language_level=3
-
 # Author: Nicolas Hug
 
 cimport cython

diff --git a/sklearn/ensemble/_hist_gradient_boosting/common.pxd b/sklearn/ensemble/_hist_gradient_boosting/common.pxd
@@ -1,4 +1,3 @@
-# cython: language_level=3
 import numpy as np
 cimport numpy as np
 

diff --git a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx
@@ -1,7 +1,3 @@
-# cython: cdivision=True
-# cython: boundscheck=False
-# cython: wraparound=False
-# cython: language_level=3
 """This module contains routines for building histograms."""
 
 # Author: Nicolas Hug

diff --git a/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx b/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx
@@ -1,8 +1,3 @@
-# cython: cdivision=True
-# cython: boundscheck=False
-# cython: wraparound=False
-# cython: language_level=3
-
 """This module contains routines and data structures to:
 
 - Find the best possible split of a node. For a given node, a split is
@@ -791,7 +786,6 @@ cdef class Splitter:
                 split_info.sum_gradient_right, split_info.sum_hessian_right,
                 lower_bound, upper_bound, self.l2_regularization)
 
-    @cython.initializedcheck(False)
     cdef void _find_best_bin_to_split_category(
             self,
             unsigned int feature_idx,

diff --git a/sklearn/ensemble/_hist_gradient_boosting/utils.pyx b/sklearn/ensemble/_hist_gradient_boosting/utils.pyx
@@ -1,7 +1,3 @@
-# cython: cdivision=True
-# cython: boundscheck=False
-# cython: wraparound=False
-# cython: language_level=3
 """This module contains utility routines."""
 # Author: Nicolas Hug
 

diff --git a/sklearn/feature_extraction/_hashing_fast.pyx b/sklearn/feature_extraction/_hashing_fast.pyx
@@ -1,7 +1,5 @@
 # Author: Lars Buitinck
 # License: BSD 3 clause
-#
-# cython: boundscheck=False, cdivision=True
 
 import sys
 import array
@@ -92,7 +90,7 @@ def transform(raw_X, Py_ssize_t n_features, dtype,
     indices_a = np.frombuffer(indices, dtype=np.int32)
     indptr_a = np.frombuffer(indptr, dtype=indices_np_dtype)
 
-    if indptr[-1] > np.iinfo(np.int32).max:  # = 2**31 - 1
+    if indptr[len(indptr) - 1] > np.iinfo(np.int32).max:  # = 2**31 - 1
         # both indices and indptr have the same dtype in CSR arrays
         indices_a = indices_a.astype(np.int64, copy=False)
     else:

diff --git a/sklearn/linear_model/_cd_fast.pyx b/sklearn/linear_model/_cd_fast.pyx
@@ -5,8 +5,6 @@
 #         Manoj Kumar <manojkumarsivaraj334@gmail.com>
 #
 # License: BSD 3 clause
-#
-# cython: boundscheck=False, wraparound=False, cdivision=True
 
 from libc.math cimport fabs
 cimport numpy as np

diff --git a/sklearn/linear_model/_sag_fast.pyx.tp b/sklearn/linear_model/_sag_fast.pyx.tp
@@ -27,10 +27,6 @@ dtypes = [('64', 'double', 'np.float64'),
 
 #------------------------------------------------------------------------------
 
-# cython: cdivision=True
-# cython: boundscheck=False
-# cython: wraparound=False
-#
 # Authors: Danny Sullivan <dbsullivan23@gmail.com>
 #          Tom Dupre la Tour <tom.dupre-la-tour@m4x.org>
 #          Arthur Mensch <arthur.mensch@m4x.org

diff --git a/sklearn/linear_model/_sgd_fast.pyx b/sklearn/linear_model/_sgd_fast.pyx
@@ -1,7 +1,3 @@
-# cython: cdivision=True
-# cython: boundscheck=False
-# cython: wraparound=False
-#
 # Author: Peter Prettenhofer <peter.prettenhofer@gmail.com>
 #         Mathieu Blondel (partial_fit support)
 #         Rob Zinkov (passive-aggressive)

diff --git a/sklearn/manifold/_barnes_hut_tsne.pyx b/sklearn/manifold/_barnes_hut_tsne.pyx
@@ -1,7 +1,3 @@
-# cython: boundscheck=False
-# cython: wraparound=False
-# cython: cdivision=True
-#
 # Author: Christopher Moody <chrisemoody@gmail.com>
 # Author: Nick Travers <nickt@squareup.com>
 # Implementation by Chris Moody & Nick Travers

diff --git a/sklearn/manifold/_utils.pyx b/sklearn/manifold/_utils.pyx
@@ -1,5 +1,3 @@
-# cython: boundscheck=False
-
 from libc cimport math
 cimport cython
 import numpy as np

diff --git a/sklearn/metrics/_dist_metrics.pxd b/sklearn/metrics/_dist_metrics.pxd
@@ -1,8 +1,3 @@
-# cython: boundscheck=False
-# cython: cdivision=True
-# cython: initializedcheck=False
-# cython: wraparound=False
-
 cimport numpy as np
 from libc.math cimport sqrt, exp
 

diff --git a/sklearn/metrics/_dist_metrics.pyx b/sklearn/metrics/_dist_metrics.pyx
@@ -1,8 +1,3 @@
-# cython: boundscheck=False
-# cython: cdivision=True
-# cython: initializedcheck=False
-# cython: wraparound=False
-
 # By Jake Vanderplas (2013) <jakevdp@cs.washington.edu>
 # written for the scikit-learn project
 # License: BSD

diff --git a/sklearn/metrics/_pairwise_fast.pyx b/sklearn/metrics/_pairwise_fast.pyx
@@ -1,7 +1,3 @@
-#cython: boundscheck=False
-#cython: cdivision=True
-#cython: wraparound=False
-#
 # Author: Andreas Mueller <amueller@ais.uni-bonn.de>
 #         Lars Buitinck
 #         Paolo Toccaceli

diff --git a/sklearn/metrics/cluster/_expected_mutual_info_fast.pyx b/sklearn/metrics/cluster/_expected_mutual_info_fast.pyx
@@ -1,7 +1,3 @@
-# cython: cdivision=True
-# cython: boundscheck=False
-# cython: wraparound=False
-#
 # Authors: Robert Layton <robertlayton@gmail.com>
 #           Corey Lynch <coreylynch9@gmail.com>
 # License: BSD 3 clause