Skip to content

MNT Centralize common cython compiler directives #21512

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Nov 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion sklearn/_build_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,14 @@ def cythonize_extensions(top_path, config):
compile_time_env={
"SKLEARN_OPENMP_PARALLELISM_ENABLED": sklearn._OPENMP_SUPPORTED
},
compiler_directives={"language_level": 3},
compiler_directives={
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Super cool thing :)

"language_level": 3,
"boundscheck": False,
"wraparound": False,
"initializedcheck": False,
"nonecheck": False,
"cdivision": True,
},
)


Expand Down
2 changes: 0 additions & 2 deletions sklearn/_isotonic.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
# Uses the pool adjacent violators algorithm (PAVA), with the
# enhancement of searching for the longest decreasing subsequence to
# pool at each step.
#
# cython: boundscheck=False, wraparound=False, cdivision=True

import numpy as np
cimport numpy as np
Expand Down
2 changes: 0 additions & 2 deletions sklearn/cluster/_dbscan_inner.pyx
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
# Fast inner loop for DBSCAN.
# Author: Lars Buitinck
# License: 3-clause BSD
#
# cython: boundscheck=False, wraparound=False

cimport cython
from libcpp.vector cimport vector
Expand Down
26 changes: 3 additions & 23 deletions sklearn/cluster/_hierarchical_fast.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@ ctypedef np.float64_t DOUBLE
ctypedef np.npy_intp INTP
ctypedef np.int8_t INT8

# Numpy must be initialized. When using numpy from C or Cython you must
# _always_ do that, or you will have segfaults

np.import_array()

from ..metrics._dist_metrics cimport DistanceMetric
Expand All @@ -32,9 +29,6 @@ from numpy.math cimport INFINITY
###############################################################################
# Utilities for computing the ward momentum

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
def compute_ward_dist(np.ndarray[DOUBLE, ndim=1, mode='c'] m_1,
np.ndarray[DOUBLE, ndim=2, mode='c'] m_2,
np.ndarray[INTP, ndim=1, mode='c'] coord_row,
Expand Down Expand Up @@ -101,8 +95,6 @@ def _hc_get_descendent(INTP node, children, INTP n_leaves):
return descendent


@cython.boundscheck(False)
@cython.wraparound(False)
def hc_get_heads(np.ndarray[INTP, ndim=1] parents, copy=True):
"""Returns the heads of the forest, as defined by parents.

Expand Down Expand Up @@ -135,8 +127,6 @@ def hc_get_heads(np.ndarray[INTP, ndim=1] parents, copy=True):
return parents


@cython.boundscheck(False)
@cython.wraparound(False)
def _get_parents(nodes, heads, np.ndarray[INTP, ndim=1] parents,
np.ndarray[INT8, ndim=1, mode='c'] not_visited):
"""Returns the heads of the given nodes, as defined by parents.
Expand Down Expand Up @@ -176,8 +166,6 @@ def _get_parents(nodes, heads, np.ndarray[INTP, ndim=1] parents,
# as keys and edge weights as values.


@cython.boundscheck(False)
@cython.wraparound(False)
def max_merge(IntFloatDict a, IntFloatDict b,
np.ndarray[ITYPE_t, ndim=1] mask,
ITYPE_t n_a, ITYPE_t n_b):
Expand Down Expand Up @@ -231,8 +219,6 @@ def max_merge(IntFloatDict a, IntFloatDict b,
return out_obj


@cython.boundscheck(False)
@cython.wraparound(False)
def average_merge(IntFloatDict a, IntFloatDict b,
np.ndarray[ITYPE_t, ndim=1] mask,
ITYPE_t n_a, ITYPE_t n_b):
Expand Down Expand Up @@ -302,7 +288,6 @@ cdef class WeightedEdge:
self.a = a
self.b = b

@cython.nonecheck(False)
def __richcmp__(self, WeightedEdge other, int op):
"""Cython-specific comparison method.

Expand Down Expand Up @@ -348,8 +333,6 @@ cdef class UnionFind(object):
self.size = np.hstack((np.ones(N, dtype=ITYPE),
np.zeros(N - 1, dtype=ITYPE)))

@cython.boundscheck(False)
@cython.nonecheck(False)
cdef void union(self, ITYPE_t m, ITYPE_t n):
self.parent[m] = self.next_label
self.parent[n] = self.next_label
Expand All @@ -358,8 +341,7 @@ cdef class UnionFind(object):

return

@cython.boundscheck(False)
@cython.nonecheck(False)
@cython.wraparound(True)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method does negative indexing, so it needs an explicit `wraparound(True)` now that wraparound is disabled globally.

cdef ITYPE_t fast_find(self, ITYPE_t n):
cdef ITYPE_t p
p = n
Expand All @@ -371,8 +353,7 @@ cdef class UnionFind(object):
p, self.parent[p] = self.parent[p], n
return n

@cython.boundscheck(False)
@cython.nonecheck(False)

cpdef np.ndarray[DTYPE_t, ndim=2] _single_linkage_label(
np.ndarray[DTYPE_t, ndim=2] L):
"""
Expand Down Expand Up @@ -423,6 +404,7 @@ cpdef np.ndarray[DTYPE_t, ndim=2] _single_linkage_label(
return result_arr


@cython.wraparound(True)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function does negative indexing, so it needs an explicit `wraparound(True)` now that wraparound is disabled globally.

def single_linkage_label(L):
"""
Convert a linkage array or MST to a tree by labelling clusters at merges.
Expand Down Expand Up @@ -452,8 +434,6 @@ def single_linkage_label(L):


# Implements MST-LINKAGE-CORE from https://arxiv.org/abs/1109.2378
@cython.boundscheck(False)
@cython.nonecheck(False)
def mst_linkage_core(
const DTYPE_t [:, ::1] raw_data,
DistanceMetric dist_metric):
Expand Down
3 changes: 0 additions & 3 deletions sklearn/cluster/_k_means_common.pxd
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
# cython: language_level=3


from cython cimport floating
cimport numpy as np

Expand Down
4 changes: 0 additions & 4 deletions sklearn/cluster/_k_means_common.pyx
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
# cython: profile=True, boundscheck=False, wraparound=False, cdivision=True
# Profiling is enabled by default as the overhead does not seem to be
# measurable on this specific use case.

# Author: Peter Prettenhofer <peter.prettenhofer@gmail.com>
# Olivier Grisel <olivier.grisel@ensta.org>
# Lars Buitinck
Expand Down
2 changes: 0 additions & 2 deletions sklearn/cluster/_k_means_elkan.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# cython: profile=True, boundscheck=False, wraparound=False, cdivision=True
#
# Author: Andreas Mueller
#
# Licence: BSD 3 clause
Expand Down
2 changes: 0 additions & 2 deletions sklearn/cluster/_k_means_lloyd.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# cython: profile=True, boundscheck=False, wraparound=False, cdivision=True
#
# Licence: BSD 3 clause

# TODO: We still need to use ndarrays instead of typed memoryviews when using
Expand Down
2 changes: 0 additions & 2 deletions sklearn/cluster/_k_means_minibatch.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# cython: profile=True, boundscheck=False, wraparound=False, cdivision=True

# TODO: We still need to use ndarrays instead of typed memoryviews when using
# fused types and when the array may be read-only (for instance when it's
# provided by the user). This will be fixed in cython >= 3.0.
Expand Down
2 changes: 0 additions & 2 deletions sklearn/datasets/_svmlight_format_fast.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
# Lars Buitinck
# Olivier Grisel <olivier.grisel@ensta.org>
# License: BSD 3 clause
#
# cython: boundscheck=False, wraparound=False

import array
from cpython cimport array
Expand Down
4 changes: 0 additions & 4 deletions sklearn/decomposition/_cdnmf_fast.pyx
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
# cython: cdivision=True
# cython: boundscheck=False
# cython: wraparound=False

# Author: Mathieu Blondel, Tom Dupre la Tour
# License: BSD 3 clause

Expand Down
4 changes: 0 additions & 4 deletions sklearn/decomposition/_online_lda_fast.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
#
# cython: boundscheck=False, wraparound=False

cimport cython
cimport numpy as np
import numpy as np
Expand Down Expand Up @@ -91,7 +88,6 @@ def _dirichlet_expectation_2d(np.ndarray[ndim=2, dtype=np.float64_t] arr):
#
# After: J. Bernardo (1976). Algorithm AS 103: Psi (Digamma) Function.
# https://www.uv.es/~bernardo/1976AppStatist.pdf
@cython.cdivision(True)
cdef double psi(double x) nogil:
if x <= 1e-6:
# psi(x) = -EULER - 1/x + O(x)
Expand Down
4 changes: 0 additions & 4 deletions sklearn/ensemble/_gradient_boosting.pyx
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
# cython: cdivision=True
# cython: boundscheck=False
# cython: wraparound=False
#
# Author: Peter Prettenhofer
#
# License: BSD 3 clause
Expand Down
6 changes: 0 additions & 6 deletions sklearn/ensemble/_hist_gradient_boosting/_binning.pyx
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
# cython: cdivision=True
# cython: boundscheck=False
# cython: wraparound=False
# cython: nonecheck=False
# cython: language_level=3

# Author: Nicolas Hug

cimport cython
Expand Down
1 change: 0 additions & 1 deletion sklearn/ensemble/_hist_gradient_boosting/_bitset.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# cython: language_level=3
from .common cimport X_BINNED_DTYPE_C
from .common cimport BITSET_DTYPE_C
from .common cimport BITSET_INNER_DTYPE_C
Expand Down
4 changes: 0 additions & 4 deletions sklearn/ensemble/_hist_gradient_boosting/_bitset.pyx
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
# cython: cdivision=True
# cython: boundscheck=False
# cython: wraparound=False
# cython: language_level=3
from .common cimport BITSET_INNER_DTYPE_C
from .common cimport BITSET_DTYPE_C
from .common cimport X_DTYPE_C
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
# cython: cdivision=True
# cython: boundscheck=False
# cython: wraparound=False
# cython: language_level=3

# Author: Nicolas Hug

cimport cython
Expand Down
5 changes: 0 additions & 5 deletions sklearn/ensemble/_hist_gradient_boosting/_loss.pyx
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
# cython: cdivision=True
# cython: boundscheck=False
# cython: wraparound=False
# cython: language_level=3

# Author: Nicolas Hug

cimport cython
Expand Down
5 changes: 0 additions & 5 deletions sklearn/ensemble/_hist_gradient_boosting/_predictor.pyx
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
# cython: cdivision=True
# cython: boundscheck=False
# cython: wraparound=False
# cython: language_level=3

# Author: Nicolas Hug

cimport cython
Expand Down
1 change: 0 additions & 1 deletion sklearn/ensemble/_hist_gradient_boosting/common.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# cython: language_level=3
import numpy as np
cimport numpy as np

Expand Down
4 changes: 0 additions & 4 deletions sklearn/ensemble/_hist_gradient_boosting/histogram.pyx
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
# cython: cdivision=True
# cython: boundscheck=False
# cython: wraparound=False
# cython: language_level=3
"""This module contains routines for building histograms."""

# Author: Nicolas Hug
Expand Down
6 changes: 0 additions & 6 deletions sklearn/ensemble/_hist_gradient_boosting/splitting.pyx
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
# cython: cdivision=True
# cython: boundscheck=False
# cython: wraparound=False
# cython: language_level=3

"""This module contains routines and data structures to:

- Find the best possible split of a node. For a given node, a split is
Expand Down Expand Up @@ -791,7 +786,6 @@ cdef class Splitter:
split_info.sum_gradient_right, split_info.sum_hessian_right,
lower_bound, upper_bound, self.l2_regularization)

@cython.initializedcheck(False)
cdef void _find_best_bin_to_split_category(
self,
unsigned int feature_idx,
Expand Down
4 changes: 0 additions & 4 deletions sklearn/ensemble/_hist_gradient_boosting/utils.pyx
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
# cython: cdivision=True
# cython: boundscheck=False
# cython: wraparound=False
# cython: language_level=3
"""This module contains utility routines."""
# Author: Nicolas Hug

Expand Down
4 changes: 1 addition & 3 deletions sklearn/feature_extraction/_hashing_fast.pyx
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# Author: Lars Buitinck
# License: BSD 3 clause
#
# cython: boundscheck=False, cdivision=True

import sys
import array
Expand Down Expand Up @@ -92,7 +90,7 @@ def transform(raw_X, Py_ssize_t n_features, dtype,
indices_a = np.frombuffer(indices, dtype=np.int32)
indptr_a = np.frombuffer(indptr, dtype=indices_np_dtype)

if indptr[-1] > np.iinfo(np.int32).max: # = 2**31 - 1
if indptr[len(indptr) - 1] > np.iinfo(np.int32).max: # = 2**31 - 1
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function used to do negative indexing, but since there is only one occurrence and it only accesses the last element, I find it better to replace it with regular indexing instead of enabling wraparound for the whole function.

# both indices and indptr have the same dtype in CSR arrays
indices_a = indices_a.astype(np.int64, copy=False)
else:
Expand Down
2 changes: 0 additions & 2 deletions sklearn/linear_model/_cd_fast.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
# Manoj Kumar <manojkumarsivaraj334@gmail.com>
#
# License: BSD 3 clause
#
# cython: boundscheck=False, wraparound=False, cdivision=True

from libc.math cimport fabs
cimport numpy as np
Expand Down
4 changes: 0 additions & 4 deletions sklearn/linear_model/_sag_fast.pyx.tp
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,6 @@ dtypes = [('64', 'double', 'np.float64'),

#------------------------------------------------------------------------------

# cython: cdivision=True
# cython: boundscheck=False
# cython: wraparound=False
#
# Authors: Danny Sullivan <dbsullivan23@gmail.com>
# Tom Dupre la Tour <tom.dupre-la-tour@m4x.org>
# Arthur Mensch <arthur.mensch@m4x.org>
Expand Down
4 changes: 0 additions & 4 deletions sklearn/linear_model/_sgd_fast.pyx
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
# cython: cdivision=True
# cython: boundscheck=False
# cython: wraparound=False
#
# Author: Peter Prettenhofer <peter.prettenhofer@gmail.com>
# Mathieu Blondel (partial_fit support)
# Rob Zinkov (passive-aggressive)
Expand Down
4 changes: 0 additions & 4 deletions sklearn/manifold/_barnes_hut_tsne.pyx
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
# cython: boundscheck=False
# cython: wraparound=False
# cython: cdivision=True
#
# Author: Christopher Moody <chrisemoody@gmail.com>
# Author: Nick Travers <nickt@squareup.com>
# Implementation by Chris Moody & Nick Travers
Expand Down
2 changes: 0 additions & 2 deletions sklearn/manifold/_utils.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# cython: boundscheck=False

from libc cimport math
cimport cython
import numpy as np
Expand Down
5 changes: 0 additions & 5 deletions sklearn/metrics/_dist_metrics.pxd
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
# cython: boundscheck=False
# cython: cdivision=True
# cython: initializedcheck=False
# cython: wraparound=False

cimport numpy as np
from libc.math cimport sqrt, exp

Expand Down
5 changes: 0 additions & 5 deletions sklearn/metrics/_dist_metrics.pyx
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
# cython: boundscheck=False
# cython: cdivision=True
# cython: initializedcheck=False
# cython: wraparound=False

# By Jake Vanderplas (2013) <jakevdp@cs.washington.edu>
# written for the scikit-learn project
# License: BSD
Expand Down
4 changes: 0 additions & 4 deletions sklearn/metrics/_pairwise_fast.pyx
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
#cython: boundscheck=False
#cython: cdivision=True
#cython: wraparound=False
#
# Author: Andreas Mueller <amueller@ais.uni-bonn.de>
# Lars Buitinck
# Paolo Toccaceli
Expand Down
4 changes: 0 additions & 4 deletions sklearn/metrics/cluster/_expected_mutual_info_fast.pyx
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
# cython: cdivision=True
# cython: boundscheck=False
# cython: wraparound=False
#
# Authors: Robert Layton <robertlayton@gmail.com>
# Corey Lynch <coreylynch9@gmail.com>
# License: BSD 3 clause
Expand Down
Loading