MAINT Replace deprecated cython conditional compilation #25654

Merged
Changes from all commits (24 commits)
23 changes: 7 additions & 16 deletions doc/developers/performance.rst
@@ -344,27 +344,18 @@ Using OpenMP
Since scikit-learn can be built without OpenMP, it's necessary to protect each
direct call to OpenMP.

-There are some helpers in
+The `_openmp_helpers` module, available in
`sklearn/utils/_openmp_helpers.pyx <https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/utils/_openmp_helpers.pyx>`_
-that you can reuse for the main useful functionalities and already have the
-necessary protection to be built without OpenMP.
+provides protected versions of the OpenMP routines. To use OpenMP routines, they
+must be cimported from this module and not from the OpenMP library directly::

-If the helpers are not enough, you need to protect your OpenMP code using the
-following syntax::
-
-    # importing OpenMP
-    IF SKLEARN_OPENMP_PARALLELISM_ENABLED:
-        cimport openmp
-
-    # calling OpenMP
-    IF SKLEARN_OPENMP_PARALLELISM_ENABLED:
-        max_threads = openmp.omp_get_max_threads()
-    ELSE:
-        max_threads = 1
+    from sklearn.utils._openmp_helpers cimport omp_get_max_threads
+    max_threads = omp_get_max_threads()
Review comment (Member) on lines +352 to +353: This looks much clearer than before.


.. note::

-    Protecting the parallel loop, ``prange``, is already done by cython.
+    The parallel loop, `prange`, is already protected by cython and can be used directly
+    from `cython.parallel`.


.. _profiling-compiled-extension:
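For readers new to the helpers, here is a minimal sketch of how the protected routines combine with `prange` (a sketch under assumptions: `parallel_sum` is an illustrative name, and `omp_get_max_threads` is assumed to return 1 when scikit-learn is built without OpenMP)::

    # cython: boundscheck=False, wraparound=False
    from cython.parallel import prange

    from sklearn.utils._openmp_helpers cimport omp_get_max_threads

    def parallel_sum(double[::1] x):
        """Sum x in parallel when OpenMP is available, serially otherwise."""
        cdef:
            Py_ssize_t i
            double total = 0.0
            int n_threads = omp_get_max_threads()  # assumed 1 without OpenMP
        # prange itself needs no protection: cython compiles it to a plain
        # serial loop when the OpenMP compile flags are absent.
        for i in prange(x.shape[0], nogil=True, num_threads=n_threads):
            total += x[i]
        return total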
11 changes: 3 additions & 8 deletions sklearn/_build_utils/__init__.py
@@ -52,11 +52,9 @@ def cythonize_extensions(extension):
# compilers are properly configured to build with OpenMP. This is expensive
# and we only want to call this function once.
# The result of this check is cached as a private attribute on the sklearn
-# module (only at build-time) to be used twice:
-# - First to set the value of SKLEARN_OPENMP_PARALLELISM_ENABLED, the
-#   cython build-time variable passed to the cythonize() call.
-# - Then in the build_ext subclass defined in the top-level setup.py file
-#   to actually build the compiled extensions with OpenMP flags if needed.
+# module (only at build-time) to be used in the build_ext subclass defined
+# in the top-level setup.py file to actually build the compiled extensions
+# with OpenMP flags if needed.
sklearn._OPENMP_SUPPORTED = check_openmp_support()

n_jobs = 1
@@ -82,9 +80,6 @@ def cythonize_extensions(extension):
return cythonize(
extension,
nthreads=n_jobs,
-compile_time_env={
-    "SKLEARN_OPENMP_PARALLELISM_ENABLED": sklearn._OPENMP_SUPPORTED
-},
compiler_directives=compiler_directives,
)

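For context, the consuming side mentioned in the comment reads the cached attribute when compiling extensions. A hypothetical sketch (the class name and flag handling are illustrative assumptions, not the actual setup.py)::

    from setuptools.command.build_ext import build_ext


    class build_ext_openmp(build_ext):  # hypothetical name
        def build_extensions(self):
            import sklearn

            # Attribute cached at build time by cythonize_extensions().
            if getattr(sklearn, "_OPENMP_SUPPORTED", False):
                for ext in self.extensions:
                    # GCC/clang-style flag; MSVC would need /openmp instead.
                    ext.extra_compile_args.append("-fopenmp")
                    ext.extra_link_args.append("-fopenmp")
            super().build_extensions()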
45 changes: 19 additions & 26 deletions sklearn/cluster/_k_means_elkan.pyx
@@ -6,13 +6,16 @@
# fused types and when the array may be read-only (for instance when it's
# provided by the user). This is fixed in cython >= 3.0.

-IF SKLEARN_OPENMP_PARALLELISM_ENABLED:
-    cimport openmp
from cython cimport floating
from cython.parallel import prange, parallel
from libc.stdlib cimport calloc, free
from libc.string cimport memset

+from ..utils._openmp_helpers cimport omp_lock_t
+from ..utils._openmp_helpers cimport omp_init_lock
+from ..utils._openmp_helpers cimport omp_destroy_lock
+from ..utils._openmp_helpers cimport omp_set_lock
+from ..utils._openmp_helpers cimport omp_unset_lock
from ..utils.extmath import row_norms
from ._k_means_common import CHUNK_SIZE
from ._k_means_common cimport _relocate_empty_clusters_dense
@@ -274,8 +277,7 @@ def elkan_iter_chunked_dense(
floating *centers_new_chunk
floating *weight_in_clusters_chunk

-IF SKLEARN_OPENMP_PARALLELISM_ENABLED:
-    openmp.omp_lock_t lock
+omp_lock_t lock

# count remainder chunk in total number of chunks
n_chunks += n_samples != n_chunks * n_samples_chunk
@@ -286,8 +288,7 @@
if update_centers:
memset(&centers_new[0, 0], 0, n_clusters * n_features * sizeof(floating))
memset(&weight_in_clusters[0], 0, n_clusters * sizeof(floating))
-IF SKLEARN_OPENMP_PARALLELISM_ENABLED:
-    openmp.omp_init_lock(&lock)
+omp_init_lock(&lock)

with nogil, parallel(num_threads=n_threads):
# thread local buffers
@@ -316,23 +317,20 @@

# reduction from local buffers.
if update_centers:
-IF SKLEARN_OPENMP_PARALLELISM_ENABLED:
-    # The lock is necessary to avoid race conditions when aggregating
-    # info from different thread-local buffers.
-    openmp.omp_set_lock(&lock)
+# The lock is necessary to avoid race conditions when aggregating
+# info from different thread-local buffers.
+omp_set_lock(&lock)
for j in range(n_clusters):
weight_in_clusters[j] += weight_in_clusters_chunk[j]
for k in range(n_features):
centers_new[j, k] += centers_new_chunk[j * n_features + k]
-IF SKLEARN_OPENMP_PARALLELISM_ENABLED:
-    openmp.omp_unset_lock(&lock)
+omp_unset_lock(&lock)

free(centers_new_chunk)
free(weight_in_clusters_chunk)

if update_centers:
-IF SKLEARN_OPENMP_PARALLELISM_ENABLED:
-    openmp.omp_destroy_lock(&lock)
+omp_destroy_lock(&lock)
_relocate_empty_clusters_dense(X, sample_weight, centers_old,
centers_new, weight_in_clusters, labels)

@@ -516,8 +514,7 @@ def elkan_iter_chunked_sparse(
floating *centers_new_chunk
floating *weight_in_clusters_chunk

-IF SKLEARN_OPENMP_PARALLELISM_ENABLED:
-    openmp.omp_lock_t lock
+omp_lock_t lock

# count remainder chunk in total number of chunks
n_chunks += n_samples != n_chunks * n_samples_chunk
Expand All @@ -528,8 +525,7 @@ def elkan_iter_chunked_sparse(
if update_centers:
memset(&centers_new[0, 0], 0, n_clusters * n_features * sizeof(floating))
memset(&weight_in_clusters[0], 0, n_clusters * sizeof(floating))
-IF SKLEARN_OPENMP_PARALLELISM_ENABLED:
-    openmp.omp_init_lock(&lock)
+omp_init_lock(&lock)

with nogil, parallel(num_threads=n_threads):
# thread local buffers
Expand Down Expand Up @@ -561,23 +557,20 @@ def elkan_iter_chunked_sparse(

# reduction from local buffers.
if update_centers:
-IF SKLEARN_OPENMP_PARALLELISM_ENABLED:
-    # The lock is necessary to avoid race conditions when aggregating
-    # info from different thread-local buffers.
-    openmp.omp_set_lock(&lock)
+# The lock is necessary to avoid race conditions when aggregating
+# info from different thread-local buffers.
+omp_set_lock(&lock)
for j in range(n_clusters):
weight_in_clusters[j] += weight_in_clusters_chunk[j]
for k in range(n_features):
centers_new[j, k] += centers_new_chunk[j * n_features + k]
-IF SKLEARN_OPENMP_PARALLELISM_ENABLED:
-    openmp.omp_unset_lock(&lock)
+omp_unset_lock(&lock)

free(centers_new_chunk)
free(weight_in_clusters_chunk)

if update_centers:
-IF SKLEARN_OPENMP_PARALLELISM_ENABLED:
-    openmp.omp_destroy_lock(&lock)
+omp_destroy_lock(&lock)
_relocate_empty_clusters_sparse(
X_data, X_indices, X_indptr, sample_weight,
centers_old, centers_new, weight_in_clusters, labels)
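The same lock-protected reduction appears in both the dense and sparse paths above. Distilled to its skeleton, and assuming the `_openmp_helpers` cimports degrade to sequential no-ops when OpenMP is unavailable, the pattern is (a sketch with an illustrative name, not code from this PR)::

    # cython: boundscheck=False, wraparound=False
    from cython.parallel import parallel
    from libc.stdlib cimport calloc, free

    from sklearn.utils._openmp_helpers cimport (
        omp_lock_t, omp_init_lock, omp_set_lock, omp_unset_lock,
        omp_destroy_lock)


    def locked_reduction(double[::1] out, int n_threads):
        cdef:
            int j
            int n = out.shape[0]
            double *local
            omp_lock_t lock

        omp_init_lock(&lock)
        with nogil, parallel(num_threads=n_threads):
            # thread-local buffer; each thread's real work (elided here)
            # accumulates into it without any synchronization
            local = <double *> calloc(n, sizeof(double))
            # serialize only the merge into the shared output buffer
            omp_set_lock(&lock)
            for j in range(n):
                out[j] += local[j]
            omp_unset_lock(&lock)
            free(local)
        omp_destroy_lock(&lock)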
47 changes: 21 additions & 26 deletions sklearn/cluster/_k_means_lloyd.pyx
@@ -4,14 +4,17 @@
# fused types and when the array may be read-only (for instance when it's
# provided by the user). This is fixed in cython >= 3.0.

-IF SKLEARN_OPENMP_PARALLELISM_ENABLED:
-    cimport openmp
from cython cimport floating
from cython.parallel import prange, parallel
from libc.stdlib cimport malloc, calloc, free
from libc.string cimport memset
from libc.float cimport DBL_MAX, FLT_MAX

+from ..utils._openmp_helpers cimport omp_lock_t
+from ..utils._openmp_helpers cimport omp_init_lock
+from ..utils._openmp_helpers cimport omp_destroy_lock
+from ..utils._openmp_helpers cimport omp_set_lock
+from ..utils._openmp_helpers cimport omp_unset_lock
from ..utils.extmath import row_norms
from ..utils._cython_blas cimport _gemm
from ..utils._cython_blas cimport RowMajor, Trans, NoTrans
@@ -94,8 +97,8 @@ def lloyd_iter_chunked_dense(
floating *centers_new_chunk
floating *weight_in_clusters_chunk
floating *pairwise_distances_chunk
-IF SKLEARN_OPENMP_PARALLELISM_ENABLED:
-    openmp.omp_lock_t lock
+
+omp_lock_t lock

# count remainder chunk in total number of chunks
n_chunks += n_samples != n_chunks * n_samples_chunk
@@ -106,8 +109,7 @@
if update_centers:
memset(&centers_new[0, 0], 0, n_clusters * n_features * sizeof(floating))
memset(&weight_in_clusters[0], 0, n_clusters * sizeof(floating))
-IF SKLEARN_OPENMP_PARALLELISM_ENABLED:
-    openmp.omp_init_lock(&lock)
+omp_init_lock(&lock)

with nogil, parallel(num_threads=n_threads):
# thread local buffers
@@ -135,24 +137,22 @@

# reduction from local buffers.
if update_centers:
-IF SKLEARN_OPENMP_PARALLELISM_ENABLED:
-    # The lock is necessary to avoid race conditions when aggregating
-    # info from different thread-local buffers.
-    openmp.omp_set_lock(&lock)
+# The lock is necessary to avoid race conditions when aggregating
+# info from different thread-local buffers.
+omp_set_lock(&lock)
for j in range(n_clusters):
weight_in_clusters[j] += weight_in_clusters_chunk[j]
for k in range(n_features):
centers_new[j, k] += centers_new_chunk[j * n_features + k]
-IF SKLEARN_OPENMP_PARALLELISM_ENABLED:
-    openmp.omp_unset_lock(&lock)
+
+omp_unset_lock(&lock)

free(centers_new_chunk)
free(weight_in_clusters_chunk)
free(pairwise_distances_chunk)

if update_centers:
-IF SKLEARN_OPENMP_PARALLELISM_ENABLED:
-    openmp.omp_destroy_lock(&lock)
+omp_destroy_lock(&lock)
_relocate_empty_clusters_dense(X, sample_weight, centers_old,
centers_new, weight_in_clusters, labels)

@@ -292,8 +292,7 @@ def lloyd_iter_chunked_sparse(
floating *centers_new_chunk
floating *weight_in_clusters_chunk

-IF SKLEARN_OPENMP_PARALLELISM_ENABLED:
-    openmp.omp_lock_t lock
+omp_lock_t lock

# count remainder chunk in total number of chunks
n_chunks += n_samples != n_chunks * n_samples_chunk
@@ -304,8 +303,7 @@
if update_centers:
memset(&centers_new[0, 0], 0, n_clusters * n_features * sizeof(floating))
memset(&weight_in_clusters[0], 0, n_clusters * sizeof(floating))
-IF SKLEARN_OPENMP_PARALLELISM_ENABLED:
-    openmp.omp_init_lock(&lock)
+omp_init_lock(&lock)

with nogil, parallel(num_threads=n_threads):
# thread local buffers
@@ -333,23 +331,20 @@

# reduction from local buffers.
if update_centers:
-IF SKLEARN_OPENMP_PARALLELISM_ENABLED:
-    # The lock is necessary to avoid race conditions when aggregating
-    # info from different thread-local buffers.
-    openmp.omp_set_lock(&lock)
+# The lock is necessary to avoid race conditions when aggregating
+# info from different thread-local buffers.
+omp_set_lock(&lock)
for j in range(n_clusters):
weight_in_clusters[j] += weight_in_clusters_chunk[j]
for k in range(n_features):
centers_new[j, k] += centers_new_chunk[j * n_features + k]
-IF SKLEARN_OPENMP_PARALLELISM_ENABLED:
-    openmp.omp_unset_lock(&lock)
+omp_unset_lock(&lock)

free(centers_new_chunk)
free(weight_in_clusters_chunk)

if update_centers:
-IF SKLEARN_OPENMP_PARALLELISM_ENABLED:
-    openmp.omp_destroy_lock(&lock)
+omp_destroy_lock(&lock)
_relocate_empty_clusters_sparse(
X_data, X_indices, X_indptr, sample_weight,
centers_old, centers_new, weight_in_clusters, labels)
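One recurring line worth unpacking: `n_chunks += n_samples != n_chunks * n_samples_chunk` counts the remainder chunk, adding 1 exactly when `n_samples` is not a multiple of the chunk size. A tiny worked sketch (illustrative function, not from the PR)::

    def count_chunks(int n_samples, int n_samples_chunk):
        cdef int n_chunks = n_samples // n_samples_chunk
        # e.g. 1000 samples in chunks of 256 -> 3 full chunks; the comparison
        # is True (1), which counts the trailing 232-sample chunk.
        n_chunks += n_samples != n_chunks * n_samples_chunk
        return n_chunks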
41 changes: 27 additions & 14 deletions sklearn/linear_model/_sgd_fast.pyx
@@ -20,20 +20,33 @@ from ..utils._seq_dataset cimport SequentialDataset64 as SequentialDataset

cnp.import_array()

-# Penalty constants
-DEF NO_PENALTY = 0
-DEF L1 = 1
-DEF L2 = 2
-DEF ELASTICNET = 3
-
-# Learning rate constants
-DEF CONSTANT = 1
-DEF OPTIMAL = 2
-DEF INVSCALING = 3
-DEF ADAPTIVE = 4
-DEF PA1 = 5
-DEF PA2 = 6

+cdef extern from *:
+    """
+    /* Penalty constants */
+    #define NO_PENALTY 0
+    #define L1 1
+    #define L2 2
+    #define ELASTICNET 3
+
+    /* Learning rate constants */
+    #define CONSTANT 1
+    #define OPTIMAL 2
+    #define INVSCALING 3
+    #define ADAPTIVE 4
+    #define PA1 5
+    #define PA2 6
+    """
+    int NO_PENALTY = 0
+    int L1 = 1
+    int L2 = 2
+    int ELASTICNET = 3
+
+    int CONSTANT = 1
+    int OPTIMAL = 2
+    int INVSCALING = 3
+    int ADAPTIVE = 4
+    int PA1 = 5
+    int PA2 = 6


# ----------------------------------------
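The replacement relies on cython's verbatim-C feature: the string literal after `cdef extern from *` is pasted as-is into the generated C file, and the declarations that follow tell cython those names exist as C ints. The deprecated compile-time `DEF` constants thus become ordinary C macros resolved by the C compiler. A hypothetical sketch of the same pattern for a new constant::

    cdef extern from *:
        """
        #define MY_MODE 42   /* pasted verbatim into the generated C file */
        """
        int MY_MODE

    def is_my_mode(int mode):
        # compiles down to a plain C comparison against the macro
        return mode == MY_MODE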
8 changes: 4 additions & 4 deletions sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp
@@ -21,7 +21,7 @@ from cython.parallel cimport parallel, prange
from libcpp.vector cimport vector

from ...utils._cython_blas cimport _dot
-from ...utils._openmp_helpers cimport _openmp_thread_num
+from ...utils._openmp_helpers cimport omp_get_thread_num
from ...utils._typedefs cimport ITYPE_t, DTYPE_t

import numpy as np
@@ -88,7 +88,7 @@ cdef DTYPE_t[::1] _sqeuclidean_row_norms32_dense(
)

with nogil, parallel(num_threads=num_threads):
-thread_num = _openmp_thread_num()
+thread_num = omp_get_thread_num()

for i in prange(n, schedule='static'):
# Upcasting the i-th row of X from float32 to float64
@@ -245,7 +245,7 @@ cdef class BaseDistancesReduction{{name_suffix}}:
ITYPE_t thread_num

with nogil, parallel(num_threads=self.chunks_n_threads):
-thread_num = _openmp_thread_num()
+thread_num = omp_get_thread_num()

# Allocating thread datastructures
self._parallel_on_X_parallel_init(thread_num)
@@ -324,7 +324,7 @@ cdef class BaseDistancesReduction{{name_suffix}}:
X_end = X_start + self.X_n_samples_chunk

with nogil, parallel(num_threads=self.chunks_n_threads):
-thread_num = _openmp_thread_num()
+thread_num = omp_get_thread_num()

# Initializing datastructures used in this thread
self._parallel_on_Y_parallel_init(thread_num, X_start, X_end)
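Finally, the renamed `omp_get_thread_num` presumably comes from `_openmp_helpers` with a sequential fallback, so callers no longer need conditional compilation. A plausible sketch of such a declaration (an assumption — the PR's actual `.pxd` is not shown in this diff)::

    # sklearn/utils/_openmp_helpers.pxd (sketch)
    cdef extern from *:
        """
        #ifdef _OPENMP
            #include <omp.h>
        #else
            /* sequential fallback: one thread, numbered 0 */
            static inline int omp_get_thread_num(void) { return 0; }
        #endif
        """
        int omp_get_thread_num() nogil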