Skip to content

MAINT Remove -Wcpp warnings when compiling sklearn.decomposition._online_lda_fast #25020

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jan 26, 2023
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
"sklearn.cluster._k_means_minibatch",
"sklearn.datasets._svmlight_format_fast",
"sklearn.decomposition._cdnmf_fast",
"sklearn.decomposition._online_lda_fast",
"sklearn.ensemble._gradient_boosting",
"sklearn.ensemble._hist_gradient_boosting._gradient_boosting",
"sklearn.ensemble._hist_gradient_boosting.histogram",
Expand Down
19 changes: 10 additions & 9 deletions sklearn/decomposition/_online_lda_fast.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@ from libc.math cimport exp, fabs, log
from numpy.math cimport EULER


def mean_change(cnp.ndarray[ndim=1, dtype=floating] arr_1,
cnp.ndarray[ndim=1, dtype=floating] arr_2):
def mean_change(const floating[:] arr_1, const floating[:] arr_2):
"""Calculate the mean difference between two arrays.

Equivalent to np.abs(arr_1 - arr_2).mean().
Expand All @@ -28,9 +27,11 @@ def mean_change(cnp.ndarray[ndim=1, dtype=floating] arr_1,
return total / size


def _dirichlet_expectation_1d(cnp.ndarray[ndim=1, dtype=floating] doc_topic,
floating doc_topic_prior,
cnp.ndarray[ndim=1, dtype=floating] out):
def _dirichlet_expectation_1d(
floating[:] doc_topic,
floating doc_topic_prior,
floating[:] out
):
"""Dirichlet expectation for a single sample:
exp(E[log(theta)]) for theta ~ Dir(doc_topic)
after adding doc_topic_prior to doc_topic, in-place.
Expand All @@ -56,7 +57,7 @@ def _dirichlet_expectation_1d(cnp.ndarray[ndim=1, dtype=floating] doc_topic,
out[i] = exp(psi(doc_topic[i]) - psi_total)


def _dirichlet_expectation_2d(cnp.ndarray[ndim=2, dtype=floating] arr):
def _dirichlet_expectation_2d(const floating[:, :] arr):
"""Dirichlet expectation for multiple samples:
E[log(theta)] for theta ~ Dir(arr).

Expand All @@ -66,7 +67,7 @@ def _dirichlet_expectation_2d(cnp.ndarray[ndim=2, dtype=floating] arr):
the exp and doesn't add in the prior.
"""
cdef floating row_total, psi_row_total
cdef cnp.ndarray[ndim=2, dtype=floating] d_exp
cdef floating[:, :] d_exp
cdef cnp.npy_intp i, j, n_rows, n_cols

n_rows = arr.shape[0]
Expand All @@ -82,7 +83,7 @@ def _dirichlet_expectation_2d(cnp.ndarray[ndim=2, dtype=floating] arr):
for j in range(n_cols):
d_exp[i, j] = psi(arr[i, j]) - psi_row_total

return d_exp
return d_exp.base


# Psi function for positive arguments. Optimized for speed, not accuracy.
Expand All @@ -107,4 +108,4 @@ cdef floating psi(floating x) nogil:
result += log(x) - .5 * r
r = r * r
result -= r * ((1./12.) - r * ((1./120.) - r * (1./252.)))
return result;
return result