From 8c654ed3cd30bc980fba44c3031dc0640999ea72 Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Thu, 24 Nov 2022 11:30:16 +0500 Subject: [PATCH 1/4] MAINT Remove -Wcpp warnings when compiling sklearn.decomposition._online_lda_fast --- setup.py | 1 + sklearn/decomposition/_online_lda_fast.pyx | 20 ++++++++++++-------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/setup.py b/setup.py index 57900edba08a4..6bdcce78506b2 100755 --- a/setup.py +++ b/setup.py @@ -77,6 +77,7 @@ "sklearn.cluster._k_means_minibatch", "sklearn.datasets._svmlight_format_fast", "sklearn.decomposition._cdnmf_fast", + "sklearn.decomposition._online_lda_fast", "sklearn.ensemble._hist_gradient_boosting._gradient_boosting", "sklearn.ensemble._hist_gradient_boosting.histogram", "sklearn.ensemble._hist_gradient_boosting.splitting", diff --git a/sklearn/decomposition/_online_lda_fast.pyx b/sklearn/decomposition/_online_lda_fast.pyx index 61644b67205f5..6c153c84f7d07 100644 --- a/sklearn/decomposition/_online_lda_fast.pyx +++ b/sklearn/decomposition/_online_lda_fast.pyx @@ -8,9 +8,11 @@ cnp.import_array() from libc.math cimport exp, fabs, log from numpy.math cimport EULER +# TODO: use const memory views for fused type "floating" with Cython 3.0, +# where applicable -def mean_change(cnp.ndarray[ndim=1, dtype=floating] arr_1, - cnp.ndarray[ndim=1, dtype=floating] arr_2): + +def mean_change(floating[:] arr_1, floating[:] arr_2): """Calculate the mean difference between two arrays. Equivalent to np.abs(arr_1 - arr2).mean(). @@ -28,9 +30,11 @@ def mean_change(cnp.ndarray[ndim=1, dtype=floating] arr_1, return total / size -def _dirichlet_expectation_1d(cnp.ndarray[ndim=1, dtype=floating] doc_topic, - floating doc_topic_prior, - cnp.ndarray[ndim=1, dtype=floating] out): +def _dirichlet_expectation_1d( + floating[:] doc_topic, + floating doc_topic_prior, + floating[:] out +): """Dirichlet expectation for a single sample: exp(E[log(theta)]) for theta ~ Dir(doc_topic) after adding doc_topic_prior to doc_topic, in-place. @@ -56,7 +60,7 @@ def _dirichlet_expectation_1d(cnp.ndarray[ndim=1, dtype=floating] doc_topic, out[i] = exp(psi(doc_topic[i]) - psi_total) -def _dirichlet_expectation_2d(cnp.ndarray[ndim=2, dtype=floating] arr): +def _dirichlet_expectation_2d(floating[:, :] arr): """Dirichlet expectation for multiple samples: E[log(theta)] for theta ~ Dir(arr). @@ -66,7 +70,7 @@ def _dirichlet_expectation_2d(cnp.ndarray[ndim=2, dtype=floating] arr): the exp and doesn't add in the prior. """ cdef floating row_total, psi_row_total - cdef cnp.ndarray[ndim=2, dtype=floating] d_exp + cdef floating[:, :] d_exp cdef cnp.npy_intp i, j, n_rows, n_cols n_rows = arr.shape[0] @@ -82,7 +86,7 @@ def _dirichlet_expectation_2d(cnp.ndarray[ndim=2, dtype=floating] arr): for j in range(n_cols): d_exp[i, j] = psi(arr[i, j]) - psi_row_total - return d_exp + return d_exp.base # Psi function for positive arguments. Optimized for speed, not accuracy. From 7d57fe0b3ebcbb01c8fc3c6ed17a9a118baa527d Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Thu, 19 Jan 2023 18:15:25 +0500 Subject: [PATCH 2/4] Use const with floating memory type --- sklearn/decomposition/_online_lda_fast.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/decomposition/_online_lda_fast.pyx b/sklearn/decomposition/_online_lda_fast.pyx index 6c153c84f7d07..9c0727761d000 100644 --- a/sklearn/decomposition/_online_lda_fast.pyx +++ b/sklearn/decomposition/_online_lda_fast.pyx @@ -12,7 +12,7 @@ from numpy.math cimport EULER # where applicable -def mean_change(floating[:] arr_1, floating[:] arr_2): +def mean_change(const floating[:] arr_1, const floating[:] arr_2): """Calculate the mean difference between two arrays. Equivalent to np.abs(arr_1 - arr2).mean(). @@ -60,7 +60,7 @@ def _dirichlet_expectation_1d( out[i] = exp(psi(doc_topic[i]) - psi_total) -def _dirichlet_expectation_2d(floating[:, :] arr): +def _dirichlet_expectation_2d(const floating[:, :] arr): """Dirichlet expectation for multiple samples: E[log(theta)] for theta ~ Dir(arr). @@ -111,4 +111,4 @@ cdef floating psi(floating x) nogil: result += log(x) - .5 * r r = r * r result -= r * ((1./12.) - r * ((1./120.) - r * (1./252.))) - return result; + return result From 20f4d90e0c1cb13e50d6fee6d8afbf167afe337d Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Thu, 19 Jan 2023 18:43:17 +0500 Subject: [PATCH 3/4] Remove TODO --- sklearn/decomposition/_online_lda_fast.pyx | 3 --- 1 file changed, 3 deletions(-) diff --git a/sklearn/decomposition/_online_lda_fast.pyx b/sklearn/decomposition/_online_lda_fast.pyx index 9c0727761d000..9fbfd48d42428 100644 --- a/sklearn/decomposition/_online_lda_fast.pyx +++ b/sklearn/decomposition/_online_lda_fast.pyx @@ -8,9 +8,6 @@ cnp.import_array() from libc.math cimport exp, fabs, log from numpy.math cimport EULER -# TODO: use const memory views for fused type "floating" with Cython 3.0, -# where applicable - def mean_change(const floating[:] arr_1, const floating[:] arr_2): """Calculate the mean difference between two arrays. From 949d3f49e3def58f3149d6a1ce2dacc96ae14acb Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Fri, 20 Jan 2023 11:53:10 +0500 Subject: [PATCH 4/4] Trigger CI