From 63d47dcbbe4a0e8226f0e338673bb98626a8a012 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Sat, 10 Feb 2018 22:43:41 +1100 Subject: [PATCH 1/2] COSMIT Avoid writing out vectorizable operations in sparsefuncs --- sklearn/utils/sparsefuncs_fast.pyx | 35 +++++++++++++----------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/sklearn/utils/sparsefuncs_fast.pyx b/sklearn/utils/sparsefuncs_fast.pyx index 52c12ce5d5953..2e44c97825327 100644 --- a/sklearn/utils/sparsefuncs_fast.pyx +++ b/sklearn/utils/sparsefuncs_fast.pyx @@ -297,27 +297,22 @@ def _incr_mean_variance_axis0(np.ndarray[floating, ndim=1] X_data, # First pass if last_n == 0: return new_mean, new_var, new_n - # Next passes - else: - updated_n = last_n + new_n - last_over_new_n = last_n / new_n - - for i in xrange(n_features): - # Unnormalized old stats - last_mean[i] *= last_n - last_var[i] *= last_n - # Unnormalized new stats - new_mean[i] *= new_n - new_var[i] *= new_n - - # Update stats - updated_var[i] = (last_var[i] + new_var[i] + - last_over_new_n / updated_n * - (last_mean[i] / last_over_new_n - new_mean[i]) ** 2) - - updated_mean[i] = (last_mean[i] + new_mean[i]) / updated_n - updated_var[i] = updated_var[i] / updated_n + # Next passes + updated_n = last_n + new_n + last_over_new_n = last_n / new_n + + # Unnormalized stats + last_mean *= last_n + last_var *= last_n + new_mean *= new_n + new_var *= new_n + + # Update stats + updated_var = (last_var + new_var + last_over_new_n / updated_n * + (last_mean / last_over_new_n - new_mean) ** 2) + updated_mean = last_mean + new_mean / updated_n + updated_var /= updated_n return updated_mean, updated_var, updated_n From 45af0e5b33bf6ca14e336e7f7d71ce68cb0a64eb Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Mon, 12 Feb 2018 10:15:30 +1100 Subject: [PATCH 2/2] fix: insert missing parentheses --- sklearn/utils/sparsefuncs_fast.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/sparsefuncs_fast.pyx b/sklearn/utils/sparsefuncs_fast.pyx index 2e44c97825327..481f2137fab77 100644 --- a/sklearn/utils/sparsefuncs_fast.pyx +++ b/sklearn/utils/sparsefuncs_fast.pyx @@ -311,7 +311,7 @@ def _incr_mean_variance_axis0(np.ndarray[floating, ndim=1] X_data, # Update stats updated_var = (last_var + new_var + last_over_new_n / updated_n * (last_mean / last_over_new_n - new_mean) ** 2) - updated_mean = last_mean + new_mean / updated_n + updated_mean = (last_mean + new_mean) / updated_n updated_var /= updated_n return updated_mean, updated_var, updated_n