ENH speedup enet_coordinate_descent_gram (#31880)

lorentzenchr · web-flow · commit 8525ba5d3c3b · 2025-08-08T10:51:17.000+05:00
diff --git a/doc/whats_new/upcoming_changes/sklearn.linear_model/31880.enhancement.rst b/doc/whats_new/upcoming_changes/sklearn.linear_model/31880.enhancement.rst
@@ -0,0 +1,7 @@
+- :class:`linear_model.ElasticNet`, :class:`linear_model.ElasticNetCV`,
+  :class:`linear_model.Lasso` and :class:`linear_model.LassoCV` with `precompute=True`
+  (or `precompute="auto"`` and `n_samples > n_features`) are faster to fit by
+  avoiding a BLAS level 1 (axpy) call in the inner most loop.
+  Same for functions :func:`linear_model.enet_path` and
+  :func:`linear_model.lasso_path`.
+  By :user:`Christian Lorentzen <lorentzenchr>`.
diff --git a/sklearn/linear_model/_cd_fast.pyx b/sklearn/linear_model/_cd_fast.pyx
@@ -337,7 +337,7 @@ def sparse_enet_coordinate_descent(
     cdef unsigned int n_features = w.shape[0]
 
     # compute norms of the columns of X
-    cdef floating[:] norm_cols_X = np.zeros(n_features, dtype=dtype)
+    cdef floating[::1] norm_cols_X = np.zeros(n_features, dtype=dtype)
 
     # initial value of the residuals
     # R = y - Zw, weighted version R = sample_weight * (y - Zw)
@@ -609,9 +609,10 @@ def enet_coordinate_descent_gram(
     cdef unsigned int n_features = Q.shape[0]
 
     # initial value "Q w" which will be kept of up to date in the iterations
-    cdef floating[:] H = np.dot(Q, w)
+    cdef floating[::1] Qw = np.dot(Q, w)
+    cdef floating[::1] XtA = np.zeros(n_features, dtype=dtype)
+    cdef floating y_norm2 = np.dot(y, y)
 
-    cdef floating[:] XtA = np.zeros(n_features, dtype=dtype)
     cdef floating tmp
     cdef floating w_ii
     cdef floating d_w_max
@@ -628,14 +629,6 @@ def enet_coordinate_descent_gram(
     cdef uint32_t rand_r_state_seed = rng.randint(0, RAND_R_MAX)
     cdef uint32_t* rand_r_state = &rand_r_state_seed
 
-    cdef floating y_norm2 = np.dot(y, y)
-    cdef floating* w_ptr = &w[0]
-    cdef const floating* Q_ptr = &Q[0, 0]
-    cdef const floating* q_ptr = &q[0]
-    cdef floating* H_ptr = &H[0]
-    cdef floating* XtA_ptr = &XtA[0]
-    tol = tol * y_norm2
-
     if alpha == 0:
         warnings.warn(
             "Coordinate descent without L1 regularization may "
@@ -644,6 +637,7 @@ def enet_coordinate_descent_gram(
         )
 
     with nogil:
+        tol *= y_norm2
         for n_iter in range(max_iter):
             w_max = 0.0
             d_w_max = 0.0
@@ -658,23 +652,19 @@ def enet_coordinate_descent_gram(
 
                 w_ii = w[ii]  # Store previous value
 
-                if w_ii != 0.0:
-                    # H -= w_ii * Q[ii]
-                    _axpy(n_features, -w_ii, Q_ptr + ii * n_features, 1,
-                          H_ptr, 1)
-
-                tmp = q[ii] - H[ii]
+                # if Q = X.T @ X then tmp = X[:,ii] @ (y - X @ w + X[:, ii] * w_ii)
+                tmp = q[ii] - Qw[ii] + w_ii * Q[ii, ii]
 
                 if positive and tmp < 0:
                     w[ii] = 0.0
                 else:
                     w[ii] = fsign(tmp) * fmax(fabs(tmp) - alpha, 0) \
                         / (Q[ii, ii] + beta)
 
-                if w[ii] != 0.0:
-                    # H +=  w[ii] * Q[ii] # Update H = X.T X w
-                    _axpy(n_features, w[ii], Q_ptr + ii * n_features, 1,
-                          H_ptr, 1)
+                if w[ii] != 0.0 or w_ii != 0.0:
+                    # Qw +=  (w[ii] - w_ii) * Q[ii]  # Update Qw = Q @ w
+                    _axpy(n_features, w[ii] - w_ii, &Q[ii, 0], 1,
+                          &Qw[0], 1)
 
                 # update the maximum absolute coefficient update
                 d_w_ii = fabs(w[ii] - w_ii)
@@ -689,23 +679,21 @@ def enet_coordinate_descent_gram(
                 # the tolerance: check the duality gap as ultimate stopping
                 # criterion
 
-                # q_dot_w = np.dot(w, q)
-                q_dot_w = _dot(n_features, w_ptr, 1, q_ptr, 1)
+                # q_dot_w = w @ q
+                q_dot_w = _dot(n_features, &w[0], 1, &q[0], 1)
 
                 for ii in range(n_features):
-                    XtA[ii] = q[ii] - H[ii] - beta * w[ii]
+                    XtA[ii] = q[ii] - Qw[ii] - beta * w[ii]
                 if positive:
-                    dual_norm_XtA = max(n_features, XtA_ptr)
+                    dual_norm_XtA = max(n_features, &XtA[0])
                 else:
-                    dual_norm_XtA = abs_max(n_features, XtA_ptr)
+                    dual_norm_XtA = abs_max(n_features, &XtA[0])
 
-                # temp = np.sum(w * H)
-                tmp = 0.0
-                for ii in range(n_features):
-                    tmp += w[ii] * H[ii]
+                # temp = w @ Q @ w
+                tmp = _dot(n_features, &w[0], 1, &Qw[0], 1)
                 R_norm2 = y_norm2 + tmp - 2.0 * q_dot_w
 
-                # w_norm2 = np.dot(w, w)
+                # w_norm2 = w @ w
                 w_norm2 = _dot(n_features, &w[0], 1, &w[0], 1)
 
                 if (dual_norm_XtA > alpha):