@@ -329,22 +329,18 @@ def enet_coordinate_descent(
329
329
330
330
w_j = w[j] # Store previous value
331
331
332
- if w_j != 0.0 :
333
- # R += w_j * X[:,j]
334
- _axpy(n_samples, w_j, & X[0 , j], 1 , & R[0 ], 1 )
335
-
336
- # tmp = (X[:,j]*R).sum()
337
- tmp = _dot(n_samples, & X[0 , j], 1 , & R[0 ], 1 )
332
+ # tmp = X[:,j] @ (R + w_j * X[:,j])
333
+ tmp = _dot(n_samples, & X[0 , j], 1 , & R[0 ], 1 ) + w_j * norm2_cols_X[j]
338
334
339
335
if positive and tmp < 0 :
340
336
w[j] = 0.0
341
337
else :
342
338
w[j] = (fsign(tmp) * fmax(fabs(tmp) - alpha, 0 )
343
339
/ (norm2_cols_X[j] + beta))
344
340
345
- if w[j] != 0.0 :
346
- # R -= w[j] * X[:,j] # Update residual
347
- _axpy(n_samples, - w[j], & X[0 , j], 1 , & R[0 ], 1 )
341
+ if w[j] != w_j :
342
+ # R -= ( w[j] - w_j) * X[:,j] # Update residual
343
+ _axpy(n_samples, w_j - w[j], & X[0 , j], 1 , & R[0 ], 1 )
348
344
349
345
# update the maximum absolute coefficient update
350
346
d_w_j = fabs(w[j] - w_j)
@@ -450,7 +446,7 @@ def sparse_enet_coordinate_descent(
450
446
# We work with:
451
447
# yw = sample_weight * y
452
448
# R = sample_weight * residual
453
- # norm_cols_X = np.sum(sample_weight * (X - X_mean)**2, axis=0)
449
+ # norm2_cols_X = np.sum(sample_weight * (X - X_mean)**2, axis=0)
454
450
455
451
if floating is float :
456
452
dtype = np.float32
@@ -461,8 +457,8 @@ def sparse_enet_coordinate_descent(
461
457
cdef unsigned int n_samples = y.shape[0 ]
462
458
cdef unsigned int n_features = w.shape[0 ]
463
459
464
- # compute norms of the columns of X
465
- cdef floating[::1 ] norm_cols_X = np.zeros(n_features, dtype = dtype)
460
+ # compute squared norms of the columns of X
461
+ cdef floating[::1 ] norm2_cols_X = np.zeros(n_features, dtype = dtype)
466
462
467
463
# initial value of the residuals
468
464
# R = y - Zw, weighted version R = sample_weight * (y - Zw)
@@ -523,7 +519,7 @@ def sparse_enet_coordinate_descent(
523
519
for jj in range (startptr, endptr):
524
520
normalize_sum += (X_data[jj] - X_mean_ii) ** 2
525
521
R[X_indices[jj]] -= X_data[jj] * w_ii
526
- norm_cols_X [ii] = normalize_sum + \
522
+ norm2_cols_X [ii] = normalize_sum + \
527
523
(n_samples - endptr + startptr) * X_mean_ii ** 2
528
524
if center:
529
525
for jj in range (n_samples):
@@ -542,7 +538,7 @@ def sparse_enet_coordinate_descent(
542
538
normalize_sum += sample_weight[jj] * X_mean_ii ** 2
543
539
R[jj] += sample_weight[jj] * X_mean_ii * w_ii
544
540
R_sum += R[jj]
545
- norm_cols_X [ii] = normalize_sum
541
+ norm2_cols_X [ii] = normalize_sum
546
542
startptr = endptr
547
543
548
544
# Note: No need to update R_sum from here on because the update terms cancel
@@ -564,34 +560,19 @@ def sparse_enet_coordinate_descent(
564
560
else :
565
561
ii = f_iter
566
562
567
- if norm_cols_X [ii] == 0.0 :
563
+ if norm2_cols_X [ii] == 0.0 :
568
564
continue
569
565
570
566
startptr = X_indptr[ii]
571
567
endptr = X_indptr[ii + 1 ]
572
568
w_ii = w[ii] # Store previous value
573
569
X_mean_ii = X_mean[ii]
574
570
575
- if w_ii != 0.0 :
576
- # R += w_ii * X[:,ii]
577
- if no_sample_weights:
578
- for jj in range (startptr, endptr):
579
- R[X_indices[jj]] += X_data[jj] * w_ii
580
- if center:
581
- for jj in range (n_samples):
582
- R[jj] -= X_mean_ii * w_ii
583
- else :
584
- for jj in range (startptr, endptr):
585
- tmp = sample_weight[X_indices[jj]]
586
- R[X_indices[jj]] += tmp * X_data[jj] * w_ii
587
- if center:
588
- for jj in range (n_samples):
589
- R[jj] -= sample_weight[jj] * X_mean_ii * w_ii
590
-
591
- # tmp = (X[:,ii] * R).sum()
571
+ # tmp = X[:,ii] @ (R + w_ii * X[:,ii])
592
572
tmp = 0.0
593
573
for jj in range (startptr, endptr):
594
574
tmp += R[X_indices[jj]] * X_data[jj]
575
+ tmp += w_ii * norm2_cols_X[ii]
595
576
596
577
if center:
597
578
tmp -= R_sum * X_mean_ii
@@ -600,23 +581,23 @@ def sparse_enet_coordinate_descent(
600
581
w[ii] = 0.0
601
582
else :
602
583
w[ii] = fsign(tmp) * fmax(fabs(tmp) - alpha, 0 ) \
603
- / (norm_cols_X [ii] + beta)
584
+ / (norm2_cols_X [ii] + beta)
604
585
605
- if w[ii] != 0.0 :
606
- # R -= w[ii] * X[:,ii] # Update residual
586
+ if w[ii] != w_ii :
587
+ # R -= ( w[ii] - w_ii) * X[:,ii] # Update residual
607
588
if no_sample_weights:
608
589
for jj in range (startptr, endptr):
609
- R[X_indices[jj]] -= X_data[jj] * w[ii]
590
+ R[X_indices[jj]] -= X_data[jj] * ( w[ii] - w_ii)
610
591
if center:
611
592
for jj in range (n_samples):
612
- R[jj] += X_mean_ii * w[ii]
593
+ R[jj] += X_mean_ii * ( w[ii] - w_ii)
613
594
else :
614
595
for jj in range (startptr, endptr):
615
- tmp = sample_weight[ X_indices[jj] ]
616
- R[X_indices[jj]] -= tmp * X_data[jj] * w[ii]
596
+ kk = X_indices[jj]
597
+ R[kk] -= sample_weight[kk] * X_data[jj] * ( w[ii] - w_ii)
617
598
if center:
618
599
for jj in range (n_samples):
619
- R[jj] += sample_weight[jj] * X_mean_ii * w[ii]
600
+ R[jj] += sample_weight[jj] * X_mean_ii * ( w[ii] - w_ii)
620
601
621
602
# update the maximum absolute coefficient update
622
603
d_w_ii = fabs(w[ii] - w_ii)
@@ -744,10 +725,13 @@ def enet_coordinate_descent_gram(
744
725
cdef floating w_max
745
726
cdef floating d_w_ii
746
727
cdef floating q_dot_w
747
- cdef floating w_norm2
748
728
cdef floating gap = tol + 1.0
749
729
cdef floating d_w_tol = tol
750
730
cdef floating dual_norm_XtA
731
+ cdef floating R_norm2
732
+ cdef floating w_norm2
733
+ cdef floating A_norm2
734
+ cdef floating const_
751
735
cdef unsigned int ii
752
736
cdef unsigned int n_iter = 0
753
737
cdef unsigned int f_iter
@@ -786,7 +770,7 @@ def enet_coordinate_descent_gram(
786
770
w[ii] = fsign(tmp) * fmax(fabs(tmp) - alpha, 0 ) \
787
771
/ (Q[ii, ii] + beta)
788
772
789
- if w[ii] != 0.0 or w_ii ! = 0.0 :
773
+ if w[ii] != w_ii:
790
774
# Qw += (w[ii] - w_ii) * Q[ii] # Update Qw = Q @ w
791
775
_axpy(n_features, w[ii] - w_ii, & Q[ii, 0 ], 1 ,
792
776
& Qw[0 ], 1 )
@@ -899,6 +883,12 @@ def enet_coordinate_descent_multi_task(
899
883
cdef unsigned int n_features = X.shape[1 ]
900
884
cdef unsigned int n_tasks = Y.shape[1 ]
901
885
886
+ # compute squared norms of the columns of X
887
+ # same as norm2_cols_X = np.square(X).sum(axis=0)
888
+ cdef floating[::1 ] norm2_cols_X = np.einsum(
889
+ " ij,ij->j" , X, X, dtype = dtype, order = " C"
890
+ )
891
+
902
892
# to store XtA
903
893
cdef floating[:, ::1 ] XtA = np.zeros((n_features, n_tasks), dtype = dtype)
904
894
cdef floating XtA_axis1norm
@@ -907,7 +897,6 @@ def enet_coordinate_descent_multi_task(
907
897
# initial value of the residuals
908
898
cdef floating[::1 , :] R = np.zeros((n_samples, n_tasks), dtype = dtype, order = ' F' )
909
899
910
- cdef floating[::1 ] norm_cols_X = np.zeros(n_features, dtype = dtype)
911
900
cdef floating[::1 ] tmp = np.zeros(n_tasks, dtype = dtype)
912
901
cdef floating[::1 ] w_ii = np.zeros(n_tasks, dtype = dtype)
913
902
cdef floating d_w_max
@@ -917,8 +906,8 @@ def enet_coordinate_descent_multi_task(
917
906
cdef floating W_ii_abs_max
918
907
cdef floating gap = tol + 1.0
919
908
cdef floating d_w_tol = tol
920
- cdef floating R_norm
921
- cdef floating w_norm
909
+ cdef floating R_norm2
910
+ cdef floating w_norm2
922
911
cdef floating ry_sum
923
912
cdef floating l21_norm
924
913
cdef unsigned int ii
@@ -928,30 +917,23 @@ def enet_coordinate_descent_multi_task(
928
917
cdef uint32_t rand_r_state_seed = rng.randint(0 , RAND_R_MAX)
929
918
cdef uint32_t* rand_r_state = & rand_r_state_seed
930
919
931
- cdef const floating* X_ptr = & X[0 , 0 ]
932
- cdef const floating* Y_ptr = & Y[0 , 0 ]
933
-
934
920
if l1_reg == 0 :
935
921
warnings.warn(
936
922
" Coordinate descent with l1_reg=0 may lead to unexpected"
937
923
" results and is discouraged."
938
924
)
939
925
940
926
with nogil:
941
- # norm_cols_X = (np.asarray(X) ** 2).sum(axis=0)
942
- for ii in range (n_features):
943
- norm_cols_X[ii] = _nrm2(n_samples, X_ptr + ii * n_samples, 1 ) ** 2
944
-
945
927
# R = Y - np.dot(X, W.T)
946
- _copy(n_samples * n_tasks, Y_ptr , 1 , & R[0 , 0 ], 1 )
928
+ _copy(n_samples * n_tasks, & Y[ 0 , 0 ] , 1 , & R[0 , 0 ], 1 )
947
929
for ii in range (n_features):
948
930
for jj in range (n_tasks):
949
931
if W[jj, ii] != 0 :
950
- _axpy(n_samples, - W[jj, ii], X_ptr + ii * n_samples , 1 ,
932
+ _axpy(n_samples, - W[jj, ii], & X[ 0 , ii] , 1 ,
951
933
& R[0 , jj], 1 )
952
934
953
935
# tol = tol * linalg.norm(Y, ord='fro') ** 2
954
- tol = tol * _nrm2(n_samples * n_tasks, Y_ptr , 1 ) ** 2
936
+ tol = tol * _nrm2(n_samples * n_tasks, & Y[ 0 , 0 ] , 1 ) ** 2
955
937
956
938
for n_iter in range (max_iter):
957
939
w_max = 0.0
@@ -962,54 +944,47 @@ def enet_coordinate_descent_multi_task(
962
944
else :
963
945
ii = f_iter
964
946
965
- if norm_cols_X [ii] == 0.0 :
947
+ if norm2_cols_X [ii] == 0.0 :
966
948
continue
967
949
968
950
# w_ii = W[:, ii] # Store previous value
969
951
_copy(n_tasks, & W[0 , ii], 1 , & w_ii[0 ], 1 )
970
952
971
- # Using Numpy:
972
- # R += np.dot(X[:, ii][:, None], w_ii[None, :]) # rank 1 update
973
- # Using Blas Level2:
974
- # _ger(RowMajor, n_samples, n_tasks, 1.0,
975
- # &X[0, ii], 1,
976
- # &w_ii[0], 1, &R[0, 0], n_tasks)
977
- # Using Blas Level1 and for loop to avoid slower threads
978
- # for such small vectors
979
- for jj in range (n_tasks):
980
- if w_ii[jj] != 0 :
981
- _axpy(n_samples, w_ii[jj], X_ptr + ii * n_samples, 1 ,
982
- & R[0 , jj], 1 )
983
-
984
- # Using numpy:
985
- # tmp = np.dot(X[:, ii][None, :], R).ravel()
986
- # Using BLAS Level 2:
987
- # _gemv(RowMajor, Trans, n_samples, n_tasks, 1.0, &R[0, 0],
988
- # n_tasks, &X[0, ii], 1, 0.0, &tmp[0], 1)
953
+ # tmp = X[:, ii] @ (R + w_ii * X[:,ii][:, None])
954
+ # first part: X[:, ii] @ R
955
+ # Using BLAS Level 2:
956
+ # _gemv(RowMajor, Trans, n_samples, n_tasks, 1.0, &R[0, 0],
957
+ # n_tasks, &X[0, ii], 1, 0.0, &tmp[0], 1)
958
+ # second part: (X[:, ii] @ X[:,ii]) * w_ii = norm2_cols * w_ii
959
+ # Using BLAS Level 1:
960
+ # _axpy(n_tasks, norm2_cols[ii], &w_ii[0], 1, &tmp[0], 1)
989
961
# Using BLAS Level 1 (faster for small vectors like here):
990
962
for jj in range (n_tasks):
991
- tmp[jj] = _dot(n_samples, X_ptr + ii * n_samples, 1 ,
992
- & R[0 , jj], 1 )
963
+ tmp[jj] = _dot(n_samples, & X[0 , ii], 1 , & R[0 , jj], 1 )
964
+ # As we have the loop already, we use it to replace the second BLAS
965
+ # Level 1, i.e., _axpy, too.
966
+ tmp[jj] += w_ii[jj] * norm2_cols_X[ii]
993
967
994
968
# nn = sqrt(np.sum(tmp ** 2))
995
969
nn = _nrm2(n_tasks, & tmp[0 ], 1 )
996
970
997
- # W[:, ii] = tmp * fmax(1. - l1_reg / nn, 0) / (norm_cols_X [ii] + l2_reg)
971
+ # W[:, ii] = tmp * fmax(1. - l1_reg / nn, 0) / (norm2_cols_X [ii] + l2_reg)
998
972
_copy(n_tasks, & tmp[0 ], 1 , & W[0 , ii], 1 )
999
- _scal(n_tasks, fmax(1. - l1_reg / nn, 0 ) / (norm_cols_X [ii] + l2_reg),
973
+ _scal(n_tasks, fmax(1. - l1_reg / nn, 0 ) / (norm2_cols_X [ii] + l2_reg),
1000
974
& W[0 , ii], 1 )
1001
975
976
+ # Update residual
1002
977
# Using numpy:
1003
- # R -= np.dot(X [:, ii][:, None], W [:, ii][None, :])
1004
- # Using BLAS Level 2:
1005
- # Update residual : rank 1 update
1006
- # _ger(RowMajor, n_samples, n_tasks, - 1.0,
1007
- # &X[0, ii], 1, &W[0, ii] , 1,
1008
- # &R[0, 0], n_tasks)
978
+ # R -= (W [:, ii] - w_ii) * X [:, ii][:, None]
979
+ # Using BLAS Level 1 and 2:
980
+ # _axpy(n_tasks, -1.0, &W[0, ii], 1, &w_ii[0], 1)
981
+ # _ger(RowMajor, n_samples, n_tasks, 1.0,
982
+ # &X[0, ii], 1, &w_ii , 1,
983
+ # &R[0, 0], n_tasks)
1009
984
# Using BLAS Level 1 (faster for small vectors like here):
1010
985
for jj in range (n_tasks):
1011
- if W[jj, ii] != 0 :
1012
- _axpy(n_samples, - W[jj, ii], X_ptr + ii * n_samples , 1 ,
986
+ if W[jj, ii] != w_ii[jj] :
987
+ _axpy(n_samples, w_ii[jj] - W[jj, ii], & X[ 0 , ii] , 1 ,
1013
988
& R[0 , jj], 1 )
1014
989
1015
990
# update the maximum absolute coefficient update
@@ -1031,7 +1006,7 @@ def enet_coordinate_descent_multi_task(
1031
1006
for ii in range (n_features):
1032
1007
for jj in range (n_tasks):
1033
1008
XtA[ii, jj] = _dot(
1034
- n_samples, X_ptr + ii * n_samples , 1 , & R[0 , jj], 1
1009
+ n_samples, & X[ 0 , ii] , 1 , & R[0 , jj], 1
1035
1010
) - l2_reg * W[jj, ii]
1036
1011
1037
1012
# dual_norm_XtA = np.max(np.sqrt(np.sum(XtA ** 2, axis=1)))
@@ -1042,18 +1017,17 @@ def enet_coordinate_descent_multi_task(
1042
1017
if XtA_axis1norm > dual_norm_XtA:
1043
1018
dual_norm_XtA = XtA_axis1norm
1044
1019
1045
- # TODO: use squared L2 norm directly
1046
- # R_norm = linalg.norm(R, ord='fro')
1047
- # w_norm = linalg.norm(W, ord='fro')
1048
- R_norm = _nrm2(n_samples * n_tasks, & R[0 , 0 ], 1 )
1049
- w_norm = _nrm2(n_features * n_tasks, & W[0 , 0 ], 1 )
1020
+ # R_norm2 = linalg.norm(R, ord='fro') ** 2
1021
+ # w_norm2 = linalg.norm(W, ord='fro') ** 2
1022
+ R_norm2 = _dot(n_samples * n_tasks, & R[0 , 0 ], 1 , & R[0 , 0 ], 1 )
1023
+ w_norm2 = _dot(n_features * n_tasks, & W[0 , 0 ], 1 , & W[0 , 0 ], 1 )
1050
1024
if (dual_norm_XtA > l1_reg):
1051
1025
const_ = l1_reg / dual_norm_XtA
1052
- A_norm = R_norm * const_
1053
- gap = 0.5 * (R_norm ** 2 + A_norm ** 2 )
1026
+ A_norm2 = R_norm2 * ( const_ ** 2 )
1027
+ gap = 0.5 * (R_norm2 + A_norm2 )
1054
1028
else :
1055
1029
const_ = 1.0
1056
- gap = R_norm ** 2
1030
+ gap = R_norm2
1057
1031
1058
1032
# ry_sum = np.sum(R * y)
1059
1033
ry_sum = _dot(n_samples * n_tasks, & R[0 , 0 ], 1 , & Y[0 , 0 ], 1 )
@@ -1066,7 +1040,7 @@ def enet_coordinate_descent_multi_task(
1066
1040
gap += (
1067
1041
l1_reg * l21_norm
1068
1042
- const_ * ry_sum
1069
- + 0.5 * l2_reg * (1 + const_ ** 2 ) * (w_norm ** 2 )
1043
+ + 0.5 * l2_reg * (1 + const_ ** 2 ) * w_norm2
1070
1044
)
1071
1045
1072
1046
if gap <= tol:
0 commit comments