import numpy as np
from scipy import linalg
from scipy import sparse
+ from scipy import optimize
from scipy.sparse import linalg as sp_linalg

from ._base import LinearClassifierMixin, LinearModel
@@ -235,6 +236,64 @@ def _solve_svd(X, y, alpha):
    return np.dot(Vt.T, d_UT_y).T


+ def _solve_lbfgs(
+     X, y, alpha, positive=True, max_iter=None, tol=1e-3, X_offset=None, X_scale=None
+ ):
+     """Solve ridge regression with LBFGS.
+
+     The main purpose is fitting with coefficients forced to be positive.
+     For unconstrained ridge regression, there are faster dedicated solver methods.
+     Note that with positive bounds on the coefficients, LBFGS seems faster
+     than scipy.optimize.lsq_linear.
+     """
+     n_samples, n_features = X.shape
+
+     options = {}
+     if max_iter is not None:
+         options["maxiter"] = max_iter
+     config = {
+         "method": "L-BFGS-B",
+         "tol": tol,
+         "jac": True,
+         "options": options,
+     }
+     if positive:
+         config["bounds"] = [(0, np.inf)] * n_features
+
+     if X_offset is not None and X_scale is not None:
+         X_offset_scale = X_offset / X_scale
+     else:
+         X_offset_scale = None
+
+     coefs = np.empty((y.shape[1], n_features), dtype=X.dtype)
+
+     for i in range(y.shape[1]):
+         x0 = np.zeros((n_features,))
+         y_column = y[:, i]
+
+         def func(w):
+             residual = X.dot(w) - y_column
+             if X_offset_scale is not None:
+                 residual -= w.dot(X_offset_scale)
+             f = 0.5 * residual.dot(residual) + 0.5 * alpha[i] * w.dot(w)
+             grad = X.T @ residual + alpha[i] * w
+             if X_offset_scale is not None:
+                 grad -= X_offset_scale * np.sum(residual)
+
+             return f, grad
+
+         result = optimize.minimize(func, x0, **config)
+         if not result["success"]:
+             warnings.warn(
+                 "The lbfgs solver did not converge. Try increasing max_iter "
+                 f"or tol. Currently: max_iter={max_iter} and tol={tol}",
+                 ConvergenceWarning,
+             )
+         coefs[i] = result["x"]
+
+     return coefs
+
+
def _get_valid_accept_sparse(is_X_sparse, solver):
    if is_X_sparse and solver in ["auto", "sag", "saga"]:
        return "csr"
@@ -252,6 +311,7 @@ def ridge_regression(
    max_iter=None,
    tol=1e-3,
    verbose=0,
+     positive=False,
    random_state=None,
    return_n_iter=False,
    return_intercept=False,
@@ -287,8 +347,8 @@ def ridge_regression(
        .. versionadded:: 0.17

-     solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'}, \
-         default='auto'
+     solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', \
+         'sag', 'saga', 'lbfgs'}, default='auto'
        Solver to use in the computational routines:

        - 'auto' chooses the solver automatically based on the type of data.
@@ -317,10 +377,13 @@ def ridge_regression(
          approximately the same scale. You can preprocess the data with a
          scaler from sklearn.preprocessing.

+     - 'lbfgs' uses the L-BFGS-B algorithm implemented in
+       `scipy.optimize.minimize`. It can be used only when `positive`
+       is True.

-     All last five solvers support both dense and sparse data. However, only
-     'sag' and 'sparse_cg' supports sparse input when `fit_intercept` is
-     True.
+     The last six solvers support both dense and sparse data. However, only
+     'sag', 'sparse_cg', and 'lbfgs' support sparse input when `fit_intercept`
+     is True.

        .. versionadded:: 0.17
           Stochastic Average Gradient descent solver.
@@ -331,7 +394,7 @@ def ridge_regression(
        Maximum number of iterations for conjugate gradient solver.
        For the 'sparse_cg' and 'lsqr' solvers, the default value is determined
        by scipy.sparse.linalg. For 'sag' and saga solver, the default value is
-         1000.
+         1000. For 'lbfgs' solver, the default value is 15000.

    tol : float, default=1e-3
        Precision of the solution.
@@ -340,6 +403,10 @@ def ridge_regression(
        Verbosity level. Setting verbose > 0 will display additional
        information depending on the solver used.

+     positive : bool, default=False
+         When set to ``True``, forces the coefficients to be positive.
+         Only the 'lbfgs' solver is supported in this case.
+
    random_state : int, RandomState instance, default=None
        Used when ``solver`` == 'sag' or 'saga' to shuffle the data.
        See :term:`Glossary <random_state>` for details.
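A hypothetical usage sketch of the function-level API once this parameter lands (data and values are illustrative, not from the diff):

import numpy as np
from sklearn.linear_model import ridge_regression

rng = np.random.RandomState(0)
X = rng.randn(30, 2)
y = X @ np.array([1.0, -1.0])
# solver='auto' would resolve to 'lbfgs' because positive=True.
coef = ridge_regression(X, y, alpha=1.0, positive=True)
# The negative ground-truth coefficient is clamped near 0.0 by the constraint.
print(coef)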
@@ -389,6 +456,7 @@ def ridge_regression(
        max_iter=max_iter,
        tol=tol,
        verbose=verbose,
+         positive=positive,
        random_state=random_state,
        return_n_iter=return_n_iter,
        return_intercept=return_intercept,
@@ -407,6 +475,7 @@ def _ridge_regression(
    max_iter=None,
    tol=1e-3,
    verbose=0,
+     positive=False,
    random_state=None,
    return_n_iter=False,
    return_intercept=False,
@@ -418,18 +487,33 @@ def _ridge_regression(
    has_sw = sample_weight is not None

    if solver == "auto":
-         if return_intercept:
-             # only sag supports fitting intercept directly
+         if positive:
+             solver = "lbfgs"
+         elif return_intercept:
+             # sag supports fitting intercept directly
            solver = "sag"
        elif not sparse.issparse(X):
            solver = "cholesky"
        else:
            solver = "sparse_cg"

-     if solver not in ("sparse_cg", "cholesky", "svd", "lsqr", "sag", "saga"):
+     if solver not in ("sparse_cg", "cholesky", "svd", "lsqr", "sag", "saga", "lbfgs"):
        raise ValueError(
            "Known solvers are 'sparse_cg', 'cholesky', 'svd'"
-             " 'lsqr', 'sag' or 'saga'. Got %s." % solver
+             " 'lsqr', 'sag', 'saga' or 'lbfgs'. Got %s." % solver
+         )
+
+     if positive and solver != "lbfgs":
+         raise ValueError(
+             "When positive=True, only 'lbfgs' solver can be used. "
+             f"Please change solver {solver} to 'lbfgs' "
+             "or set positive=False."
+         )
+
+     if solver == "lbfgs" and not positive:
+         raise ValueError(
+             "'lbfgs' solver can be used only when positive=True. "
+             "Please use another solver."
        )

    if return_intercept and solver != "sag":
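The two new checks make `positive` and `solver` mutually constraining. Per the messages above, mismatched combinations would raise (hypothetical calls, given some `X`, `y`):

# ridge_regression(X, y, alpha=1.0, positive=True, solver="sag")
# -> ValueError: When positive=True, only 'lbfgs' solver can be used. ...
#
# ridge_regression(X, y, alpha=1.0, positive=False, solver="lbfgs")
# -> ValueError: 'lbfgs' solver can be used only when positive=True. ...
#
# ridge_regression(X, y, alpha=1.0, positive=True)  # solver='auto' -> 'lbfgs', OK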
@@ -554,6 +638,18 @@ def _ridge_regression(
                intercept = intercept[0]
            coef = np.asarray(coef)

+     elif solver == "lbfgs":
+         coef = _solve_lbfgs(
+             X,
+             y,
+             alpha,
+             positive=positive,
+             tol=tol,
+             max_iter=max_iter,
+             X_offset=X_offset,
+             X_scale=X_scale,
+         )
+
    if solver == "svd":
        if sparse.issparse(X):
            raise TypeError("SVD solver does not support sparse inputs currently")
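Note the branch passes `alpha` through unchanged: by this point `_ridge_regression` has already expanded it to one entry per target, and `_solve_lbfgs` indexes `alpha[i]` in its loop. A shape sketch, assuming the private helper defined above is in scope and with illustrative values:

import numpy as np

rng = np.random.RandomState(0)
X = np.abs(rng.randn(20, 4))
y = X @ np.abs(rng.randn(4, 2))                    # 2D y: two targets
# alpha must be 1D with one entry per target here.
coefs = _solve_lbfgs(X, y, alpha=np.array([1.0, 1.0]), positive=True)
assert coefs.shape == (2, 4)                       # one coefficient row per target
assert (coefs >= 0).all()                          # positivity bounds respected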
@@ -585,6 +681,7 @@ def __init__(
        max_iter=None,
        tol=1e-3,
        solver="auto",
+         positive=False,
        random_state=None,
    ):
        self.alpha = alpha
@@ -594,6 +691,7 @@ def __init__(
        self.max_iter = max_iter
        self.tol = tol
        self.solver = solver
+         self.positive = positive
        self.random_state = random_state

    def fit(self, X, y, sample_weight=None):
@@ -612,16 +710,31 @@ def fit(self, X, y, sample_weight=None):
            multi_output=True,
            y_numeric=True,
        )
-         if sparse.issparse(X) and self.fit_intercept:
-             if self.solver not in ["auto", "sparse_cg", "sag"]:
+         if self.solver == "lbfgs" and not self.positive:
+             raise ValueError(
+                 "'lbfgs' solver can be used only when positive=True. "
+                 "Please use another solver."
+             )
+
+         if self.positive:
+             if self.solver not in ["auto", "lbfgs"]:
+                 raise ValueError(
+                     f"solver='{self.solver}' does not support positive fitting. Please"
+                     " set the solver to 'auto' or 'lbfgs', or set `positive=False`"
+                 )
+             else:
+                 solver = self.solver
+         elif sparse.issparse(X) and self.fit_intercept:
+             if self.solver not in ["auto", "sparse_cg", "sag", "lbfgs"]:
                raise ValueError(
                    "solver='{}' does not support fitting the intercept "
                    "on sparse data. Please set the solver to 'auto' or "
-                     "'sparse_cg', 'sag', or set `fit_intercept=False`".format(
-                         self.solver
-                     )
+                     "'sparse_cg', 'sag', 'lbfgs' "
+                     "or set `fit_intercept=False`".format(self.solver)
                )
-         if self.solver == "sag" and self.max_iter is None and self.tol > 1e-4:
+             if self.solver == "lbfgs":
+                 solver = "lbfgs"
+             elif self.solver == "sag" and self.max_iter is None and self.tol > 1e-4:
                warnings.warn(
                    '"sag" solver requires many iterations to fit '
                    "an intercept with sparse inputs. Either set the "
@@ -658,6 +771,7 @@ def fit(self, X, y, sample_weight=None):
                max_iter=self.max_iter,
                tol=self.tol,
                solver="sag",
+                 positive=self.positive,
                random_state=self.random_state,
                return_n_iter=True,
                return_intercept=True,
@@ -682,6 +796,7 @@ def fit(self, X, y, sample_weight=None):
                max_iter=self.max_iter,
                tol=self.tol,
                solver=solver,
+                 positive=self.positive,
                random_state=self.random_state,
                return_n_iter=True,
                return_intercept=False,
@@ -744,12 +859,13 @@ class Ridge(MultiOutputMixin, RegressorMixin, _BaseRidge):
        Maximum number of iterations for conjugate gradient solver.
        For 'sparse_cg' and 'lsqr' solvers, the default value is determined
        by scipy.sparse.linalg. For 'sag' solver, the default value is 1000.
+         For 'lbfgs' solver, the default value is 15000.

    tol : float, default=1e-3
        Precision of the solution.

-     solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'}, \
-         default='auto'
+     solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', \
+         'sag', 'saga', 'lbfgs'}, default='auto'
        Solver to use in the computational routines:

        - 'auto' chooses the solver automatically based on the type of data.
@@ -777,15 +893,23 @@ class Ridge(MultiOutputMixin, RegressorMixin, _BaseRidge):
          approximately the same scale. You can preprocess the data with a
          scaler from sklearn.preprocessing.

-     All last five solvers support both dense and sparse data. However, only
-     'sag' and 'sparse_cg' supports sparse input when `fit_intercept` is
-     True.
+     - 'lbfgs' uses the L-BFGS-B algorithm implemented in
+       `scipy.optimize.minimize`. It can be used only when `positive`
+       is True.
+
+     The last six solvers support both dense and sparse data. However, only
+     'sag', 'sparse_cg', and 'lbfgs' support sparse input when `fit_intercept`
+     is True.

        .. versionadded:: 0.17
           Stochastic Average Gradient descent solver.
        .. versionadded:: 0.19
           SAGA solver.

+     positive : bool, default=False
+         When set to ``True``, forces the coefficients to be positive.
+         Only the 'lbfgs' solver is supported in this case.
+
    random_state : int, RandomState instance, default=None
        Used when ``solver`` == 'sag' or 'saga' to shuffle the data.
        See :term:`Glossary <random_state>` for details.
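A hypothetical end-to-end usage sketch of the estimator once `positive` is available on Ridge (data is illustrative):

import numpy as np
from sklearn.linear_model import Ridge

rng = np.random.RandomState(0)
X = rng.randn(40, 3)
y = X @ np.array([2.0, -1.0, 0.5]) + 0.1 * rng.randn(40)

reg = Ridge(alpha=1.0, positive=True).fit(X, y)
print(reg.coef_)       # elementwise >= 0; solver resolved to 'lbfgs' under the hood
print(reg.intercept_)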
@@ -843,6 +967,7 @@ def __init__(
        max_iter=None,
        tol=1e-3,
        solver="auto",
+         positive=False,
        random_state=None,
    ):
        super().__init__(
@@ -853,6 +978,7 @@ def __init__(
            max_iter=max_iter,
            tol=tol,
            solver=solver,
+             positive=positive,
            random_state=random_state,
        )
@@ -932,8 +1058,8 @@ class RidgeClassifier(LinearClassifierMixin, _BaseRidge):
        weights inversely proportional to class frequencies in the input data
        as ``n_samples / (n_classes * np.bincount(y))``.

-     solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'}, \
-         default='auto'
+     solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', \
+         'sag', 'saga', 'lbfgs'}, default='auto'
        Solver to use in the computational routines:

        - 'auto' chooses the solver automatically based on the type of data.
@@ -966,6 +1092,14 @@ class RidgeClassifier(LinearClassifierMixin, _BaseRidge):
        .. versionadded:: 0.19
           SAGA solver.

+     - 'lbfgs' uses the L-BFGS-B algorithm implemented in
+       `scipy.optimize.minimize`. It can be used only when `positive`
+       is True.
+
+     positive : bool, default=False
+         When set to ``True``, forces the coefficients to be positive.
+         Only the 'lbfgs' solver is supported in this case.
+
    random_state : int, RandomState instance, default=None
        Used when ``solver`` == 'sag' or 'saga' to shuffle the data.
        See :term:`Glossary <random_state>` for details.
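A hypothetical usage sketch for the classifier variant (assumes the new kwarg; data from a toy generator):

from sklearn.datasets import make_classification
from sklearn.linear_model import RidgeClassifier

X, y = make_classification(n_samples=100, n_features=5, random_state=0)
clf = RidgeClassifier(positive=True).fit(X, y)
print((clf.coef_ >= 0).all())   # True: decision-function weights constrained nonnegative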
@@ -1025,6 +1159,7 @@ def __init__(
        tol=1e-3,
        class_weight=None,
        solver="auto",
+         positive=False,
        random_state=None,
    ):
        super().__init__(
@@ -1035,6 +1170,7 @@ def __init__(
            max_iter=max_iter,
            tol=tol,
            solver=solver,
+             positive=positive,
            random_state=random_state,
        )
        self.class_weight = class_weight