@@ -319,7 +319,9 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer,
     """
     score, n_samples_test, _ = _fit_and_score(estimator, X, y, scorer, train,
                                               test, verbose, parameters,
-                                              fit_params, error_score)
+                                              fit_params=fit_params,
+                                              return_n_test_samples=True,
+                                              error_score=error_score)
     return score, parameters, n_samples_test


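For reference, the patched `fit_grid_point` can be exercised directly on a single candidate and a single split. The snippet below is a hedged usage sketch, not part of the patch; the toy split, the `SVC` candidate and the accuracy scorer are illustrative choices.

import numpy as np
from sklearn import datasets, svm
from sklearn.metrics import accuracy_score, make_scorer
from sklearn.model_selection import fit_grid_point

iris = datasets.load_iris()
rng = np.random.RandomState(0)
indices = rng.permutation(len(iris.target))
train, test = indices[:100], indices[100:]

# Returns the test score, the candidate's parameters and the test-set size.
score, parameters, n_test = fit_grid_point(
    iris.data, iris.target, svm.SVC(), {'C': 1.0},
    train, test, make_scorer(accuracy_score), verbose=0)
print(score, parameters, n_test)   # n_test == len(test)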
@@ -552,77 +554,61 @@ def _fit(self, X, y, groups, parameter_iterable):
             pre_dispatch=pre_dispatch
         )(delayed(_fit_and_score)(clone(base_estimator), X, y, self.scorer_,
                                   train, test, self.verbose, parameters,
-                                  self.fit_params,
+                                  fit_params=self.fit_params,
                                   return_train_score=self.return_train_score,
-                                  return_parameters=True,
+                                  return_n_test_samples=True,
+                                  return_times=True, return_parameters=True,
                                   error_score=self.error_score)
           for parameters in parameter_iterable
           for train, test in cv.split(X, y, groups))

         # if one choose to see train score, "out" will contain train score info
         if self.return_train_score:
-            train_scores, test_scores, test_sample_counts, time, parameters = \
-                zip(*out)
+            (train_scores, test_scores, test_sample_counts,
+             fit_time, score_time, parameters) = zip(*out)
         else:
-            test_scores, test_sample_counts, time, parameters = zip(*out)
+            (test_scores, test_sample_counts,
+             fit_time, score_time, parameters) = zip(*out)

         candidate_params = parameters[::n_splits]
         n_candidates = len(candidate_params)

-        # if one choose to return train score, reshape the train_scores array
-        if self.return_train_score:
-            train_scores = np.array(train_scores,
-                                    dtype=np.float64).reshape(n_candidates,
+        results = dict()
+
+        def _store(key_name, array, weights=None, splits=False, rank=False):
+            """A small helper to store the scores/times to the cv_results_"""
+            array = np.array(array, dtype=np.float64).reshape(n_candidates,
                                                               n_splits)
-        test_scores = np.array(test_scores,
-                               dtype=np.float64).reshape(n_candidates,
-                                                         n_splits)
+            if splits:
+                for split_i in range(n_splits):
+                    results["split%d_%s"
+                            % (split_i, key_name)] = array[:, split_i]
+
+            array_means = np.average(array, axis=1, weights=weights)
+            results['mean_%s' % key_name] = array_means
+            # Weighted std is not directly available in numpy
+            array_stds = np.sqrt(np.average((array -
+                                             array_means[:, np.newaxis]) ** 2,
+                                            axis=1, weights=weights))
+            results['std_%s' % key_name] = array_stds
+
+            if rank:
+                results["rank_%s" % key_name] = np.asarray(
+                    rankdata(-array_means, method='min'), dtype=np.int32)
+
+        # Computed the (weighted) mean and std for test scores alone
         # NOTE test_sample counts (weights) remain the same for all candidates
         test_sample_counts = np.array(test_sample_counts[:n_splits],
                                       dtype=np.int)

-        # Computed the (weighted) mean and std for test scores
-        weights = test_sample_counts if self.iid else None
-        test_means = np.average(test_scores, axis=1, weights=weights)
-        test_stds = np.sqrt(
-            np.average((test_scores - test_means[:, np.newaxis]) ** 2, axis=1,
-                       weights=weights))
-
-        time = np.array(time, dtype=np.float64).reshape(n_candidates, n_splits)
-        time_means = np.average(time, axis=1)
-        time_stds = np.sqrt(
-            np.average((time - time_means[:, np.newaxis]) ** 2,
-                       axis=1))
-        if self.return_train_score:
-            train_means = np.average(train_scores, axis=1)
-            train_stds = np.sqrt(
-                np.average((train_scores - train_means[:, np.newaxis]) ** 2,
-                           axis=1))
-
-        cv_results = dict()
-        for split_i in range(n_splits):
-            cv_results["split%d_test_score" % split_i] = test_scores[:,
-                                                                     split_i]
-        cv_results["mean_test_score"] = means
-        cv_results["std_test_score"] = stds
+        _store('test_score', test_scores, splits=True, rank=True,
+               weights=test_sample_counts if self.iid else None)
+        _store('train_score', train_scores, splits=True)
+        _store('fit_time', fit_time)
+        _store('score_time', score_time)

-        if self.return_train_score:
-            for split_i in range(n_splits):
-                results["train_split%d_score" % split_i] = (
-                    train_scores[:, split_i])
-            results["mean_train_score"] = train_means
-            results["std_train_scores"] = train_stds
-            results["rank_train_scores"] = np.asarray(rankdata(-train_means,
-                                                               method='min'),
-                                                      dtype=np.int32)
-
-        results["mean_test_time"] = time_means
-        results["std_test_time"] = time_stds
-        ranks = np.asarray(rankdata(-test_means, method='min'), dtype=np.int32)
-
-        best_index = np.flatnonzero(ranks == 1)[0]
+        best_index = np.flatnonzero(results["rank_test_score"] == 1)[0]
         best_parameters = candidate_params[best_index]
-        cv_results["rank_test_score"] = ranks

         # Use one np.MaskedArray and mask all the places where the param is not
         # applicable for that candidate. Use defaultdict as each candidate may
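The heart of this hunk is the new `_store` closure, which replaces the separate per-metric blocks with one helper that writes per-split columns, a (possibly weighted) mean, a weighted standard deviation and an optional rank into the results dict. A minimal standalone sketch of that aggregation, with made-up shapes and values, is:

import numpy as np
from scipy.stats import rankdata

# shape (n_candidates, n_splits); values are illustrative only
scores = np.array([[0.8, 0.9],
                   [0.6, 0.7],
                   [0.85, 0.95]])
weights = np.array([50, 40])            # e.g. test-set sizes when iid=True

means = np.average(scores, axis=1, weights=weights)
# a weighted std is not available directly in numpy, hence the manual formula
stds = np.sqrt(np.average((scores - means[:, np.newaxis]) ** 2,
                          axis=1, weights=weights))
ranks = np.asarray(rankdata(-means, method='min'), dtype=np.int32)
print(means, stds, ranks)               # rank 1 marks the best weighted mean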
@@ -636,12 +622,12 @@ def _fit(self, X, y, groups, parameter_iterable):
                 # Setting the value at an index also unmasks that index
                 param_results["param_%s" % name][cand_i] = value

-        cv_results.update(param_results)
+        results.update(param_results)

         # Store a list of param dicts at the key 'params'
-        cv_results['params'] = candidate_params
+        results['params'] = candidate_params

-        self.cv_results_ = cv_results
+        self.cv_results_ = results
         self.best_index_ = best_index
         self.n_splits_ = n_splits

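The surrounding context (unchanged here) builds the `param_<name>` columns as masked arrays, so parameters that do not apply to a candidate stay masked. The snippet below is a rough standalone sketch of that idea with hypothetical candidates; it is not the code from this patch.

from collections import defaultdict
from functools import partial
import numpy as np

candidate_params = [{'kernel': 'poly', 'degree': 2},
                    {'kernel': 'rbf', 'gamma': 0.1}]
n_candidates = len(candidate_params)

# every "param_*" column starts fully masked; assigning a value unmasks it,
# so candidates for which a parameter is not applicable remain masked (--)
param_results = defaultdict(partial(np.ma.masked_all, n_candidates,
                                    dtype=object))
for cand_i, params in enumerate(candidate_params):
    for name, value in params.items():
        param_results["param_%s" % name][cand_i] = value

print(param_results["param_degree"])    # [2 --]
print(param_results["param_gamma"])     # [-- 0.1]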
@@ -783,8 +769,8 @@ class GridSearchCV(BaseSearchCV):
         FitFailedWarning is raised. This parameter does not affect the refit
         step, which will always raise the error.

-    return_train_score: boolean, default=True
-        If ``'False'``, the results_ attribute will not include training
+    return_train_score : boolean, default=True
+        If ``'False'``, the ``cv_results_`` attribute will not include training
         scores.


@@ -809,10 +795,12 @@ class GridSearchCV(BaseSearchCV):
         scoring=..., verbose=...)
     >>> sorted(clf.cv_results_.keys())
     ...                             # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
-    ['mean_test_score', 'mean_test_time', 'mean_train_score',...
-     'param_C', 'param_kernel', 'params', 'rank_test_score',...
-     'split0_test_score', 'split1_test_score',...
-     'split2_test_score', 'std_test_score', 'std_test_time'...]
+    ['mean_fit_time', 'mean_score_time', 'mean_test_score',...
+     'mean_train_score', 'param_C', 'param_kernel', 'params',...
+     'rank_test_score', 'split0_test_score',...
+     'split0_train_score', 'split1_test_score', 'split1_train_score',...
+     'split2_test_score', 'split2_train_score',...
+     'std_fit_time', 'std_score_time', 'std_test_score', 'std_train_score'...]

     Attributes
     ----------
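Since the result keys now include fit and score timings, a convenient way to inspect them is to load `cv_results_` into a DataFrame. This is a hedged usage sketch, not part of the patch; it assumes pandas is installed and mirrors the iris/SVC grid used in the doctest above.

import pandas as pd
from sklearn import datasets, svm
from sklearn.model_selection import GridSearchCV

iris = datasets.load_iris()
parameters = {'kernel': ('linear', 'rbf'), 'C': [1, 10]}
clf = GridSearchCV(svm.SVC(), parameters)
clf.fit(iris.data, iris.target)

# train scores are included because return_train_score now defaults to True
df = pd.DataFrame(clf.cv_results_)
print(df[['param_kernel', 'param_C', 'mean_test_score', 'mean_train_score',
          'mean_fit_time', 'mean_score_time', 'rank_test_score']])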
@@ -843,25 +831,24 @@ class GridSearchCV(BaseSearchCV):
                                      mask = [ True  True False False]...),
         'param_degree': masked_array(data = [2.0 3.0 -- --],
                                      mask = [False False  True  True]...),
-        'split0_test_score'  : [0.8, 0.7, 0.8, 0.9],
-        'split1_test_score'  : [0.82, 0.5, 0.7, 0.78],
-        'mean_test_score'    : [0.81, 0.60, 0.75, 0.82],
-        'std_test_score'     : [0.02, 0.01, 0.03, 0.03],
-        'rank_test_score'    : [2, 4, 3, 1],
-        'split0_train_score' : [0.9, 0.8, 0.85, 1.]
-        'split1_train_score' : [0.95, 0.7, 0.8, 0.8]
-        'mean_train_score'   : [0.93, 0.75, 0.83, 0.9]
-        'std_train_score'    : [0.02, 0.01, 0.03, 0.03],
-        'rank_train_score'   : [2, 4, 3, 1],
-        'mean_test_time'     : [0.00073, 0.00063, 0.00043, 0.00049]
-        'std_test_time'      : [1.62e-4, 3.37e-5, 1.42e-5, 1.1e-5]
-        'params'             : [{'kernel': 'poly', 'degree': 2}, ...],
+        'split0_test_score'  : [0.8, 0.7, 0.8, 0.9],
+        'split1_test_score'  : [0.82, 0.5, 0.7, 0.78],
+        'mean_test_score'    : [0.81, 0.60, 0.75, 0.82],
+        'std_test_score'     : [0.02, 0.01, 0.03, 0.03],
+        'rank_test_score'    : [2, 4, 3, 1],
+        'split0_train_score' : [0.8, 0.9, 0.7],
+        'split1_train_score' : [0.82, 0.5, 0.7],
+        'mean_train_score'   : [0.81, 0.7, 0.7],
+        'std_train_score'    : [0.03, 0.03, 0.04],
+        'mean_fit_time'      : [0.73, 0.63, 0.43, 0.49],
+        'std_fit_time'       : [0.01, 0.02, 0.01, 0.01],
+        'mean_score_time'    : [0.007, 0.06, 0.04, 0.04],
+        'std_score_time'     : [0.001, 0.002, 0.003, 0.005],
+        'params'             : [{'kernel': 'poly', 'degree': 2}, ...],
         }

     NOTE that the key ``'params'`` is used to store a list of parameter
-    settings dict for all the parameter candidates. Besides,
-    ``'train_mean_score'``, ``'train_split*_score'``, ... will be present
-    when ``return_train_score=True``.
+    settings dict for all the parameter candidates.

     best_estimator_ : estimator
         Estimator that was chosen by the search, i.e. estimator
@@ -920,7 +907,7 @@ class GridSearchCV(BaseSearchCV):
     def __init__(self, estimator, param_grid, scoring=None, fit_params=None,
                  n_jobs=1, iid=True, refit=True, cv=None, verbose=0,
                  pre_dispatch='2*n_jobs', error_score='raise',
-                 return_train_score=False):
+                 return_train_score=True):
         super(GridSearchCV, self).__init__(
             estimator=estimator, scoring=scoring, fit_params=fit_params,
             n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose,
@@ -1059,8 +1046,8 @@ class RandomizedSearchCV(BaseSearchCV):
         FitFailedWarning is raised. This parameter does not affect the refit
         step, which will always raise the error.

-    return_train_score: boolean, default=True
-        If ``'False'``, the results_ attribute will not include training
+    return_train_score : boolean, default=True
+        If ``'False'``, the ``cv_results_`` attribute will not include training
         scores.

     Attributes
@@ -1095,19 +1082,16 @@ class RandomizedSearchCV(BaseSearchCV):
         'split0_train_score' : [0.8, 0.9, 0.7],
         'split1_train_score' : [0.82, 0.5, 0.7],
         'mean_train_score'   : [0.81, 0.7, 0.7],
-        'std_train_score'    : [0.00073, 0.00063, 0.00043]
-        'rank_train_score'   : [1.62e-4, 3.37e-5, 1.1e-5]
-        'test_mean_time'     : [0.00073, 0.00063, 0.00043]
-        'test_std_time'      : [1.62e-4, 3.37e-5, 1.1e-5]
-        'test_std_score'     : [0.02, 0.2, 0.],
-        'test_rank_score'    : [3, 1, 1],
+        'std_train_score'    : [0.03, 0.03, 0.04],
+        'mean_fit_time'      : [0.73, 0.63, 0.43, 0.49],
+        'std_fit_time'       : [0.01, 0.02, 0.01, 0.01],
+        'mean_score_time'    : [0.007, 0.06, 0.04, 0.04],
+        'std_score_time'     : [0.001, 0.002, 0.003, 0.005],
         'params' : [{'kernel' : 'rbf', 'gamma' : 0.1}, ...],
         }

     NOTE that the key ``'params'`` is used to store a list of parameter
-    settings dict for all the parameter candidates. Besides,
-    'train_mean_score', 'train_split*_score', ... will be present when
-    return_train_score is set to True.
+    settings dict for all the parameter candidates.

     best_estimator_ : estimator
         Estimator that was chosen by the search, i.e. estimator
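The same keys are available from RandomizedSearchCV. The snippet below is a hedged sketch of reading the new timing and ranking entries; the SGDClassifier, the exponential distribution for alpha and the n_iter value are arbitrary illustrations, not from the patch.

import scipy.stats as stats
from sklearn import datasets
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import RandomizedSearchCV

iris = datasets.load_iris()
param_dist = {'alpha': stats.expon(scale=1e-4), 'penalty': ['l1', 'l2']}
search = RandomizedSearchCV(SGDClassifier(), param_dist, n_iter=5,
                            random_state=0)
search.fit(iris.data, iris.target)

print(search.cv_results_['mean_fit_time'])    # one entry per sampled candidate
print(search.cv_results_['rank_test_score'])
print(search.best_params_)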
@@ -1162,7 +1146,7 @@ class RandomizedSearchCV(BaseSearchCV):
     def __init__(self, estimator, param_distributions, n_iter=10, scoring=None,
                  fit_params=None, n_jobs=1, iid=True, refit=True, cv=None,
                  verbose=0, pre_dispatch='2*n_jobs', random_state=None,
-                 error_score='raise', return_train_score=False):
+                 error_score='raise', return_train_score=True):
         self.param_distributions = param_distributions
         self.n_iter = n_iter
         self.random_state = random_state