Rename cv_scores(_) back to grid_scores(_) to keep the name free #2299

Merged (1 commit, Jul 28, 2013)
2 changes: 1 addition & 1 deletion doc/datasets/mldata_fixture.py
@@ -42,4 +42,4 @@ def setup_module():

def teardown_module():
uninstall_mldata_mock()
-shutil.rmtree(custom_data_home)
+shutil.rmtree(custom_data_home)
2 changes: 1 addition & 1 deletion examples/covariance/plot_sparse_cov.py
@@ -126,7 +126,7 @@
# plot the model selection metric
pl.figure(figsize=(4, 3))
pl.axes([.2, .15, .75, .7])
-pl.plot(model.cv_alphas_, np.mean(model.cv_scores, axis=1), 'o-')
+pl.plot(model.cv_alphas_, np.mean(model.grid_scores, axis=1), 'o-')
pl.axvline(model.alpha_, color='.5')
pl.title('Model selection')
pl.ylabel('Cross-validation score')
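Note for readers of this hunk: `GraphLassoCV` stores these scores as a 2D array of shape (n_alphas, n_folds) (see the graph_lasso_.py hunk below), so `np.mean(..., axis=1)` yields one mean CV score per penalty in `cv_alphas_`. A minimal sketch, assuming `model` is the fitted `GraphLassoCV` from this example:

```python
import numpy as np

# one row per alpha tried, one column per CV fold
mean_scores = np.mean(model.grid_scores, axis=1)
assert len(mean_scores) == len(model.cv_alphas_)
```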
2 changes: 1 addition & 1 deletion examples/grid_search_digits.py
@@ -60,7 +60,7 @@
print()
print("Grid scores on development set:")
print()
-for params, mean_score, scores in clf.cv_scores_:
+for params, mean_score, scores in clf.grid_scores_:
print("%0.3f (+/-%0.03f) for %r"
% (mean_score, scores.std() / 2, params))
print()
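Each entry unpacked in this loop is a `_CVScoreTuple`, a named tuple, so attribute access works as well as positional unpacking. A minimal sketch, assuming `clf` is the fitted `GridSearchCV` from this example:

```python
for entry in clf.grid_scores_:
    # .parameters: dict of one candidate setting
    # .mean_validation_score: mean score across folds
    # .cv_validation_scores: array of per-fold scores
    print("%0.3f (+/-%0.3f) for %r"
          % (entry.mean_validation_score,
             entry.cv_validation_scores.std() / 2,
             entry.parameters))
```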
2 changes: 1 addition & 1 deletion examples/plot_rfe_with_cross_validation.py
@@ -32,5 +32,5 @@
pl.figure()
pl.xlabel("Number of features selected")
pl.ylabel("Cross validation score (nb of misclassifications)")
-pl.plot(range(1, len(rfecv.cv_scores_) + 1), rfecv.cv_scores_)
+pl.plot(range(1, len(rfecv.grid_scores_) + 1), rfecv.grid_scores_)
pl.show()
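The `range(1, len(...) + 1)` x-axis reflects that `grid_scores_[i]` holds the CV score of the i-th feature subset, i.e. i + 1 selected features with `step=1`. A hedged one-liner on top of the fitted `rfecv` above:

```python
import numpy as np

# feature count that maximizes the CV score (+1 converts index to count)
n_best = int(np.argmax(rfecv.grid_scores_)) + 1
```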
10 changes: 5 additions & 5 deletions examples/randomized_search.py
@@ -39,8 +39,8 @@


# Utility function to report best scores
-def report(cv_scores, n_top=3):
-    top_scores = sorted(cv_scores, key=itemgetter(1), reverse=True)[:n_top]
+def report(grid_scores, n_top=3):
+    top_scores = sorted(grid_scores, key=itemgetter(1), reverse=True)[:n_top]
for i, score in enumerate(top_scores):
print("Model with rank: {0}".format(i + 1))
print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
@@ -67,7 +67,7 @@ def report(cv_scores, n_top=3):
random_search.fit(X, y)
print("RandomizedSearchCV took %.2f seconds for %d candidates"
" parameter settings." % ((time() - start), n_iter_search))
-report(random_search.cv_scores_)
+report(random_search.grid_scores_)

# use a full grid over all parameters
param_grid = {"max_depth": [3, None],
@@ -82,5 +82,5 @@ def report(cv_scores, n_top=3):
grid_search.fit(X, y)

print("GridSearchCV took %.2f seconds for %d candidate parameter settings."
-      % (time() - start, len(grid_search.cv_scores_)))
-report(grid_search.cv_scores_)
+      % (time() - start, len(grid_search.grid_scores_)))
+report(grid_search.grid_scores_)
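Because each `grid_scores_` entry is a named tuple (and hence an ordinary tuple), the `itemgetter(1)` in `report()` picks out the mean validation score. A minimal check, assuming the fitted `grid_search` above:

```python
from operator import itemgetter

entry = grid_search.grid_scores_[0]
# position 1 and the named attribute are the same value
assert itemgetter(1)(entry) == entry.mean_validation_score
```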
4 changes: 2 additions & 2 deletions examples/svm/plot_rbf_parameters.py
@@ -105,8 +105,8 @@
pl.axis('tight')

# plot the scores of the grid
-# cv_scores_ contains parameter settings and scores
-score_dict = grid.cv_scores_
+# grid_scores_ contains parameter settings and scores
+score_dict = grid.grid_scores_

# We extract just the scores
scores = [x[1] for x in score_dict]
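To plot the heatmap, the flat list of mean scores can be reshaped over the two parameter axes. A sketch under two assumptions: that the grid was built from `C_range` and `gamma_range` (the axes this example uses), and that the grid iterates settings in a fixed, deterministic order:

```python
import numpy as np

mean_scores = np.array([s[1] for s in grid.grid_scores_])
# rows follow C_range, columns follow gamma_range under that ordering
score_grid = mean_scores.reshape(len(C_range), len(gamma_range))
```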
2 changes: 1 addition & 1 deletion examples/svm/plot_svm_scale_c.py
@@ -131,7 +131,7 @@
cv=ShuffleSplit(n=n_samples, train_size=train_size,
n_iter=250, random_state=1))
grid.fit(X, y)
-scores = [x[1] for x in grid.cv_scores_]
+scores = [x[1] for x in grid.grid_scores_]

scales = [(1, 'No scaling'),
((n_samples * train_size), '1/n_samples'),
8 changes: 4 additions & 4 deletions sklearn/covariance/graph_lasso_.py
@@ -422,7 +422,7 @@ class GraphLassoCV(GraphLasso):
`cv_alphas_`: list of float
All penalization parameters explored.

-`cv_scores`: 2D numpy.ndarray (n_alphas, n_folds)
+`grid_scores`: 2D numpy.ndarray (n_alphas, n_folds)
Log-likelihood score on left-out data across folds.

See Also
@@ -551,14 +551,14 @@ def fit(self, X, y=None):
% (i + 1, n_refinements, time.time() - t0))

path = list(zip(*path))
-cv_scores = list(path[1])
+grid_scores = list(path[1])
alphas = list(path[0])
# Finally, compute the score with alpha = 0
alphas.append(0)
-cv_scores.append(cross_val_score(EmpiricalCovariance(), X,
+grid_scores.append(cross_val_score(EmpiricalCovariance(), X,
                                   cv=cv, n_jobs=self.n_jobs,
                                   verbose=inner_verbose))
-self.cv_scores = np.array(cv_scores)
+self.grid_scores = np.array(grid_scores)
best_alpha = alphas[best_index]
self.alpha_ = best_alpha
self.cv_alphas_ = alphas
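Unlike the search estimators, `GraphLassoCV` exposes `grid_scores` without a trailing underscore, as the docstring hunk above shows. A self-contained sketch under the 0.14-era API (the toy data here is purely illustrative):

```python
import numpy as np
from sklearn.covariance import GraphLassoCV
from sklearn.datasets import make_sparse_spd_matrix

# sample from a Gaussian with a sparse precision (inverse covariance)
rng = np.random.RandomState(0)
prec = make_sparse_spd_matrix(10, alpha=0.95, random_state=0)
X = rng.multivariate_normal(np.zeros(10), np.linalg.inv(prec), size=60)

model = GraphLassoCV().fit(X)
# rows: penalties in cv_alphas_ (alpha=0 appended last); columns: folds
print(model.grid_scores.shape, len(model.cv_alphas_))
```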
6 changes: 3 additions & 3 deletions sklearn/feature_selection/rfe.py
@@ -261,9 +261,9 @@ class RFECV(RFE, MetaEstimatorMixin):
Selected (i.e., estimated best)
features are assigned rank 1.

-`cv_scores_` : array of shape [n_subsets_of_features]
+`grid_scores_` : array of shape [n_subsets_of_features]
The cross-validation scores such that
-`cv_scores_[i]` corresponds to
+`grid_scores_[i]` corresponds to
the CV score of the i-th subset of features.

`estimator_` : object
@@ -373,5 +373,5 @@ def fit(self, X, y):
self.estimator_.set_params(**self.estimator_params)
self.estimator_.fit(self.transform(X), y)

-self.cv_scores_ = scores / n
+self.grid_scores_ = scores / n
return self
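A minimal, runnable sketch of the renamed attribute (assuming the 0.14-era API shown in this diff), mirroring the non-regression test below — with `step=1` there is one score per feature-subset size:

```python
from sklearn.datasets import make_classification
from sklearn.feature_selection import RFECV
from sklearn.svm import SVC

X, y = make_classification(n_samples=100, n_features=10, random_state=0)
rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=3)
rfecv.fit(X, y)

# one CV score per subset size, from 1 feature up to all of them
assert len(rfecv.grid_scores_) == X.shape[1]
```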
2 changes: 1 addition & 1 deletion sklearn/feature_selection/tests/test_rfe.py
@@ -72,7 +72,7 @@ def test_rfecv():
rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=3)
rfecv.fit(X, y)
# non-regression test for missing worst feature:
-assert_equal(len(rfecv.cv_scores_), X.shape[1])
+assert_equal(len(rfecv.grid_scores_), X.shape[1])
assert_equal(len(rfecv.ranking_), X.shape[1])
X_r = rfecv.transform(X)

22 changes: 8 additions & 14 deletions sklearn/grid_search.py
@@ -488,8 +488,8 @@ def _fit(self, X, y, parameter_iterable):
n_jobs=self.n_jobs, verbose=self.verbose,
pre_dispatch=pre_dispatch)(
delayed(fit_grid_point)(
-X, y, base_estimator, parameters, train, test, self.scorer_,
-self.verbose, **self.fit_params)
+X, y, base_estimator, parameters, train, test,
+self.scorer_, self.verbose, **self.fit_params)
for parameters in parameter_iterable
for train, test in cv)

@@ -498,7 +498,7 @@ def _fit(self, X, y, parameter_iterable):
n_folds = len(cv)

scores = list()
-cv_scores = list()
+grid_scores = list()
for grid_start in range(0, n_fits, n_folds):
n_test_samples = 0
score = 0
@@ -516,16 +516,16 @@ def _fit(self, X, y, parameter_iterable):
score /= float(n_folds)
scores.append((score, parameters))
# TODO: shall we also store the test_fold_sizes?
-cv_scores.append(_CVScoreTuple(
+grid_scores.append(_CVScoreTuple(
parameters,
score,
np.array(all_scores)))
# Store the computed scores
-self.cv_scores_ = cv_scores
+self.grid_scores_ = grid_scores

# Find the best parameters by comparing on the mean validation score:
# note that `sorted` is deterministic in the way it breaks ties
-best = sorted(cv_scores, key=lambda x: x.mean_validation_score,
+best = sorted(grid_scores, key=lambda x: x.mean_validation_score,
reverse=True)[0]
self.best_params_ = best.parameters
self.best_score_ = best.mean_validation_score
@@ -630,7 +630,7 @@ class GridSearchCV(BaseSearchCV):

Attributes
----------
-`cv_scores_` : list of named tuples
+`grid_scores_` : list of named tuples
Contains scores for all parameter combinations in param_grid.
Each entry corresponds to one parameter setting.
Each named tuple has the attributes:
@@ -685,12 +685,6 @@ def __init__(self, estimator, param_grid, scoring=None, loss_func=None,
self.param_grid = param_grid
_check_param_grid(param_grid)

-@property
-def grid_scores_(self):
-    warnings.warn("grid_scores_ is deprecated and will be removed in 0.15."
-                  " Use cv_scores_ instead.", DeprecationWarning)
-    return self.cv_scores_

def fit(self, X, y=None, **params):
"""Run fit with all sets of parameters.

@@ -789,7 +783,7 @@ class RandomizedSearchCV(BaseSearchCV):

Attributes
----------
-`cv_scores_` : list of named tuples
+`grid_scores_` : list of named tuples
Contains scores for all parameter combinations in param_grid.
Each entry corresponds to one parameter setting.
Each named tuple has the attributes:
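The best-candidate selection in `_fit` above leans on `sorted` being stable: among tied mean validation scores, the setting that came first in iteration order wins. A minimal sketch of the same computation, assuming `search` is any fitted search estimator from this module:

```python
# recompute the winner exactly as _fit does
best = sorted(search.grid_scores_,
              key=lambda x: x.mean_validation_score,
              reverse=True)[0]
print(best.parameters, best.mean_validation_score)
```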
36 changes: 18 additions & 18 deletions sklearn/tests/test_grid_search.py
@@ -157,7 +157,7 @@ def test_grid_search():
assert_equal(grid_search.best_estimator_.foo_param, 2)

for i, foo_i in enumerate([1, 2, 3]):
-assert_true(grid_search.cv_scores_[i][0]
+assert_true(grid_search.grid_scores_[i][0]
== {'foo_param': foo_i})
# Smoke test the score etc:
grid_search.score(X, y)
@@ -194,19 +194,19 @@ def test_grid_search_no_score():
GridSearchCV, clf_no_score, {'C': Cs})


-def test_trivial_cv_scores():
+def test_trivial_grid_scores():
"""Test search over a "grid" with only one point.

-Non-regression test: cv_scores_ wouldn't be set by GridSearchCV.
+Non-regression test: grid_scores_ wouldn't be set by GridSearchCV.
"""
clf = MockClassifier()
grid_search = GridSearchCV(clf, {'foo_param': [1]})
grid_search.fit(X, y)
-assert_true(hasattr(grid_search, "cv_scores_"))
+assert_true(hasattr(grid_search, "grid_scores_"))

random_search = RandomizedSearchCV(clf, {'foo_param': [0]})
random_search.fit(X, y)
-assert_true(hasattr(random_search, "cv_scores_"))
+assert_true(hasattr(random_search, "grid_scores_"))


def test_no_refit():
@@ -245,7 +245,7 @@ def test_grid_search_iid():
# once with iid=True (default)
grid_search = GridSearchCV(svm, param_grid={'C': [1, 10]}, cv=cv)
grid_search.fit(X, y)
-first = grid_search.cv_scores_[0]
+first = grid_search.grid_scores_[0]
assert_equal(first.parameters['C'], 1)
assert_array_almost_equal(first.cv_validation_scores, [1, 1. / 3.])
# for first split, 1/4 of dataset is in test, for second 3/4.
@@ -257,7 +257,7 @@ def test_grid_search_iid():
grid_search = GridSearchCV(svm, param_grid={'C': [1, 10]}, cv=cv,
iid=False)
grid_search.fit(X, y)
-first = grid_search.cv_scores_[0]
+first = grid_search.grid_scores_[0]
assert_equal(first.parameters['C'], 1)
# scores are the same as above
assert_array_almost_equal(first.cv_validation_scores, [1, 1. / 3.])
@@ -471,7 +471,7 @@ def test_X_as_list():
cv = KFold(n=len(X), n_folds=3)
grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, cv=cv)
grid_search.fit(X.tolist(), y).score(X, y)
-assert_true(hasattr(grid_search, "cv_scores_"))
+assert_true(hasattr(grid_search, "grid_scores_"))


def test_unsupervised_grid_search():
Expand Down Expand Up @@ -511,7 +511,7 @@ def test_param_sampler():
assert_true(0 <= sample["C"] <= 1)


-def test_randomized_search_cv_scores():
+def test_randomized_search_grid_scores():
# Make a dataset with a lot of noise to get various kind of prediction
# errors across CV folds and parameter settings
X, y = make_classification(n_samples=200, n_features=100, n_informative=3,
@@ -527,10 +527,10 @@ def test_randomized_search_cv_scores():
search = RandomizedSearchCV(SVC(), n_iter=n_search_iter, cv=n_cv_iter,
param_distributions=params, iid=False)
search.fit(X, y)
-assert_equal(len(search.cv_scores_), n_search_iter)
+assert_equal(len(search.grid_scores_), n_search_iter)

# Check consistency of the structure of each cv_score item
-for cv_score in search.cv_scores_:
+for cv_score in search.grid_scores_:
assert_equal(len(cv_score.cv_validation_scores), n_cv_iter)
# Because we set iid to False, the mean_validation score is the
# mean of the fold mean scores instead of the aggregate sample-wise
@@ -541,12 +541,12 @@ def test_randomized_search_cv_scores():
list(sorted(params.keys())))

# Check the consistency with the best_score_ and best_params_ attributes
-sorted_cv_scores = list(sorted(search.cv_scores_,
+sorted_grid_scores = list(sorted(search.grid_scores_,
                                 key=lambda x: x.mean_validation_score))
-best_score = sorted_cv_scores[-1].mean_validation_score
+best_score = sorted_grid_scores[-1].mean_validation_score
assert_equal(search.best_score_, best_score)

-tied_best_params = [s.parameters for s in sorted_cv_scores
+tied_best_params = [s.parameters for s in sorted_grid_scores
if s.mean_validation_score == best_score]
assert_true(search.best_params_ in tied_best_params,
"best_params_={0} is not part of the"
@@ -563,7 +563,7 @@ def test_grid_search_score_consistency():
grid_search = GridSearchCV(clf, {'C': Cs}, scoring=score)
grid_search.fit(X, y)
cv = StratifiedKFold(n_folds=3, y=y)
-for C, scores in zip(Cs, grid_search.cv_scores_):
+for C, scores in zip(Cs, grid_search.grid_scores_):
clf.set_params(C=C)
scores = scores[2] # get the separate runs from grid scores
i = 0
@@ -607,7 +607,7 @@ def test_grid_search_with_multioutput_data():
for est in estimators:
grid_search = GridSearchCV(est, est_parameters, cv=cv)
grid_search.fit(X, y)
-for parameters, _, cv_validation_scores in grid_search.cv_scores_:
+for parameters, _, cv_validation_scores in grid_search.grid_scores_:
est.set_params(**parameters)

for i, (train, test) in enumerate(cv):
@@ -620,7 +620,7 @@ def test_grid_search_with_multioutput_data():
for est in estimators:
random_search = RandomizedSearchCV(est, est_parameters, cv=cv)
random_search.fit(X, y)
-for parameters, _, cv_validation_scores in random_search.cv_scores_:
+for parameters, _, cv_validation_scores in random_search.grid_scores_:
est.set_params(**parameters)

for i, (train, test) in enumerate(cv):
@@ -633,7 +633,7 @@ def test_grid_search_with_multioutput_data():
for est in estimators:
random_search = RandomizedSearchCV(est, est_parameters, cv=cv)
random_search.fit(X, y)
-for parameters, _, cv_validation_scores in random_search.cv_scores_:
+for parameters, _, cv_validation_scores in random_search.grid_scores_:
est.set_params(**parameters)

for i, (train, test) in enumerate(cv):
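The iid handling exercised in `test_grid_search_iid` is worth spelling out: with `iid=True` (the default) the mean validation score weights each fold by its test-set size, while `iid=False` takes the plain mean of the per-fold scores. A sketch of the arithmetic, using the fold scores and test fractions from that test:

```python
import numpy as np

fold_scores = np.array([1.0, 1.0 / 3.0])
test_fractions = np.array([0.25, 0.75])  # 1/4 then 3/4 of the data in test

iid_mean = np.sum(fold_scores * test_fractions)  # 0.5   (iid=True)
plain_mean = fold_scores.mean()                  # 0.667 (iid=False)
```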