[WIP] Estimator.iter_fits for efficient parameter search #2000

Closed
wants to merge 5 commits into from

Changes from all commits
17 changes: 17 additions & 0 deletions sklearn/base.py
@@ -10,6 +10,8 @@
from scipy import sparse
from .externals import six

from .utils.iter_fits import group_params


###############################################################################
def clone(estimator, safe=True):
@@ -260,6 +262,21 @@ def __str__(self):
_pprint(self.get_params(deep=True),
offset=len(class_name), printer=str,),)

def iter_fits(self, param_iter, *args, **kwargs):
if not hasattr(self, '_non_fit_params'):
for params in param_iter:
self.set_params(**params)
yield params, self.fit(*args, **kwargs)
return
non_fit_params = set(self._non_fit_params)
for group, entries in group_params(param_iter, lambda k: k not in non_fit_params):
entries = list(entries)
self.set_params(**entries[0])
self.fit(*args, **kwargs)
for params in entries:
self.set_params(**params)
yield params, self


###############################################################################
class ClassifierMixin(object):
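Illustrative only, not part of the diff: a minimal sketch of the grouping behaviour the default iter_fits above provides, using an invented ToyEstimator that marks threshold as a non-fit parameter (assumes this branch is installed).

from sklearn.base import BaseEstimator

class ToyEstimator(BaseEstimator):
    # changing threshold does not change what fit computes
    _non_fit_params = ['threshold']

    def __init__(self, C=1.0, threshold=0.5):
        self.C = C
        self.threshold = threshold

    def fit(self, X, y=None):
        print('fitting with C=%r' % self.C)
        return self

settings = [{'C': 1.0, 'threshold': 0.1},
            {'C': 1.0, 'threshold': 0.9},
            {'C': 10.0, 'threshold': 0.5}]

X = [[0.0], [1.0]]
est = ToyEstimator()
# Only two fits happen (one per distinct C); all three settings are yielded.
for params, fitted in est.iter_fits(settings, X):
    print(params)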
6 changes: 6 additions & 0 deletions sklearn/feature_selection/univariate_selection.py
@@ -380,6 +380,7 @@ class SelectPercentile(_ScoreFilter):
way.

"""
_non_fit_params = ['percentile']

def __init__(self, score_func=f_classif, percentile=10):
if not 0 <= percentile <= 100:
@@ -437,6 +438,7 @@ class SelectKBest(_ScoreFilter):
way.

"""
_non_fit_params = ['k']

def __init__(self, score_func=f_classif, k=10):
self.k = k
@@ -483,6 +485,7 @@ class SelectFpr(_PvalueFilter):
`pvalues_` : array-like, shape=(n_features,)
p-values of feature scores.
"""
_non_fit_params = ['alpha']

def __init__(self, score_func=f_classif, alpha=5e-2):
self.alpha = alpha
@@ -517,6 +520,7 @@ class SelectFdr(_PvalueFilter):
`pvalues_` : array-like, shape=(n_features,)
p-values of feature scores.
"""
_non_fit_params = ['alpha']

def __init__(self, score_func=f_classif, alpha=5e-2):
self.alpha = alpha
@@ -549,6 +553,7 @@ class SelectFwe(_PvalueFilter):
`pvalues_` : array-like, shape=(n_features,)
p-values of feature scores.
"""
_non_fit_params = ['alpha']

def __init__(self, score_func=f_classif, alpha=5e-2):
self.alpha = alpha
@@ -588,6 +593,7 @@ class GenericUnivariateSelect(_PvalueFilter):
`pvalues_` : array-like, shape=(n_features,)
p-values of feature scores.
"""
_non_fit_params = ['param', 'mode']

_selection_modes = {'percentile': SelectPercentile,
'k_best': SelectKBest,
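For illustration (not in the diff): a hedged usage sketch of the hunks above. With _non_fit_params = ['k'], SelectKBest can score the features once and serve every candidate k from that single fit (dataset and k values are made up; assumes this branch).

from sklearn.datasets import make_classification
from sklearn.feature_selection import SelectKBest

X, y = make_classification(n_samples=200, n_features=40, random_state=0)

# The univariate scores are computed in a single fit and every value of k
# reuses them; only the selection threshold changes between yields.
selector = SelectKBest()
for params, fitted in selector.iter_fits([{'k': 5}, {'k': 10}, {'k': 20}], X, y):
    print(params, fitted.get_support().sum())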
51 changes: 29 additions & 22 deletions sklearn/grid_search.py
@@ -190,7 +190,7 @@ def __len__(self):
return self.n_iter


def fit_grid_point(X, y, base_clf, clf_params, train, test, scorer,
def fit_grid_points(X, y, base_clf, param_iter, train, test, scorer,
verbose, loss_func=None, **fit_params):
"""Run fit on one set of parameters.

@@ -245,7 +245,6 @@ def fit_grid_point(X, y, base_clf, clf_params, train, test, scorer,

# update parameters of the classifier after a copy of its base structure
clf = clone(base_clf)
clf.set_params(**clf_params)

if hasattr(base_clf, 'kernel') and callable(base_clf.kernel):
# cannot compute the kernel values with custom function
@@ -272,31 +271,34 @@ def fit_grid_point(X, y, base_clf, clf_params, train, test, scorer,
if y is not None:
y_test = y[safe_mask(y, test)]
y_train = y[safe_mask(y, train)]
clf.fit(X_train, y_train, **fit_params)

if scorer is not None:
this_score = scorer(clf, X_test, y_test)
else:
this_score = clf.score(X_test, y_test)
train_args = (X_train, y_train)
test_args = (X_test, y_test)
else:
clf.fit(X_train, **fit_params)
train_args = (X_train, )
test_args = (X_test, )

results = []
for params, est in clf.iter_fits(param_iter, *train_args, **fit_params):
if scorer is not None:
this_score = scorer(clf, X_test)
this_score = scorer(clf, *test_args)
else:
this_score = clf.score(X_test)
this_score = clf.score(*test_args)

if not isinstance(this_score, numbers.Number):
raise ValueError("scoring must return a number, got %s (%s)"
" instead." % (str(this_score), type(this_score)))
if not isinstance(this_score, numbers.Number):
raise ValueError("scoring must return a number, got %s (%s)"
" instead." % (str(this_score), type(this_score)))

if verbose > 2:
msg += ", score=%f" % this_score

results.append((params, this_score))

if verbose > 2:
msg += ", score=%f" % this_score
if verbose > 1:
end_msg = "%s -%s" % (msg,
logger.short_format_time(time.time() -
start_time))
print("[GridSearchCV] %s %s" % ((64 - len(end_msg)) * '.', end_msg))
return this_score, clf_params, _num_samples(X_test)
return results, _num_samples(X_test)


def _check_param_grid(param_grid):
@@ -450,13 +452,18 @@ def _fit(self, X, y, parameter_iterator, **params):
out = Parallel(
n_jobs=self.n_jobs, verbose=self.verbose,
pre_dispatch=pre_dispatch)(
delayed(fit_grid_point)(
X, y, base_clf, clf_params, train, test, scorer,
self.verbose, **self.fit_params) for clf_params in
parameter_iterator for train, test in cv)
delayed(fit_grid_points)(
X, y, base_clf, parameter_iterator, train, test, scorer,
self.verbose, **self.fit_params) for train, test in cv)

# Out is a list of triplet: score, estimator, n_test_samples
# transform out to a list of triplet: score, params, n_test_samples:
n_param_points = len(list(parameter_iterator))
new_out = []
for i in range(n_param_points):
# XXX: assumes parameters iterated in same order!!!
new_out.extend((fold_out[0][i][1], fold_out[0][i][0], fold_out[1]) for fold_out in out)
out = new_out

n_fits = len(out)
n_folds = n_fits // n_param_points

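Not part of the diff: a toy illustration of the reshaping step added to _fit above. Each parallel job now returns all parameter results for a single fold, and the loop rebuilds the parameter-major ordering the rest of _fit expects (all values below are made up).

# out: one entry per CV fold, each entry = (results, n_test_samples),
# where results = [(params, score), ...] in parameter-iterator order.
out = [([({'k': 5}, 0.80), ({'k': 10}, 0.85)], 50),   # fold 0
       ([({'k': 5}, 0.78), ({'k': 10}, 0.90)], 50)]   # fold 1

n_param_points = 2
new_out = []
for i in range(n_param_points):
    new_out.extend((fold_out[0][i][1], fold_out[0][i][0], fold_out[1])
                   for fold_out in out)

# new_out is parameter-major, fold-minor, matching the old per-point output:
# [(0.80, {'k': 5}, 50), (0.78, {'k': 5}, 50),
#  (0.85, {'k': 10}, 50), (0.90, {'k': 10}, 50)]
print(new_out)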
44 changes: 44 additions & 0 deletions sklearn/linear_model/coordinate_descent.py
@@ -23,6 +23,7 @@
from ..externals import six
from ..externals.six.moves import xrange
from ..utils.extmath import safe_sparse_dot
from ..utils.iter_fits import group_params

from . import cd_fast

@@ -187,6 +188,49 @@ def fit(self, X, y, Xy=None, coef_init=None):
fit(X, y, Xy, coef_init)
return self

def _iter_fits(self, param_iter, X, y, Xy=None, coef_init=None):
# Expects param_iter where only alpha varies,
# and sorted by decreasing alpha
X = atleast2d_or_csc(X, dtype=np.float64, order='F',
copy=self.copy_X and self.fit_intercept)
if not sparse.isspmatrix(X):
X, y, X_mean, y_mean, X_std = center_data(X, y, self.fit_intercept,
self.normalize, copy=False)
if Xy is None:
Xy = safe_sparse_dot(X.T, y, dense_output=True)

n_samples, n_features = X.shape
precompute = self.precompute
if (hasattr(precompute, '__array__')
and not np.allclose(X_mean, np.zeros(n_features))
and not np.allclose(X_std, np.ones(n_features))):
# recompute Gram
precompute = 'auto'
Xy = None
if precompute == 'auto':
precompute = (n_samples > n_features)
if not sparse.isspmatrix(X):
precompute = np.dot(X.T, X)
self.precompute = precompute

self.coef_ = coef_init # XXX: even if warm_start = True?
for params in param_iter:
self.set_params(**params)
yield params, self.fit(X, y, Xy, self.coef_)

def iter_fits(self, param_iter, X, y, Xy=None, coef_init=None):
if self.precompute is not None:
if sparse.isspmatrix(X):
warnings.warn("precompute is ignored for sparse data")

orig_params = self.get_params()
for group, entries in group_params(param_iter, lambda k: k != 'alpha'):
self.set_params(**orig_params)
self.set_params(**group)
entries = sorted(entries, key=lambda x: -x.get('alpha', self.alpha))
for tup in self._iter_fits(entries, X, y, Xy, coef_init):
yield tup

def _dense_fit(self, X, y, Xy=None, coef_init=None):

# copy was done in fit if necessary
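Not part of the diff: a sketch of the intended use of ElasticNet.iter_fits above. Settings that differ only in alpha are grouped and fit from the largest alpha down, so each fit warm-starts from the previous coef_ on the path (data and alpha values are illustrative; assumes this branch).

import numpy as np
from sklearn.linear_model import ElasticNet

rng = np.random.RandomState(0)
X = rng.randn(100, 20)
y = X[:, 0] + 0.1 * rng.randn(100)

# alpha is sorted in decreasing order internally, regardless of input order.
path = [{'alpha': a} for a in [1.0, 0.1, 0.01]]
enet = ElasticNet()
for params, fitted in enet.iter_fits(path, X, y):
    print(params['alpha'], int(np.sum(fitted.coef_ != 0)))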
31 changes: 31 additions & 0 deletions sklearn/pipeline.py
@@ -15,6 +15,8 @@
from .externals.joblib import Parallel, delayed
from .externals import six
from .utils import tosequence
from .utils.iter_fits import iter_fit_transforms, iter_fits, group_params, \
find_group
from .externals.six import iteritems

__all__ = ['Pipeline', 'FeatureUnion']
@@ -141,6 +143,35 @@ def fit_transform(self, X, y=None, **fit_params):
else:
return self.steps[-1][-1].fit(Xt, y, **fit_params).transform(Xt)

def iter_fits(self, param_iter, X, y=None, **fit_params):
for params, obj in self._iter_fits(param_iter, 0, False, X, y, **fit_params):
yield params, self

def iter_fit_transforms(self, param_iter, X, y=None, **fit_params):
return self._iter_fits(param_iter, 0, True, X, y, **fit_params)

def _iter_fits(self, param_iter, step, transform_last, X, y=None, **fit_params):
step_name, step_est = self.steps[step]

if step == len(self.steps) - 1:
step_param_iter = [{k.split('__', 1)[1]: v for k, v in iteritems(params)
if k.startswith(step_name + '__')}
for params in param_iter]
fn = iter_fit_transforms if transform_last else iter_fits
for params, obj in fn(step_est, step_param_iter, X, y, **fit_params):
yield {step_name + '__' + k: v for k, v in iteritems(params)}, obj
return

grouped = list(group_params(param_iter,
lambda k: k.startswith(step_name + '__')))
step_param_iter = [{k.split('__', 1)[1]: v for k, v in iteritems(group)} for group, entries in grouped]
for group, Xt in iter_fit_transforms(step_est, step_param_iter, X, y, **fit_params):
group = {step_name + '__' + k: v for k, v in iteritems(group)}
entries = find_group(grouped, group)
for params, obj in self._iter_fits(entries, step + 1, transform_last, Xt, y, **fit_params):
params.update(group)
yield params, obj

def predict(self, X):
"""Applies transforms to the data, and the predict method of the
final estimator. Valid only if the final estimator implements
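Illustrative only, not part of the diff: a usage sketch of Pipeline.iter_fits above. Parameters are split on the step__param prefix, so all settings sharing the same select__k reuse one transform of X and only the final estimator is refit (dataset and parameters are made up; assumes this branch).

from sklearn.datasets import make_classification
from sklearn.feature_selection import SelectKBest
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

X, y = make_classification(n_samples=100, n_features=20, random_state=0)

pipe = Pipeline([('select', SelectKBest()), ('svc', SVC())])

# Two distinct values of select__k -> SelectKBest is fit/transformed twice;
# the SVC is refit once per setting on the cached transformed data.
param_iter = [{'select__k': 5, 'svc__C': 0.1},
              {'select__k': 5, 'svc__C': 1.0},
              {'select__k': 10, 'svc__C': 1.0}]
for params, fitted in pipe.iter_fits(param_iter, X, y):
    print(params, fitted.score(X, y))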
100 changes: 100 additions & 0 deletions sklearn/utils/iter_fits.py
@@ -0,0 +1,100 @@
from collections import defaultdict
from ..externals.six import iteritems


def _default_iter_fits(est, fit, param_iter, *args, **kwargs):
for params in param_iter:
est.set_params(**params)
yield params, fit(*args, **kwargs)


def iter_fits(est, param_iter, *args, **kwargs):
if hasattr(est, 'iter_fits'):
return est.iter_fits(param_iter, *args, **kwargs)
return _default_iter_fits(est, est.fit, param_iter, *args, **kwargs)


def _fit_transform(est):
if hasattr(est, 'fit_transform'):
return est.fit_transform
def fn(X, *args, **kwargs):
est.fit(X, *args, **kwargs)
return est.transform(X)
return fn


def iter_fit_transforms(est, param_iter, X, *args, **kwargs):
if hasattr(est, 'iter_fit_transforms'):
return est.iter_fit_transforms(param_iter, X, *args, **kwargs)
if hasattr(est, 'iter_fits'):
return ((params, new_est.transform(X))
for params, new_est
in est.iter_fits(param_iter, X, *args, **kwargs))
return _default_iter_fits(est, _fit_transform, param_iter, X, *args, **kwargs)


def group_params(items, key_name=lambda x: True):
"""bin by sub-dicts where values are compared by id if not hashable

>>> a = ('boo', 6)
>>> b = ('boo', 6)
>>> id(a) == id(b)
False
>>> import numpy
>>> c = numpy.array([1, 2, 3])
>>> items = [{'p': a, 'q': 1}, {'p': b, 'q': 2}, {'p': c, 'q': 3}, {'p': c, 'q': 4}]
>>> groups = list(group_params(items, lambda x: x == "p"))
>>> len(groups)
2
>>> sorted([[x['q'] for x in gitems] for g, gitems in groups if g['p'] == a][0])
[1, 2]
>>> sorted([[x['q'] for x in gitems] for g, gitems in groups if g['p'] is c][0])
[3, 4]
"""
# can reduce work if input is sorted tuples rather than dict

groups = defaultdict(list)
canonical = {} # maps hashable x to a canonical instance of x
id_to_obj = {}

for params in items:
group = []
for k, v in iteritems(params):
if key_name(k):
try:
v_id = id(canonical.setdefault(v, v))
except TypeError:
v_id = id(v)
id_to_obj[v_id] = v
group.append((k, v_id))
groups[tuple(sorted(group))].append(params)

return [({k: id_to_obj[v_id] for k, v_id in group}, group_items)
for group, group_items in iteritems(groups)]


def find_group(haystack, needle):
"""Given the output of group_params, gets the entries for a single group value.

Again designed to deal with non-hashable, non-comparable types..."""
for group, entries in haystack:
if len(group) != len(needle):
continue
for key, val in iteritems(group):
try:
other_val = needle[key]
except KeyError:
break
if val is other_val:
continue
try:
if val == other_val:
continue
except (ValueError, TypeError):
# can't convert numpy equality to boolean
continue
break
else:
return entries
raise ValueError('Could not find {!r} in given groups:\n{!r}'.format(needle, haystack))
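A standalone sketch of the two helpers above, group_params and find_group, not part of the diff (values are illustrative; assumes this branch is installed).

from sklearn.utils.iter_fits import group_params, find_group

settings = [{'kernel': 'rbf', 'C': 1.0},
            {'kernel': 'rbf', 'C': 10.0},
            {'kernel': 'linear', 'C': 1.0}]

# Group by the 'kernel' key only: two groups, one holding two entries.
grouped = group_params(settings, lambda k: k == 'kernel')
for group, entries in grouped:
    print(group, len(entries))

# find_group recovers the entries for one group key, comparing values by
# identity first so unhashable objects (e.g. numpy arrays) are handled.
rbf_entries = find_group(grouped, {'kernel': 'rbf'})
print(len(rbf_entries))   # -> 2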