Skip to content

Nested Cross Validation for precomputed KNN #13920

Closed
@Jeanselme

Description

@Jeanselme

Description

A nested cross validation prediction using a knn with precomputed metric raised an error

Code to Reproduce

from sklearn import datasets
from sklearn.model_selection import cross_val_predict, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics.pairwise import euclidean_distances

# Open data
iris = datasets.load_iris()

# Compute pairwise metric
metric = euclidean_distances(iris.data)

# Create nested cross validation
knn = KNeighborsClassifier(metric = 'precomputed')
knngs = GridSearchCV(knn, param_grid={"n_neighbors": [1, 5, 10]})
predicted = cross_val_predict(knngs, metric, iris.target, cv=10)

Expected Results

Should return the predictions made by the model as the following code produces:

from sklearn import datasets
from sklearn.model_selection import cross_val_predict, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# Open data
iris = datasets.load_iris()

# Create nested cross validation
knn = KNeighborsClassifier()
knngs = GridSearchCV(knn, param_grid={"n_neighbors": [1, 5, 10]})
predicted = cross_val_predict(knngs, iris.data, iris.target, cv=10)

Actual Results

ValueError                                Traceback (most recent call last)
<ipython-input-8-97c590e3aa1e> in <module>()
     10 
     11 knngs = GridSearchCV(knn, param_grid={"n_neighbors": [1, 5, 10]})
---> 12 predicted = cross_val_predict(knngs, metric, iris.target, cv=10)

/sklearn/model_selection/_validation.py in cross_val_predict(estimator, X, y, groups, cv, n_jobs, verbose, fit_params, pre_dispatch, method)
    775     prediction_blocks = parallel(delayed(_fit_and_predict)(
    776         clone(estimator), X, y, train, test, verbose, fit_params, method)
--> 777         for train, test in cv.split(X, y, groups))
    778 
    779     # Concatenate the predictions

/sklearn/externals/joblib/parallel.py in __call__(self, iterable)
    915             # remaining jobs.
    916             self._iterating = False
--> 917             if self.dispatch_one_batch(iterator):
    918                 self._iterating = self._original_iterator is not None
    919 

/sklearn/externals/joblib/parallel.py in dispatch_one_batch(self, iterator)
    757                 return False
    758             else:
--> 759                 self._dispatch(tasks)
    760                 return True
    761 

/sklearn/externals/joblib/parallel.py in _dispatch(self, batch)
    714         with self._lock:
    715             job_idx = len(self._jobs)
--> 716             job = self._backend.apply_async(batch, callback=cb)
    717             # A job can complete so quickly than its callback is
    718             # called before we get here, causing self._jobs to

/sklearn/externals/joblib/_parallel_backends.py in apply_async(self, func, callback)
    180     def apply_async(self, func, callback=None):
    181         """Schedule a func to be run"""
--> 182         result = ImmediateResult(func)
    183         if callback:
    184             callback(result)

/sklearn/externals/joblib/_parallel_backends.py in __init__(self, batch)
    547         # Don't delay the application, to avoid keeping the input
    548         # arguments in memory
--> 549         self.results = batch()
    550 
    551     def get(self):

/sklearn/externals/joblib/parallel.py in __call__(self)
    223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    224             return [func(*args, **kwargs)
--> 225                     for func, args, kwargs in self.items]
    226 
    227     def __len__(self):

/sklearn/externals/joblib/parallel.py in <listcomp>(.0)
    223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    224             return [func(*args, **kwargs)
--> 225                     for func, args, kwargs in self.items]
    226 
    227     def __len__(self):

/sklearn/model_selection/_validation.py in _fit_and_predict(estimator, X, y, train, test, verbose, fit_params, method)
    848         estimator.fit(X_train, **fit_params)
    849     else:
--> 850         estimator.fit(X_train, y_train, **fit_params)
    851     func = getattr(estimator, method)
    852     predictions = func(X_test)

/sklearn/model_selection/_search.py in fit(self, X, y, groups, **fit_params)
    720                 return results_container[0]
    721 
--> 722             self._run_search(evaluate_candidates)
    723 
    724         results = results_container[0]

/sklearn/model_selection/_search.py in _run_search(self, evaluate_candidates)
   1189     def _run_search(self, evaluate_candidates):
   1190         """Search all candidates in param_grid"""
-> 1191         evaluate_candidates(ParameterGrid(self.param_grid))
   1192 
   1193 

/sklearn/model_selection/_search.py in evaluate_candidates(candidate_params)
    709                                for parameters, (train, test)
    710                                in product(candidate_params,
--> 711                                           cv.split(X, y, groups)))
    712 
    713                 all_candidate_params.extend(candidate_params)

/sklearn/externals/joblib/parallel.py in __call__(self, iterable)
    915             # remaining jobs.
    916             self._iterating = False
--> 917             if self.dispatch_one_batch(iterator):
    918                 self._iterating = self._original_iterator is not None
    919 

/sklearn/externals/joblib/parallel.py in dispatch_one_batch(self, iterator)
    757                 return False
    758             else:
--> 759                 self._dispatch(tasks)
    760                 return True
    761 

/sklearn/externals/joblib/parallel.py in _dispatch(self, batch)
    714         with self._lock:
    715             job_idx = len(self._jobs)
--> 716             job = self._backend.apply_async(batch, callback=cb)
    717             # A job can complete so quickly than its callback is
    718             # called before we get here, causing self._jobs to

/sklearn/externals/joblib/_parallel_backends.py in apply_async(self, func, callback)
    180     def apply_async(self, func, callback=None):
    181         """Schedule a func to be run"""
--> 182         result = ImmediateResult(func)
    183         if callback:
    184             callback(result)

/sklearn/externals/joblib/_parallel_backends.py in __init__(self, batch)
    547         # Don't delay the application, to avoid keeping the input
    548         # arguments in memory
--> 549         self.results = batch()
    550 
    551     def get(self):

/sklearn/externals/joblib/parallel.py in __call__(self)
    223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    224             return [func(*args, **kwargs)
--> 225                     for func, args, kwargs in self.items]
    226 
    227     def __len__(self):

/sklearn/externals/joblib/parallel.py in <listcomp>(.0)
    223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    224             return [func(*args, **kwargs)
--> 225                     for func, args, kwargs in self.items]
    226 
    227     def __len__(self):

/sklearn/model_selection/_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
    516     start_time = time.time()
    517 
--> 518     X_train, y_train = _safe_split(estimator, X, y, train)
    519     X_test, y_test = _safe_split(estimator, X, y, test, train)
    520 

/sklearn/utils/metaestimators.py in _safe_split(estimator, X, y, indices, train_indices)
    195         # X is a precomputed square kernel matrix
    196         if X.shape[0] != X.shape[1]:
--> 197             raise ValueError("X should be a square kernel matrix")
    198         if train_indices is None:
    199             X_subset = X[np.ix_(indices, indices)]

ValueError: X should be a square kernel matrix

Versions

sklearn 0.20.2

Metadata

Metadata

Assignees

No one assigned

    Labels

    BugEasyWell-defined and straightforward way to resolvehelp wanted

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions