Closed
Description
Description
A nested cross validation prediction using a knn with precomputed metric raised an error
Code to Reproduce
from sklearn import datasets
from sklearn.model_selection import cross_val_predict, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics.pairwise import euclidean_distances
# Open data
iris = datasets.load_iris()
# Compute pairwise metric
metric = euclidean_distances(iris.data)
# Create nested cross validation
knn = KNeighborsClassifier(metric = 'precomputed')
knngs = GridSearchCV(knn, param_grid={"n_neighbors": [1, 5, 10]})
predicted = cross_val_predict(knngs, metric, iris.target, cv=10)
Expected Results
Should return the predictions made by the model as the following code produces:
from sklearn import datasets
from sklearn.model_selection import cross_val_predict, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
# Open data
iris = datasets.load_iris()
# Create nested cross validation
knn = KNeighborsClassifier()
knngs = GridSearchCV(knn, param_grid={"n_neighbors": [1, 5, 10]})
predicted = cross_val_predict(knngs, iris.data, iris.target, cv=10)
Actual Results
ValueError Traceback (most recent call last)
<ipython-input-8-97c590e3aa1e> in <module>()
10
11 knngs = GridSearchCV(knn, param_grid={"n_neighbors": [1, 5, 10]})
---> 12 predicted = cross_val_predict(knngs, metric, iris.target, cv=10)
/sklearn/model_selection/_validation.py in cross_val_predict(estimator, X, y, groups, cv, n_jobs, verbose, fit_params, pre_dispatch, method)
775 prediction_blocks = parallel(delayed(_fit_and_predict)(
776 clone(estimator), X, y, train, test, verbose, fit_params, method)
--> 777 for train, test in cv.split(X, y, groups))
778
779 # Concatenate the predictions
/sklearn/externals/joblib/parallel.py in __call__(self, iterable)
915 # remaining jobs.
916 self._iterating = False
--> 917 if self.dispatch_one_batch(iterator):
918 self._iterating = self._original_iterator is not None
919
/sklearn/externals/joblib/parallel.py in dispatch_one_batch(self, iterator)
757 return False
758 else:
--> 759 self._dispatch(tasks)
760 return True
761
/sklearn/externals/joblib/parallel.py in _dispatch(self, batch)
714 with self._lock:
715 job_idx = len(self._jobs)
--> 716 job = self._backend.apply_async(batch, callback=cb)
717 # A job can complete so quickly than its callback is
718 # called before we get here, causing self._jobs to
/sklearn/externals/joblib/_parallel_backends.py in apply_async(self, func, callback)
180 def apply_async(self, func, callback=None):
181 """Schedule a func to be run"""
--> 182 result = ImmediateResult(func)
183 if callback:
184 callback(result)
/sklearn/externals/joblib/_parallel_backends.py in __init__(self, batch)
547 # Don't delay the application, to avoid keeping the input
548 # arguments in memory
--> 549 self.results = batch()
550
551 def get(self):
/sklearn/externals/joblib/parallel.py in __call__(self)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
/sklearn/externals/joblib/parallel.py in <listcomp>(.0)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
/sklearn/model_selection/_validation.py in _fit_and_predict(estimator, X, y, train, test, verbose, fit_params, method)
848 estimator.fit(X_train, **fit_params)
849 else:
--> 850 estimator.fit(X_train, y_train, **fit_params)
851 func = getattr(estimator, method)
852 predictions = func(X_test)
/sklearn/model_selection/_search.py in fit(self, X, y, groups, **fit_params)
720 return results_container[0]
721
--> 722 self._run_search(evaluate_candidates)
723
724 results = results_container[0]
/sklearn/model_selection/_search.py in _run_search(self, evaluate_candidates)
1189 def _run_search(self, evaluate_candidates):
1190 """Search all candidates in param_grid"""
-> 1191 evaluate_candidates(ParameterGrid(self.param_grid))
1192
1193
/sklearn/model_selection/_search.py in evaluate_candidates(candidate_params)
709 for parameters, (train, test)
710 in product(candidate_params,
--> 711 cv.split(X, y, groups)))
712
713 all_candidate_params.extend(candidate_params)
/sklearn/externals/joblib/parallel.py in __call__(self, iterable)
915 # remaining jobs.
916 self._iterating = False
--> 917 if self.dispatch_one_batch(iterator):
918 self._iterating = self._original_iterator is not None
919
/sklearn/externals/joblib/parallel.py in dispatch_one_batch(self, iterator)
757 return False
758 else:
--> 759 self._dispatch(tasks)
760 return True
761
/sklearn/externals/joblib/parallel.py in _dispatch(self, batch)
714 with self._lock:
715 job_idx = len(self._jobs)
--> 716 job = self._backend.apply_async(batch, callback=cb)
717 # A job can complete so quickly than its callback is
718 # called before we get here, causing self._jobs to
/sklearn/externals/joblib/_parallel_backends.py in apply_async(self, func, callback)
180 def apply_async(self, func, callback=None):
181 """Schedule a func to be run"""
--> 182 result = ImmediateResult(func)
183 if callback:
184 callback(result)
/sklearn/externals/joblib/_parallel_backends.py in __init__(self, batch)
547 # Don't delay the application, to avoid keeping the input
548 # arguments in memory
--> 549 self.results = batch()
550
551 def get(self):
/sklearn/externals/joblib/parallel.py in __call__(self)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
/sklearn/externals/joblib/parallel.py in <listcomp>(.0)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
/sklearn/model_selection/_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
516 start_time = time.time()
517
--> 518 X_train, y_train = _safe_split(estimator, X, y, train)
519 X_test, y_test = _safe_split(estimator, X, y, test, train)
520
/sklearn/utils/metaestimators.py in _safe_split(estimator, X, y, indices, train_indices)
195 # X is a precomputed square kernel matrix
196 if X.shape[0] != X.shape[1]:
--> 197 raise ValueError("X should be a square kernel matrix")
198 if train_indices is None:
199 X_subset = X[np.ix_(indices, indices)]
ValueError: X should be a square kernel matrix
Versions
sklearn 0.20.2