-
-
Notifications
You must be signed in to change notification settings - Fork 26.2k
Closed
Description
Describe the bug
When using `GridSearchCV` or `RandomizedSearchCV` to perform parameter tuning on an `AdaBoostClassifier`, the model doesn't fit when `base_estimator__` parameters are searched, no matter what the base estimator is.
Steps/Code to Reproduce
from sklearn.datasets import load_iris
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
X, y = load_iris(as_frame=True, return_X_y=True)
ada_base_estimator = ExtraTreeClassifier() # change the weak classifier here.
ada = AdaBoostClassifier(ada_base_estimator)
model_params = {
#'n_estimators': loguniform_int(50, 1000), # [50, 75, 100, 150, 200, 250, 500]
#'learning_rate' : loguniform(0.0001, 10),
#'base_estimator__class_weight': [{0:2,1:1}, {0:3,1:1}, {0:5,1:1}]
'base_estimator__min_samples_split': [2, 4, 6], #
}
clf = GridSearchCV(ada, model_params, cv=5,
scoring='balanced_accuracy')
best_model = clf.fit(X, y)
Expected Results
No error thrown.
Note:
The error isn't thrown when only the parameters of the AdaBoost estimator itself are specified (i.e. `learning_rate` and `n_estimators`).
Actual Results
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
[<ipython-input-3-2ade04b86dfd>](https://localhost:8080/#) in <module>
16 clf = GridSearchCV(ada, model_params, cv=5,
17 scoring='balanced_accuracy') #
---> 18 best_model = clf.fit(X, y)
12 frames
[/usr/local/lib/python3.8/dist-packages/sklearn/model_selection/_search.py](https://localhost:8080/#) in fit(self, X, y, groups, **fit_params)
873 return results
874
--> 875 self._run_search(evaluate_candidates)
876
877 # multimetric is determined here because in the case of a callable
[/usr/local/lib/python3.8/dist-packages/sklearn/model_selection/_search.py](https://localhost:8080/#) in _run_search(self, evaluate_candidates)
1387 def _run_search(self, evaluate_candidates):
1388 """Search all candidates in param_grid"""
-> 1389 evaluate_candidates(ParameterGrid(self.param_grid))
1390
1391
[/usr/local/lib/python3.8/dist-packages/sklearn/model_selection/_search.py](https://localhost:8080/#) in evaluate_candidates(candidate_params, cv, more_results)
820 )
821
--> 822 out = parallel(
823 delayed(_fit_and_score)(
824 clone(base_estimator),
[/usr/local/lib/python3.8/dist-packages/joblib/parallel.py](https://localhost:8080/#) in __call__(self, iterable)
1083 # remaining jobs.
1084 self._iterating = False
-> 1085 if self.dispatch_one_batch(iterator):
1086 self._iterating = self._original_iterator is not None
1087
[/usr/local/lib/python3.8/dist-packages/joblib/parallel.py](https://localhost:8080/#) in dispatch_one_batch(self, iterator)
899 return False
900 else:
--> 901 self._dispatch(tasks)
902 return True
903
[/usr/local/lib/python3.8/dist-packages/joblib/parallel.py](https://localhost:8080/#) in _dispatch(self, batch)
817 with self._lock:
818 job_idx = len(self._jobs)
--> 819 job = self._backend.apply_async(batch, callback=cb)
820 # A job can complete so quickly than its callback is
821 # called before we get here, causing self._jobs to
[/usr/local/lib/python3.8/dist-packages/joblib/_parallel_backends.py](https://localhost:8080/#) in apply_async(self, func, callback)
206 def apply_async(self, func, callback=None):
207 """Schedule a func to be run"""
--> 208 result = ImmediateResult(func)
209 if callback:
210 callback(result)
[/usr/local/lib/python3.8/dist-packages/joblib/_parallel_backends.py](https://localhost:8080/#) in __init__(self, batch)
595 # Don't delay the application, to avoid keeping the input
596 # arguments in memory
--> 597 self.results = batch()
598
599 def get(self):
[/usr/local/lib/python3.8/dist-packages/joblib/parallel.py](https://localhost:8080/#) in __call__(self)
286 # change the default number of processes to -1
287 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 288 return [func(*args, **kwargs)
289 for func, args, kwargs in self.items]
290
[/usr/local/lib/python3.8/dist-packages/joblib/parallel.py](https://localhost:8080/#) in <listcomp>(.0)
286 # change the default number of processes to -1
287 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 288 return [func(*args, **kwargs)
289 for func, args, kwargs in self.items]
290
[/usr/local/lib/python3.8/dist-packages/sklearn/utils/fixes.py](https://localhost:8080/#) in __call__(self, *args, **kwargs)
115 def __call__(self, *args, **kwargs):
116 with config_context(**self.config):
--> 117 return self.function(*args, **kwargs)
118
119
[/usr/local/lib/python3.8/dist-packages/sklearn/model_selection/_validation.py](https://localhost:8080/#) in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, split_progress, candidate_progress, error_score)
672 cloned_parameters[k] = clone(v, safe=False)
673
--> 674 estimator = estimator.set_params(**cloned_parameters)
675
676 start_time = time.time()
[/usr/local/lib/python3.8/dist-packages/sklearn/base.py](https://localhost:8080/#) in set_params(self, **params)
215
216 for key, sub_params in nested_params.items():
--> 217 valid_params[key].set_params(**sub_params)
218
219 return self
AttributeError: 'str' object has no attribute 'set_params'
Versions
System:
python: 3.8.10 (default, Nov 14 2022, 12:59:47) [GCC 9.4.0]
executable: /usr/bin/python3
machine: Linux-5.10.147+-x86_64-with-glibc2.29
Python dependencies:
sklearn: 1.2.0
pip: 22.0.4
setuptools: 57.4.0
numpy: 1.21.6
scipy: 1.7.3
Cython: 0.29.33
pandas: 1.3.5
matplotlib: 3.2.2
joblib: 1.2.0
threadpoolctl: 3.1.0
Built with OpenMP: True
threadpoolctl info:
user_api: blas
internal_api: openblas
prefix: libopenblas
filepath: /usr/local/lib/python3.8/dist-packages/numpy.libs/libopenblasp-r0-2d23e62b.3.17.so
version: 0.3.17
threading_layer: pthreads
architecture: Haswell
num_threads: 2
user_api: openmp
internal_api: openmp
prefix: libgomp
filepath: /usr/local/lib/python3.8/dist-packages/scikit_learn.libs/libgomp-a34b3233.so.1.0.0
version: None
num_threads: 2
user_api: blas
internal_api: openblas
prefix: libopenblas
filepath: /usr/local/lib/python3.8/dist-packages/scipy.libs/libopenblasp-r0-8b9e111f.3.17.so
version: 0.3.17
threading_layer: pthreads
architecture: Haswell
num_threads: 2
IMPORTANT NOTE:
This error isn't present on a previous version of scikit-learn (1.0.2) where the code posted works perfectly fine.
Acsts and Dexter-Wx