Skip to content

Commit 33cde65

Browse files
BUG: ensure list of tuples results in 1d masked array in cv_results, as opposed to 2d array (#28571)
Co-authored-by: Loïc Estève <loic.esteve@ymail.com>
1 parent 08b4714 commit 33cde65

File tree

2 files changed

+34
-1
lines changed

2 files changed

+34
-1
lines changed

sklearn/model_selection/_search.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1083,7 +1083,9 @@ def _store(key_name, array, weights=None, splits=False, rank=False):
10831083
arr_dtype = np.result_type(*param_list)
10841084
except TypeError:
10851085
arr_dtype = object
1086-
if len(param_list) == n_candidates:
1086+
if len(param_list) == n_candidates and arr_dtype != object:
1087+
# Exclude `object`; otherwise the numpy constructor might interpret a list
1088+
# of tuples as a 2d array.
10871089
results[key] = MaskedArray(param_list, mask=False, dtype=arr_dtype)
10881090
else:
10891091
# Use one MaskedArray and mask all the places where the param is not

sklearn/model_selection/tests/test_search.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from sklearn.ensemble import HistGradientBoostingClassifier
2424
from sklearn.exceptions import FitFailedWarning
2525
from sklearn.experimental import enable_halving_search_cv # noqa
26+
from sklearn.feature_extraction.text import TfidfVectorizer
2627
from sklearn.impute import SimpleImputer
2728
from sklearn.linear_model import (
2829
LinearRegression,
@@ -56,6 +57,7 @@
5657
)
5758
from sklearn.model_selection._search import BaseSearchCV
5859
from sklearn.model_selection.tests.common import OneTimeSplitter
60+
from sklearn.naive_bayes import ComplementNB
5961
from sklearn.neighbors import KernelDensity, KNeighborsClassifier, LocalOutlierFactor
6062
from sklearn.pipeline import Pipeline
6163
from sklearn.svm import SVC, LinearSVC
@@ -2492,6 +2494,35 @@ def test_search_estimator_param(SearchCV, param_search):
24922494
assert gs.best_estimator_.named_steps["clf"].C == 0.01
24932495

24942496

2497+
def test_search_with_2d_array():
    """Tuple-valued candidates must be stored as a 1d object array.

    Non-regression test: each sampled ``ngram_range`` tuple should occupy a
    single slot of ``cv_results_["param_vect__ngram_range"]`` rather than
    being expanded into a row of a 2d array.
    """
    # Every candidate value of `ngram_range` is itself a tuple, which is the
    # case that can be mistaken for a 2d array when building the results.
    param_space = {
        "vect__ngram_range": ((1, 1), (1, 2)),  # unigrams or bigrams
        "vect__norm": ("l1", "l2"),
    }
    steps = [
        ("vect", TfidfVectorizer()),
        ("clf", ComplementNB()),
    ]
    search = RandomizedSearchCV(
        estimator=Pipeline(steps),
        param_distributions=param_space,
        n_iter=3,
        random_state=0,
        n_jobs=2,
        verbose=1,
        cv=3,
    )

    documents = ["one", "two", "three", "four", "five"]
    labels = [0, 0, 1, 0, 1]
    search.fit(documents, labels)

    stored = search.cv_results_["param_vect__ngram_range"]

    # Fill the expected 1d object array slot by slot so that each tuple is
    # kept as a single element.
    sampled_ranges = [(1, 2), (1, 2), (1, 1)]
    expected = np.empty(3, dtype=object)
    for position, ngram_range in enumerate(sampled_ranges):
        expected[position] = ngram_range

    np.testing.assert_array_equal(stored.data, expected)
2524+
2525+
24952526
# Metadata Routing Tests
24962527
# ======================
24972528

0 commit comments

Comments
 (0)