From 1526751ff1451c650439d40ca8c9357bff9e8d2d Mon Sep 17 00:00:00 2001 From: Sangam Date: Sat, 2 Jul 2022 16:34:00 +0530 Subject: [PATCH 1/4] add parameter validation --- sklearn/feature_extraction/_dict_vectorizer.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sklearn/feature_extraction/_dict_vectorizer.py b/sklearn/feature_extraction/_dict_vectorizer.py index e04c409027bda..641a90eff24bd 100644 --- a/sklearn/feature_extraction/_dict_vectorizer.py +++ b/sklearn/feature_extraction/_dict_vectorizer.py @@ -13,6 +13,7 @@ from ..base import BaseEstimator, TransformerMixin from ..utils import check_array, tosequence from ..utils.deprecation import deprecated +from ..utils._param_validation import StrOptions def _tosequence(X): @@ -97,6 +98,13 @@ class DictVectorizer(TransformerMixin, BaseEstimator): array([[0., 0., 4.]]) """ + _parameter_constraints = { + "dtype": [type], # TODO: TypeOptions constraint, + "separator": [str, StrOptions({"="})], + "sparse": ["boolean"], + "sort": ["boolean"], + } + def __init__(self, *, dtype=np.float64, separator="=", sparse=True, sort=True): self.dtype = dtype self.separator = separator @@ -154,6 +162,7 @@ def fit(self, X, y=None): self : object DictVectorizer class instance. """ + self._validate_params() feature_names = [] vocab = {} From c686c31090dcb3d9d76226c2559b79c6f0a077c6 Mon Sep 17 00:00:00 2001 From: SangamSwadiK Date: Mon, 4 Jul 2022 20:39:10 +0530 Subject: [PATCH 2/4] remove stroptions --- sklearn/feature_extraction/_dict_vectorizer.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/feature_extraction/_dict_vectorizer.py b/sklearn/feature_extraction/_dict_vectorizer.py index 641a90eff24bd..e9842dc0e0683 100644 --- a/sklearn/feature_extraction/_dict_vectorizer.py +++ b/sklearn/feature_extraction/_dict_vectorizer.py @@ -13,7 +13,6 @@ from ..base import BaseEstimator, TransformerMixin from ..utils import check_array, tosequence from ..utils.deprecation import deprecated -from ..utils._param_validation import StrOptions def _tosequence(X): @@ -100,7 +99,7 @@ class DictVectorizer(TransformerMixin, BaseEstimator): _parameter_constraints = { "dtype": [type], # TODO: TypeOptions constraint, - "separator": [str, StrOptions({"="})], + "separator": [str], "sparse": ["boolean"], "sort": ["boolean"], } From 765bb7d2ea94fd92ec5e844f86a6d51d83ea77d5 Mon Sep 17 00:00:00 2001 From: SangamSwadiK Date: Wed, 6 Jul 2022 16:08:58 +0530 Subject: [PATCH 3/4] add no validation to type and remove dictvectorizer from to be validated --- sklearn/feature_extraction/_dict_vectorizer.py | 3 ++- sklearn/tests/test_common.py | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/feature_extraction/_dict_vectorizer.py b/sklearn/feature_extraction/_dict_vectorizer.py index e9842dc0e0683..658b114176115 100644 --- a/sklearn/feature_extraction/_dict_vectorizer.py +++ b/sklearn/feature_extraction/_dict_vectorizer.py @@ -98,7 +98,7 @@ class DictVectorizer(TransformerMixin, BaseEstimator): """ _parameter_constraints = { - "dtype": [type], # TODO: TypeOptions constraint, + "dtype": "no validation", # validation delegated to numpy, "separator": [str], "sparse": ["boolean"], "sort": ["boolean"], @@ -318,6 +318,7 @@ def fit_transform(self, X, y=None): Xa : {array, sparse matrix} Feature vectors; always 2-d. """ + self._validate_params() return self._transform(X, fitting=True) def inverse_transform(self, X, dict_type=dict): diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 8c0a2aa57aa91..e7f084ae44959 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -457,7 +457,6 @@ def test_estimators_do_not_raise_errors_in_init_or_set_params(Estimator): "CalibratedClassifierCV", "ClassifierChain", "CountVectorizer", - "DictVectorizer", "DictionaryLearning", "ElasticNetCV", "EllipticEnvelope", From 9f1b4624a3622e34a940f83fe25807daff22c385 Mon Sep 17 00:00:00 2001 From: SangamSwadiK Date: Wed, 6 Jul 2022 17:01:34 +0530 Subject: [PATCH 4/4] fix type --- sklearn/feature_extraction/_dict_vectorizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/feature_extraction/_dict_vectorizer.py b/sklearn/feature_extraction/_dict_vectorizer.py index 658b114176115..8ca9b61645978 100644 --- a/sklearn/feature_extraction/_dict_vectorizer.py +++ b/sklearn/feature_extraction/_dict_vectorizer.py @@ -98,7 +98,7 @@ class DictVectorizer(TransformerMixin, BaseEstimator): """ _parameter_constraints = { - "dtype": "no validation", # validation delegated to numpy, + "dtype": "no_validation", # validation delegated to numpy, "separator": [str], "sparse": ["boolean"], "sort": ["boolean"],