diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 3804f5c01c26c..13b1143215027 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -69,6 +69,11 @@ Enhancements (`#7723 `_) by `Mikhail Korobov`_. + - Added ``flatten_transform`` parameter to :class:`ensemble.VotingClassifier` + to change output shape of `transform` method to 2-dimensional. + (`#7794 `_) + by `Ibraim Ganiev`_. + Bug fixes ......... diff --git a/sklearn/ensemble/tests/test_voting_classifier.py b/sklearn/ensemble/tests/test_voting_classifier.py index f3a5274ab3f4c..6268426b0eeae 100644 --- a/sklearn/ensemble/tests/test_voting_classifier.py +++ b/sklearn/ensemble/tests/test_voting_classifier.py @@ -210,7 +210,7 @@ def test_gridsearch(): grid.fit(iris.data, iris.target) -def test_parallel_predict(): +def test_parallel_fit(): """Check parallel backend of VotingClassifier on toy dataset.""" clf1 = LogisticRegression(random_state=123) clf2 = RandomForestClassifier(random_state=123) @@ -258,3 +258,23 @@ def test_sample_weight(): voting='soft') msg = ('Underlying estimator \'knn\' does not support sample weights.') assert_raise_message(ValueError, msg, eclf3.fit, X, y, sample_weight) + + +def test_transform(): + """Check transform method of VotingClassifier on toy dataset.""" + clf1 = LogisticRegression(random_state=123) + clf2 = RandomForestClassifier(random_state=123) + clf3 = GaussianNB() + X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]]) + y = np.array([1, 1, 2, 2]) + + eclf1 = VotingClassifier(estimators=[ + ('lr', clf1), ('rf', clf2), ('gnb', clf3)], + voting='soft').fit(X, y) + eclf2 = VotingClassifier(estimators=[ + ('lr', clf1), ('rf', clf2), ('gnb', clf3)], + voting='soft', + flatten_transform=True).fit(X, y) + + assert_array_equal(eclf1.transform(X).shape, (3, 4, 2)) + assert_array_equal(eclf2.transform(X).shape, (4, 6)) diff --git a/sklearn/ensemble/voting_classifier.py b/sklearn/ensemble/voting_classifier.py index 40e21d56270db..a77b24c9baf04 100644 --- 
a/sklearn/ensemble/voting_classifier.py +++ b/sklearn/ensemble/voting_classifier.py @@ -12,6 +12,7 @@ # License: BSD 3 clause import numpy as np +import warnings from ..base import BaseEstimator from ..base import ClassifierMixin @@ -61,6 +62,12 @@ class VotingClassifier(BaseEstimator, ClassifierMixin, TransformerMixin): The number of jobs to run in parallel for ``fit``. If -1, then the number of jobs is set to the number of cores. + flatten_transform : bool, optional (default=False) + Affects shape of transform output only when voting='soft' + If voting='soft' and flatten_transform=True, transform method returns + matrix with shape [n_samples, n_classifiers * n_classes] instead of + [n_classifiers, n_samples, n_classes]. + Attributes ---------- estimators_ : list of classifiers @@ -100,12 +107,14 @@ class VotingClassifier(BaseEstimator, ClassifierMixin, TransformerMixin): >>> """ - def __init__(self, estimators, voting='hard', weights=None, n_jobs=1): + def __init__(self, estimators, voting='hard', weights=None, n_jobs=1, + flatten_transform=False): self.estimators = estimators self.named_estimators = dict(estimators) self.voting = voting self.weights = weights self.n_jobs = n_jobs + self.flatten_transform = flatten_transform def fit(self, X, y, sample_weight=None): """ Fit the estimators. @@ -238,16 +247,25 @@ def transform(self, X): Returns ------- - If `voting='soft'`: + If `voting='soft'` and `flatten_transform=False`: array-like = [n_classifiers, n_samples, n_classes] Class probabilities calculated by each classifier. + If `voting='soft'` and `flatten_transform=True`: + array-like = [n_samples, n_classifiers * n_classes] + Class probabilities calculated by each classifier. If `voting='hard'`: array-like = [n_samples, n_classifiers] Class labels predicted by each classifier. 
""" check_is_fitted(self, 'estimators_') if self.voting == 'soft': - return self._collect_probas(X) + probas = self._collect_probas(X) + if not self.flatten_transform: + warnings.warn("'flatten_transform' default value will be" + " changed to True in 0.21.", DeprecationWarning) + return probas + else: + return np.hstack(probas) else: return self._predict(X)