
[MRG] ENH: Make StackingRegressor support Multioutput #27704


Open · wants to merge 14 commits into main
18 changes: 12 additions & 6 deletions sklearn/ensemble/_stacking.py
@@ -180,7 +180,7 @@ def fit(self, X, y, sample_weight=None):
             Training vectors, where `n_samples` is the number of samples and
             `n_features` is the number of features.
 
-        y : array-like of shape (n_samples,)
+        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
             Target values.
 
         sample_weight : array-like of shape (n_samples,), default=None
@@ -953,7 +953,7 @@ def fit(self, X, y, sample_weight=None):
             Training vectors, where `n_samples` is the number of samples and
             `n_features` is the number of features.
 
-        y : array-like of shape (n_samples,)
+        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
             Target values.
 
         sample_weight : array-like of shape (n_samples,), default=None
@@ -967,7 +967,9 @@ def fit(self, X, y, sample_weight=None):
             Returns a fitted instance.
         """
         _raise_for_unsupported_routing(self, "fit", sample_weight=sample_weight)
-        y = column_or_1d(y, warn=True)
+        y = self._validate_data(y=y, multi_output=True)
+        if y.ndim == 2 and y.shape[1] == 1:
+            y = column_or_1d(y, warn=True)
         return super().fit(X, y, sample_weight)
 
     def transform(self, X):
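
A note on the hunk above: the new validation accepts both 1D and 2D targets but squeezes the degenerate single-column case back to 1D, so the historical behavior is unchanged. A minimal sketch of that logic in isolation, runnable against any recent scikit-learn (the helper name `_validate_target` is illustrative, not part of this PR):

    import numpy as np
    from sklearn.utils import column_or_1d

    def _validate_target(y):
        # Mirrors the added branch: keep a true multioutput target 2D, but
        # squeeze a single-column target to 1D for backward compatibility.
        y = np.asarray(y)
        if y.ndim == 2 and y.shape[1] == 1:
            y = column_or_1d(y, warn=True)  # emits a DataConversionWarning
        return y

    assert _validate_target(np.ones((5, 1))).shape == (5,)    # squeezed to 1D
    assert _validate_target(np.ones((5, 3))).shape == (5, 3)  # kept 2D
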
@@ -981,8 +983,10 @@ def transform(self, X):
 
         Returns
         -------
-        y_preds : ndarray of shape (n_samples, n_estimators)
+        y_preds : ndarray of shape
+            (n_samples, n_estimators) or (n_samples, n_estimators * n_outputs)
             Prediction outputs for each estimator.
+            If passthrough=True, the number of columns increases by n_features.
         """
         return self._transform(X)

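To make the new `transform` contract concrete, here is a small usage sketch (it only runs with this branch installed; the data and estimator choices are illustrative): two base estimators and two outputs give 2 * 2 = 4 stacked columns.

    import numpy as np
    from sklearn.ensemble import StackingRegressor
    from sklearn.linear_model import LinearRegression, Ridge
    from sklearn.multioutput import MultiOutputRegressor

    X = np.arange(10.0).reshape(-1, 1)
    y = np.hstack([2 * X + 1, 3 * X - 2])  # shape (10, 2): two outputs

    reg = StackingRegressor(
        estimators=[
            ("lr", LinearRegression()),
            ("ridge", MultiOutputRegressor(Ridge())),
        ],
        final_estimator=Ridge(),
    )
    reg.fit(X, y)
    # 2 estimators * 2 outputs -> 4 columns (passthrough=False by default)
    assert reg.transform(X).shape == (10, 4)
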
@@ -995,7 +999,7 @@ def fit_transform(self, X, y, sample_weight=None):
             Training vectors, where `n_samples` is the number of samples and
             `n_features` is the number of features.
 
-        y : array-like of shape (n_samples,)
+        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
             Target values.
 
         sample_weight : array-like of shape (n_samples,), default=None
@@ -1005,8 +1009,10 @@
 
         Returns
         -------
-        y_preds : ndarray of shape (n_samples, n_estimators)
+        y_preds : ndarray of shape
+            (n_samples, n_estimators) or (n_samples, n_estimators * n_outputs)
             Prediction outputs for each estimator.
+            If passthrough=True, the number of columns increases by n_features.
         """
         return super().fit_transform(X, y, sample_weight=sample_weight)

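And the passthrough case from the docstring addition, as a sketch under the same assumption (branch installed): the stacked predictions gain the n_features columns of X, and `predict` returns a 2D array matching `y`.

    import numpy as np
    from sklearn.ensemble import StackingRegressor
    from sklearn.linear_model import LinearRegression, Ridge

    X = np.arange(10.0).reshape(-1, 1)
    y = np.hstack([2 * X + 1, 3 * X - 2])  # (10, 2)

    reg = StackingRegressor(
        estimators=[("lr", LinearRegression()), ("ridge", Ridge())],
        final_estimator=Ridge(),
        passthrough=True,
    )
    X_trans = reg.fit_transform(X, y)
    # 2 estimators * 2 outputs + 1 passthrough feature -> 5 columns
    assert X_trans.shape == (10, 5)
    assert reg.predict(X).shape == y.shape  # (10, 2)
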
122 changes: 122 additions & 0 deletions sklearn/ensemble/tests/test_stacking.py
@@ -34,6 +34,7 @@
     RidgeClassifier,
 )
 from sklearn.model_selection import KFold, StratifiedKFold, train_test_split
+from sklearn.multioutput import MultiOutputRegressor
 from sklearn.neighbors import KNeighborsClassifier
 from sklearn.neural_network import MLPClassifier
 from sklearn.preprocessing import scale
@@ -861,6 +862,127 @@ def test_stacking_classifier_base_regressor():
    assert clf.score(X_test, y_test) > 0.8


def test_stacking_regressor_singleoutput_but_2d():
    """Check that a stacking regressor with a single-output but 2D target works."""
    cv = 2
    acceptable_relative_tolerance = 1e-10
    acceptable_absolute_tolerance = 1e-10

    X_train = np.hstack([np.arange(5)] * cv).reshape(-1, 1)
    y_train = (2 * X_train + 1).reshape(-1, 1)

    estimator1 = LinearRegression(fit_intercept=True)
    estimator2 = DummyRegressor(strategy="constant", constant=0)
    final_estimator = Ridge(alpha=1e-12, fit_intercept=False, random_state=42)

    reg = StackingRegressor(
        estimators=[("lr", estimator1), ("dr", estimator2)],
        final_estimator=final_estimator,
        cv=KFold(n_splits=cv, shuffle=False),
        passthrough=False,
    )

    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_train)
    # NOTE: In this case the estimator can predict the target almost exactly.
    # When the target is 2D but has a single output, the predictions are 1D
    # because of column_or_1d.
    assert_allclose(
        y_pred,
        y_train.flatten(),
        rtol=acceptable_relative_tolerance,
        atol=acceptable_absolute_tolerance,
    )
    X_trans = reg.transform(X_train)
    # NOTE: The result of transform is the horizontal stack of the predictions.
    assert_allclose(
        X_trans,
        np.hstack([y_train, np.zeros(y_train.shape)]),
        rtol=acceptable_relative_tolerance,
        atol=acceptable_absolute_tolerance,
    )


def test_stacking_regressor_multioutput():
    """Check that a stacking regressor works with a multioutput target."""
    cv = 2
    acceptable_relative_tolerance = 1e-10
    acceptable_absolute_tolerance = 1e-10

    X_train = np.hstack([np.arange(5)] * cv).reshape(-1, 1)
    y_train = np.hstack([2 * X_train + 1, 3 * X_train - 2])
    assert y_train.ndim > 1

    estimator1 = LinearRegression(fit_intercept=True)
    estimator2 = MultiOutputRegressor(DummyRegressor(strategy="constant", constant=0))
    final_estimator = Ridge(alpha=1e-12, fit_intercept=False, random_state=42)

    reg = StackingRegressor(
        estimators=[("lr", estimator1), ("dr", estimator2)],
        final_estimator=final_estimator,
        cv=KFold(n_splits=cv, shuffle=False),
        passthrough=False,
    )

    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_train)
    # NOTE: In this case the estimator can predict the target almost exactly.
    assert_allclose(
        y_pred,
        y_train,
        rtol=acceptable_relative_tolerance,
        atol=acceptable_absolute_tolerance,
    )
    X_trans = reg.transform(X_train)
    # NOTE: The result of transform is the horizontal stack of the predictions.
    assert_allclose(
        X_trans,
        np.hstack([y_train, np.zeros(y_train.shape)]),
        rtol=acceptable_relative_tolerance,
        atol=acceptable_absolute_tolerance,
    )


def test_stacking_regressor_multioutput_with_passthrough():
    """Check that a stacking regressor works with multioutput and passthrough."""
    cv = 2
    acceptable_relative_tolerance = 1e-10
    acceptable_absolute_tolerance = 1e-10

    X_train = np.hstack([np.arange(5)] * cv).reshape(-1, 1)
    y_train = np.hstack([2 * X_train + 1, 3 * X_train - 2])
    assert y_train.ndim > 1

    estimator1 = LinearRegression(fit_intercept=True)
    estimator2 = MultiOutputRegressor(DummyRegressor(strategy="constant", constant=0))
    final_estimator = Ridge(alpha=1e-12, fit_intercept=False, random_state=42)

    reg = StackingRegressor(
        estimators=[("lr", estimator1), ("dr", estimator2)],
        final_estimator=final_estimator,
        cv=KFold(n_splits=cv, shuffle=False),
        passthrough=True,
    )

    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_train)
    # NOTE: In this case, the estimator can predict the target almost exactly.
    assert_allclose(
        y_pred,
        y_train,
        rtol=acceptable_relative_tolerance,
        atol=acceptable_absolute_tolerance,
    )
    X_trans = reg.transform(X_train)
    # NOTE: X_trans should be the horizontal stack of the predictions and X_train.
    assert_allclose(
        X_trans,
        np.hstack([y_train, np.zeros(y_train.shape), X_train]),
        rtol=acceptable_relative_tolerance,
        atol=acceptable_absolute_tolerance,
    )


def test_stacking_final_estimator_attribute_error():
"""Check that we raise the proper AttributeError when the final estimator
does not implement the `decision_function` method, which is decorated with