diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py
index 3fc6bb93e64c9..82f5378d9d07b 100644
--- a/sklearn/ensemble/_forest.py
+++ b/sklearn/ensemble/_forest.py
@@ -51,7 +51,7 @@ class calls the ``fit`` method of each sub-estimator on random samples
 from joblib import Parallel
 
 from ..base import is_classifier
-from ..base import ClassifierMixin, RegressorMixin, MultiOutputMixin
+from ..base import ClassifierMixin, MultiOutputMixin, RegressorMixin
 from ..metrics import accuracy_score, r2_score
 from ..preprocessing import OneHotEncoder
 from ..tree import (
@@ -1052,6 +1052,9 @@ def _compute_partial_dependence_recursion(self, grid, target_features):
 
         return averaged_predictions
 
+    def _more_tags(self):
+        return {"multilabel": True}
+
 
 class RandomForestClassifier(ForestClassifier):
     """
diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py
index 7f900f56e7e54..67d9a47881953 100644
--- a/sklearn/linear_model/_ridge.py
+++ b/sklearn/linear_model/_ridge.py
@@ -21,7 +21,7 @@
 from ._base import LinearClassifierMixin, LinearModel
 from ._base import _deprecate_normalize, _rescale_data
 from ._sag import sag_solver
-from ..base import RegressorMixin, MultiOutputMixin, is_classifier
+from ..base import MultiOutputMixin, RegressorMixin, is_classifier
 from ..utils.extmath import safe_sparse_dot
 from ..utils.extmath import row_norms
 from ..utils import check_array
@@ -2319,9 +2319,17 @@ def classes_(self):
 
     def _more_tags(self):
         return {
+            "multilabel": True,
             "_xfail_checks": {
                 "check_sample_weights_invariance": (
                     "zero sample_weight is not equivalent to removing samples"
                 ),
-            }
+                # FIXME: see
+                # https://github.com/scikit-learn/scikit-learn/issues/19858
+                # to track progress on resolving this issue
+                "check_classifiers_multilabel_output_format_predict": (
+                    "RidgeClassifierCV.predict outputs an array of shape (25,) "
+                    "instead of (25, 5)"
+                ),
+            },
         }
diff --git a/sklearn/neighbors/_classification.py b/sklearn/neighbors/_classification.py
index 9ef91a7a2c3e3..4098b821cfebe 100644
--- a/sklearn/neighbors/_classification.py
+++ b/sklearn/neighbors/_classification.py
@@ -287,6 +287,9 @@ def predict_proba(self, X):
 
         return probabilities
 
+    def _more_tags(self):
+        return {"multilabel": True}
+
 
 class RadiusNeighborsClassifier(RadiusNeighborsMixin, ClassifierMixin, NeighborsBase):
     """Classifier implementing a vote among neighbors within a given radius
@@ -651,3 +654,6 @@ def predict_proba(self, X):
         probabilities = probabilities[0]
 
         return probabilities
+
+    def _more_tags(self):
+        return {"multilabel": True}
diff --git a/sklearn/neural_network/_multilayer_perceptron.py b/sklearn/neural_network/_multilayer_perceptron.py
index 8d3b0853b5336..ce57d7376665a 100644
--- a/sklearn/neural_network/_multilayer_perceptron.py
+++ b/sklearn/neural_network/_multilayer_perceptron.py
@@ -13,7 +13,11 @@
 import scipy.optimize
 
-from ..base import BaseEstimator, ClassifierMixin, RegressorMixin
+from ..base import (
+    BaseEstimator,
+    ClassifierMixin,
+    RegressorMixin,
+)
 from ..base import is_classifier
 from ._base import ACTIVATIONS, DERIVATIVES, LOSS_FUNCTIONS
 from ._stochastic_optimizers import SGDOptimizer, AdamOptimizer
@@ -1246,6 +1250,9 @@ def predict_proba(self, X):
         else:
             return y_pred
 
+    def _more_tags(self):
+        return {"multilabel": True}
+
 
 class MLPRegressor(RegressorMixin, BaseMultilayerPerceptron):
     """Multi-layer Perceptron regressor.
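Note: the estimator-side changes above are mechanical: each classifier that genuinely supports multilabel-indicator targets now advertises it through `_more_tags`. As a minimal sketch of how the tag surfaces, assuming this patch is applied and using the private helper `sklearn.utils._tags._safe_tags` (available since scikit-learn 0.24, subject to change):

    # Sketch, not part of the patch: `_get_tags` walks the estimator's MRO and
    # merges every `_more_tags` dict, so the `multilabel` flag set above becomes
    # visible to the common-check machinery below.
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.utils._tags import _safe_tags  # private helper; assumed stable here

    # With this patch applied, KNeighborsClassifier reports multilabel support.
    assert _safe_tags(KNeighborsClassifier(), key="multilabel")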
diff --git a/sklearn/tree/_classes.py b/sklearn/tree/_classes.py
index 0256513a64721..87a9b5f815e28 100644
--- a/sklearn/tree/_classes.py
+++ b/sklearn/tree/_classes.py
@@ -1021,6 +1021,9 @@ def predict_log_proba(self, X):
     def n_features_(self):
         return self.n_features_in_
 
+    def _more_tags(self):
+        return {"multilabel": True}
+
 
 class DecisionTreeRegressor(RegressorMixin, BaseDecisionTree):
     """A decision tree regressor.
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index ff853be22f663..7749484ea5b22 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -19,6 +19,7 @@
 from ._testing import assert_array_almost_equal
 from ._testing import assert_allclose
 from ._testing import assert_allclose_dense_sparse
+from ._testing import assert_array_less
 from ._testing import set_random_state
 from ._testing import SkipTest
 from ._testing import ignore_warnings
@@ -141,6 +142,9 @@ def _yield_classifier_checks(classifier):
         yield check_classifiers_regression_target
     if tags["multilabel"]:
         yield check_classifiers_multilabel_representation_invariance
+        yield check_classifiers_multilabel_output_format_predict
+        yield check_classifiers_multilabel_output_format_predict_proba
+        yield check_classifiers_multilabel_output_format_decision_function
     if not tags["no_validation"]:
         yield check_supervised_y_no_nan
     if not tags["multioutput_only"]:
@@ -651,7 +655,7 @@ def _set_checking_parameters(estimator):
         estimator.set_params(strategy="stratified")
 
     # Speed-up by reducing the number of CV or splits for CV estimators
-    loo_cv = ["RidgeCV"]
+    loo_cv = ["RidgeCV", "RidgeClassifierCV"]
     if name not in loo_cv and hasattr(estimator, "cv"):
         estimator.set_params(cv=3)
     if hasattr(estimator, "n_splits"):
@@ -2258,18 +2262,18 @@ def check_outliers_train(name, estimator_orig, readonly_memmap=True):
         estimator.fit(X)
 
 
-@ignore_warnings(category=(FutureWarning))
+@ignore_warnings(category=FutureWarning)
 def check_classifiers_multilabel_representation_invariance(name, classifier_orig):
-
     X, y = make_multilabel_classification(
         n_samples=100,
-        n_features=20,
+        n_features=2,
         n_classes=5,
         n_labels=3,
         length=50,
         allow_unlabeled=True,
         random_state=0,
     )
+    X = scale(X)
 
     X_train, y_train = X[:80], y[:80]
     X_test = X[80:]
@@ -2299,6 +2303,181 @@ def check_classifiers_multilabel_representation_invariance(name, classifier_orig
     assert type(y_pred) == type(y_pred_list_of_lists)
 
 
+@ignore_warnings(category=FutureWarning)
+def check_classifiers_multilabel_output_format_predict(name, classifier_orig):
+    """Check the output of the `predict` method for classifiers supporting
+    multilabel-indicator targets."""
+    classifier = clone(classifier_orig)
+    set_random_state(classifier)
+
+    n_samples, test_size, n_outputs = 100, 25, 5
+    X, y = make_multilabel_classification(
+        n_samples=n_samples,
+        n_features=2,
+        n_classes=n_outputs,
+        n_labels=3,
+        length=50,
+        allow_unlabeled=True,
+        random_state=0,
+    )
+    X = scale(X)
+
+    X_train, X_test = X[:-test_size], X[-test_size:]
+    y_train, y_test = y[:-test_size], y[-test_size:]
+    classifier.fit(X_train, y_train)
+
+    response_method_name = "predict"
+    predict_method = getattr(classifier, response_method_name, None)
+    if predict_method is None:
+        raise SkipTest(f"{name} does not have a {response_method_name} method.")
+
+    y_pred = predict_method(X_test)
+
+    # y_pred.shape -> y_test.shape with the same dtype
+    assert isinstance(y_pred, np.ndarray), (
+        f"{name}.predict is expected to output a NumPy array. Got "
+        f"{type(y_pred)} instead."
+    )
+    assert y_pred.shape == y_test.shape, (
+        f"{name}.predict outputs a NumPy array of shape {y_pred.shape} "
+        f"instead of {y_test.shape}."
+    )
+    assert y_pred.dtype == y_test.dtype, (
+        f"{name}.predict does not output the same dtype as the targets. "
+        f"Got {y_pred.dtype} instead of {y_test.dtype}."
+    )
+
+
+@ignore_warnings(category=FutureWarning)
+def check_classifiers_multilabel_output_format_predict_proba(name, classifier_orig):
+    """Check the output of the `predict_proba` method for classifiers supporting
+    multilabel-indicator targets."""
+    classifier = clone(classifier_orig)
+    set_random_state(classifier)
+
+    n_samples, test_size, n_outputs = 100, 25, 5
+    X, y = make_multilabel_classification(
+        n_samples=n_samples,
+        n_features=2,
+        n_classes=n_outputs,
+        n_labels=3,
+        length=50,
+        allow_unlabeled=True,
+        random_state=0,
+    )
+    X = scale(X)
+
+    X_train, X_test = X[:-test_size], X[-test_size:]
+    y_train = y[:-test_size]
+    classifier.fit(X_train, y_train)
+
+    response_method_name = "predict_proba"
+    predict_proba_method = getattr(classifier, response_method_name, None)
+    if predict_proba_method is None:
+        raise SkipTest(f"{name} does not have a {response_method_name} method.")
+
+    y_pred = predict_proba_method(X_test)
+
+    # y_pred.shape -> 2 possibilities:
+    # - list of length n_outputs of shape (n_samples, 2);
+    # - ndarray of shape (n_samples, n_outputs).
+    # dtype should be floating
+    if isinstance(y_pred, list):
+        assert len(y_pred) == n_outputs, (
+            f"When {name}.predict_proba returns a list, the list should "
+            "be of length n_outputs and contain NumPy arrays. Got length "
+            f"of {len(y_pred)} instead of {n_outputs}."
+        )
+        for pred in y_pred:
+            assert pred.shape == (test_size, 2), (
+                f"When {name}.predict_proba returns a list, this list "
+                "should contain NumPy arrays of shape (n_samples, 2). Got "
+                f"NumPy arrays of shape {pred.shape} instead of "
+                f"{(test_size, 2)}."
+            )
+            assert pred.dtype.kind == "f", (
+                f"When {name}.predict_proba returns a list, it should "
+                "contain NumPy arrays with floating dtype. Got "
+                f"{pred.dtype} instead."
+            )
+            # check that we have the correct probabilities
+            err_msg = (
+                f"When {name}.predict_proba returns a list, each NumPy "
+                "array should contain probabilities for each class and "
+                "thus each row should sum to 1 (or close to 1 due to "
+                "numerical errors)."
+            )
+            assert_allclose(pred.sum(axis=1), 1, err_msg=err_msg)
+    elif isinstance(y_pred, np.ndarray):
+        assert y_pred.shape == (test_size, n_outputs), (
+            f"When {name}.predict_proba returns a NumPy array, the "
+            f"expected shape is (n_samples, n_outputs). Got {y_pred.shape}"
+            f" instead of {(test_size, n_outputs)}."
+        )
+        assert y_pred.dtype.kind == "f", (
+            f"When {name}.predict_proba returns a NumPy array, the "
+            f"expected data type is floating. Got {y_pred.dtype} instead."
+        )
+        err_msg = (
+            f"When {name}.predict_proba returns a NumPy array, this array "
+            "is expected to provide probabilities of the positive class "
+            "and should therefore contain values between 0 and 1."
+        )
+        assert_array_less(0, y_pred, err_msg=err_msg)
+        assert_array_less(y_pred, 1, err_msg=err_msg)
+    else:
+        raise ValueError(
+            f"Unknown returned type {type(y_pred)} by {name}."
+            "predict_proba. A list or a NumPy array is expected."
+        )
+
+
+@ignore_warnings(category=FutureWarning)
+def check_classifiers_multilabel_output_format_decision_function(name, classifier_orig):
+    """Check the output of the `decision_function` method for classifiers supporting
+    multilabel-indicator targets."""
+    classifier = clone(classifier_orig)
+    set_random_state(classifier)
+
+    n_samples, test_size, n_outputs = 100, 25, 5
+    X, y = make_multilabel_classification(
+        n_samples=n_samples,
+        n_features=2,
+        n_classes=n_outputs,
+        n_labels=3,
+        length=50,
+        allow_unlabeled=True,
+        random_state=0,
+    )
+    X = scale(X)
+
+    X_train, X_test = X[:-test_size], X[-test_size:]
+    y_train = y[:-test_size]
+    classifier.fit(X_train, y_train)
+
+    response_method_name = "decision_function"
+    decision_function_method = getattr(classifier, response_method_name, None)
+    if decision_function_method is None:
+        raise SkipTest(f"{name} does not have a {response_method_name} method.")
+
+    y_pred = decision_function_method(X_test)
+
+    # y_pred.shape -> y_test.shape with floating dtype
+    assert isinstance(y_pred, np.ndarray), (
+        f"{name}.decision_function is expected to output a NumPy array."
+        f" Got {type(y_pred)} instead."
+    )
+    assert y_pred.shape == (test_size, n_outputs), (
+        f"{name}.decision_function is expected to provide a NumPy array "
+        f"of shape (n_samples, n_outputs). Got {y_pred.shape} instead of "
+        f"{(test_size, n_outputs)}."
+    )
+    assert y_pred.dtype.kind == "f", (
+        f"{name}.decision_function is expected to output a floating dtype."
+        f" Got {y_pred.dtype} instead."
+    )
+
+
 @ignore_warnings(category=FutureWarning)
 def check_estimators_fit_returns_self(name, estimator_orig, readonly_memmap=False):
     """Check if self is returned when calling fit."""
diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index 8ff8d8cf5e782..ea158234ea785 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -11,6 +11,7 @@
 import joblib
 
 from sklearn.base import BaseEstimator, ClassifierMixin
+from sklearn.datasets import make_multilabel_classification
 from sklearn.utils import deprecated
 from sklearn.utils._testing import (
     raises,
@@ -20,18 +21,7 @@
     MinimalTransformer,
     SkipTest,
 )
-from sklearn.utils.estimator_checks import check_estimator, _NotAnArray
-from sklearn.utils.estimator_checks import check_class_weight_balanced_linear_classifier
-from sklearn.utils.estimator_checks import set_random_state
-from sklearn.utils.estimator_checks import _set_checking_parameters
-from sklearn.utils.estimator_checks import check_estimators_unfitted
-from sklearn.utils.estimator_checks import check_fit_score_takes_y
-from sklearn.utils.estimator_checks import check_no_attributes_set_in_init
-from sklearn.utils.estimator_checks import check_classifier_data_not_an_array
-from sklearn.utils.estimator_checks import check_regressor_data_not_an_array
-from sklearn.utils.estimator_checks import check_estimator_get_tags_default_keys
 from sklearn.utils.validation import check_is_fitted
-from sklearn.utils.estimator_checks import check_outlier_corruption
 from sklearn.utils.fixes import np_version, parse_version
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.linear_model import LinearRegression, SGDClassifier
@@ -45,6 +35,24 @@
 from sklearn.utils import all_estimators
 from sklearn.exceptions import SkipTestWarning
 
+from sklearn.utils.estimator_checks import (
+    _NotAnArray,
+    _set_checking_parameters,
+    check_class_weight_balanced_linear_classifier,
+    check_classifier_data_not_an_array,
+    check_classifiers_multilabel_output_format_decision_function,
+    check_classifiers_multilabel_output_format_predict,
+    check_classifiers_multilabel_output_format_predict_proba,
+    check_estimator,
+    check_estimator_get_tags_default_keys,
+    check_estimators_unfitted,
+    check_fit_score_takes_y,
+    check_no_attributes_set_in_init,
+    check_outlier_corruption,
+    check_regressor_data_not_an_array,
+    set_random_state,
+)
+
 
 class CorrectNotFittedError(ValueError):
     """Exception class to raise if estimator is used before fitting.
@@ -689,6 +697,236 @@ def test_check_estimator_get_tags_default_keys():
     check_estimator_get_tags_default_keys(estimator.__class__.__name__, estimator)
 
 
+class _BaseMultiLabelClassifierMock(ClassifierMixin, BaseEstimator):
+    def __init__(self, response_output):
+        self.response_output = response_output
+
+    def fit(self, X, y):
+        return self
+
+    def _more_tags(self):
+        return {"multilabel": True}
+
+
+def test_check_classifiers_multilabel_output_format_predict():
+    n_samples, test_size, n_outputs = 100, 25, 5
+    _, y = make_multilabel_classification(
+        n_samples=n_samples,
+        n_features=2,
+        n_classes=n_outputs,
+        n_labels=3,
+        length=50,
+        allow_unlabeled=True,
+        random_state=0,
+    )
+    y_test = y[-test_size:]
+
+    class MultiLabelClassifierPredict(_BaseMultiLabelClassifierMock):
+        def predict(self, X):
+            return self.response_output
+
+    # 1. inconsistent array type
+    clf = MultiLabelClassifierPredict(response_output=y_test.tolist())
+    err_msg = (
+        r"MultiLabelClassifierPredict.predict is expected to output a "
+        r"NumPy array. Got .+ instead."
+    )
+    with raises(AssertionError, match=err_msg):
+        check_classifiers_multilabel_output_format_predict(clf.__class__.__name__, clf)
+    # 2. inconsistent shape
+    clf = MultiLabelClassifierPredict(response_output=y_test[:, :-1])
+    err_msg = (
+        r"MultiLabelClassifierPredict.predict outputs a NumPy array of "
+        r"shape \(25, 4\) instead of \(25, 5\)."
+    )
+    with raises(AssertionError, match=err_msg):
+        check_classifiers_multilabel_output_format_predict(clf.__class__.__name__, clf)
+    # 3. inconsistent dtype
+    clf = MultiLabelClassifierPredict(response_output=y_test.astype(np.float64))
+    err_msg = (
+        r"MultiLabelClassifierPredict.predict does not output the same "
+        r"dtype as the targets."
+    )
+    with raises(AssertionError, match=err_msg):
+        check_classifiers_multilabel_output_format_predict(clf.__class__.__name__, clf)
+
+
+def test_check_classifiers_multilabel_output_format_predict_proba():
+    n_samples, test_size, n_outputs = 100, 25, 5
+    _, y = make_multilabel_classification(
+        n_samples=n_samples,
+        n_features=2,
+        n_classes=n_outputs,
+        n_labels=3,
+        length=50,
+        allow_unlabeled=True,
+        random_state=0,
+    )
+    y_test = y[-test_size:]
+
+    class MultiLabelClassifierPredictProba(_BaseMultiLabelClassifierMock):
+        def predict_proba(self, X):
+            return self.response_output
+
+    # 1. unknown output type
+    clf = MultiLabelClassifierPredictProba(response_output=sp.csr_matrix(y_test))
+    err_msg = (
+        r"Unknown returned type .+ by "
+        r"MultiLabelClassifierPredictProba.predict_proba. A list or a NumPy "
+        r"array is expected."
+    )
+    with raises(ValueError, match=err_msg):
+        check_classifiers_multilabel_output_format_predict_proba(
+            clf.__class__.__name__,
+            clf,
+        )
+    # 2. for list output
+    # 2.1. inconsistent length
+    clf = MultiLabelClassifierPredictProba(response_output=y_test.tolist())
+    err_msg = (
+        "When MultiLabelClassifierPredictProba.predict_proba returns a list, "
+        "the list should be of length n_outputs and contain NumPy arrays. Got "
+        f"length of {test_size} instead of {n_outputs}."
+    )
+    with raises(AssertionError, match=err_msg):
+        check_classifiers_multilabel_output_format_predict_proba(
+            clf.__class__.__name__,
+            clf,
+        )
+    # 2.2. array of inconsistent shape
+    response_output = [np.ones_like(y_test) for _ in range(n_outputs)]
+    clf = MultiLabelClassifierPredictProba(response_output=response_output)
+    err_msg = (
+        r"When MultiLabelClassifierPredictProba.predict_proba returns a list, "
+        r"this list should contain NumPy arrays of shape \(n_samples, 2\). Got "
+        r"NumPy arrays of shape \(25, 5\) instead of \(25, 2\)."
+    )
+    with raises(AssertionError, match=err_msg):
+        check_classifiers_multilabel_output_format_predict_proba(
+            clf.__class__.__name__,
+            clf,
+        )
+    # 2.3. array of inconsistent dtype
+    response_output = [
+        np.ones(shape=(y_test.shape[0], 2), dtype=np.int64) for _ in range(n_outputs)
+    ]
+    clf = MultiLabelClassifierPredictProba(response_output=response_output)
+    err_msg = (
+        "When MultiLabelClassifierPredictProba.predict_proba returns a list, "
+        "it should contain NumPy arrays with floating dtype."
+    )
+    with raises(AssertionError, match=err_msg):
+        check_classifiers_multilabel_output_format_predict_proba(
+            clf.__class__.__name__,
+            clf,
+        )
+    # 2.4. array does not contain probabilities (each row should sum to 1)
+    response_output = [
+        np.ones(shape=(y_test.shape[0], 2), dtype=np.float64) for _ in range(n_outputs)
+    ]
+    clf = MultiLabelClassifierPredictProba(response_output=response_output)
+    err_msg = (
+        r"When MultiLabelClassifierPredictProba.predict_proba returns a list, "
+        r"each NumPy array should contain probabilities for each class and "
+        r"thus each row should sum to 1"
+    )
+    with raises(AssertionError, match=err_msg):
+        check_classifiers_multilabel_output_format_predict_proba(
+            clf.__class__.__name__,
+            clf,
+        )
+    # 3. for array output
+    # 3.1. array of inconsistent shape
+    clf = MultiLabelClassifierPredictProba(response_output=y_test[:, :-1])
+    err_msg = (
+        r"When MultiLabelClassifierPredictProba.predict_proba returns a NumPy "
+        r"array, the expected shape is \(n_samples, n_outputs\). Got \(25, 4\)"
+        r" instead of \(25, 5\)."
+    )
+    with raises(AssertionError, match=err_msg):
+        check_classifiers_multilabel_output_format_predict_proba(
+            clf.__class__.__name__,
+            clf,
+        )
+    # 3.2. array of inconsistent dtype
+    response_output = np.zeros_like(y_test, dtype=np.int64)
+    clf = MultiLabelClassifierPredictProba(response_output=response_output)
+    err_msg = (
+        r"When MultiLabelClassifierPredictProba.predict_proba returns a NumPy "
+        r"array, the expected data type is floating."
+    )
+    with raises(AssertionError, match=err_msg):
+        check_classifiers_multilabel_output_format_predict_proba(
+            clf.__class__.__name__,
+            clf,
+        )
+    # 3.3. array does not contain probabilities
+    clf = MultiLabelClassifierPredictProba(response_output=y_test * 2.0)
+    err_msg = (
+        r"When MultiLabelClassifierPredictProba.predict_proba returns a NumPy "
+        r"array, this array is expected to provide probabilities of the "
+        r"positive class and should therefore contain values between 0 and 1."
+    )
+    with raises(AssertionError, match=err_msg):
+        check_classifiers_multilabel_output_format_predict_proba(
+            clf.__class__.__name__,
+            clf,
+        )
+
+
+def test_check_classifiers_multilabel_output_format_decision_function():
+    n_samples, test_size, n_outputs = 100, 25, 5
+    _, y = make_multilabel_classification(
+        n_samples=n_samples,
+        n_features=2,
+        n_classes=n_outputs,
+        n_labels=3,
+        length=50,
+        allow_unlabeled=True,
+        random_state=0,
+    )
+    y_test = y[-test_size:]
+
+    class MultiLabelClassifierDecisionFunction(_BaseMultiLabelClassifierMock):
+        def decision_function(self, X):
+            return self.response_output
+
+    # 1. inconsistent array type
+    clf = MultiLabelClassifierDecisionFunction(response_output=y_test.tolist())
+    err_msg = (
+        r"MultiLabelClassifierDecisionFunction.decision_function is expected "
+        r"to output a NumPy array. Got .+ instead."
+    )
+    with raises(AssertionError, match=err_msg):
+        check_classifiers_multilabel_output_format_decision_function(
+            clf.__class__.__name__,
+            clf,
+        )
+    # 2. inconsistent shape
+    clf = MultiLabelClassifierDecisionFunction(response_output=y_test[:, :-1])
+    err_msg = (
+        r"MultiLabelClassifierDecisionFunction.decision_function is expected "
+        r"to provide a NumPy array of shape \(n_samples, n_outputs\). Got "
+        r"\(25, 4\) instead of \(25, 5\)"
+    )
+    with raises(AssertionError, match=err_msg):
+        check_classifiers_multilabel_output_format_decision_function(
+            clf.__class__.__name__,
+            clf,
+        )
+    # 3. inconsistent dtype
+    clf = MultiLabelClassifierDecisionFunction(response_output=y_test)
+    err_msg = (
+        r"MultiLabelClassifierDecisionFunction.decision_function is expected "
+        r"to output a floating dtype."
+    )
+    with raises(AssertionError, match=err_msg):
+        check_classifiers_multilabel_output_format_decision_function(
+            clf.__class__.__name__,
+            clf,
+        )
+
+
 def run_tests_without_pytest():
     """Runs the tests in this file without using pytest."""
     main_module = sys.modules["__main__"]
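Note: the three new checks are not meant to be called directly. `_yield_classifier_checks` yields them for any classifier whose `multilabel` tag is true, so they execute through the usual public entry points. A usage sketch with the documented `parametrize_with_checks` pytest helper (any estimator advertising the tag would do; `DecisionTreeClassifier` is just one of those touched by this patch):

    # Usage sketch: with this patch applied, the multilabel output-format
    # checks run automatically as part of the standard common-test suite.
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.utils.estimator_checks import parametrize_with_checks

    @parametrize_with_checks([DecisionTreeClassifier()])
    def test_sklearn_compatible_estimator(estimator, check):
        check(estimator)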