diff --git a/doc/whats_new/upcoming_changes/sklearn.multiclass/31228.fix.rst b/doc/whats_new/upcoming_changes/sklearn.multiclass/31228.fix.rst new file mode 100644 index 0000000000000..a22932c4ff397 --- /dev/null +++ b/doc/whats_new/upcoming_changes/sklearn.multiclass/31228.fix.rst @@ -0,0 +1,4 @@ +- The method `predict_proba` of :class:`sklearn.multiclass.OneVsRestClassifier` now + avoids division by zero when normalizing the predicted probabilities matrix. + By :user:`Luis M. B. Varona `, :user:`Marc Bresson `, and + :user:`Jérémie du Boisberranger ` diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py index fa86201fb1d89..257c9a40b7741 100644 --- a/sklearn/multiclass.py +++ b/sklearn/multiclass.py @@ -553,8 +553,10 @@ def predict_proba(self, X): Y = np.concatenate(((1 - Y), Y), axis=1) if not self.multilabel_: - # Then, probabilities should be normalized to 1. - Y /= np.sum(Y, axis=1)[:, np.newaxis] + # Then, (nonzero) sample probability distributions should be normalized. + row_sums = np.sum(Y, axis=1)[:, np.newaxis] + np.divide(Y, row_sums, out=Y, where=row_sums != 0) # Avoid division by 0 + return Y @available_if(_estimators_has("decision_function")) diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py index 566b8f535c9cb..b814a62fbc064 100644 --- a/sklearn/tests/test_multiclass.py +++ b/sklearn/tests/test_multiclass.py @@ -6,6 +6,7 @@ from numpy.testing import assert_allclose from sklearn import datasets, svm +from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.datasets import load_breast_cancer from sklearn.exceptions import NotFittedError from sklearn.impute import SimpleImputer @@ -429,6 +430,39 @@ def test_ovr_single_label_predict_proba(): assert not (pred - Y_pred).any() +def test_ovr_single_label_predict_proba_zero_row(): + class NaiveBinaryClassifier(BaseEstimator, ClassifierMixin): + def fit(self, X, y): + self.classes_ = np.unique(y) + return self + + def predict_proba(self, X): + ones = 
np.ones((len(X), len(self.classes_))) + # Probability of being the positive class is 0 when all features are 0 + ones[:, 1] = np.any(X, axis=1).astype(int) + return ones + + base_clf = NaiveBinaryClassifier() + X, Y = iris.data, iris.target # Three-class problem with 150 samples + X_train, Y_train = X[:80], Y[:80] + X_test = X[80:] + + zero_indices = np.random.choice(np.arange(len(X_test)), size=5, replace=False) + X_test[zero_indices] = 0 # Change 5 random samples in the test set to be all zeros + + clf = OneVsRestClassifier(base_clf).fit(X_train, Y_train) + Y_proba = clf.predict_proba( + X_test + ) # Our classifier predicts 0 for the zero samples + + zero_indices = np.repeat(zero_indices, len(clf.classes_)) + nonzero_indices = np.setdiff1d(np.arange(len(Y_proba)), zero_indices) + # Nonzero sample probability distributions should be normalized to sum to 1 + assert_almost_equal(np.sum(Y_proba[nonzero_indices], axis=1), 1.0) + # Zero-confidence samples should remain as-is and not be normalized + assert_array_equal(Y_proba[zero_indices], 0) + + def test_ovr_multilabel_decision_function(): X, Y = datasets.make_multilabel_classification( n_samples=100, @@ -597,13 +631,15 @@ def test_ovo_decision_function(): # mostly tied predictions: assert set(votes[:, class_idx]).issubset(set([0.0, 1.0, 2.0])) - # The OVO decision function on the other hand is able to resolve + # The OVO decision function, on the other hand, is able to resolve # most of the ties on this data as it combines both the vote counts # and the aggregated confidence levels of the binary classifiers # to compute the aggregate decision function. The iris dataset # has 150 samples with a couple of duplicates. 
The OvO decisions - # can resolve most of the ties: - assert len(np.unique(decisions[:, class_idx])) > 146 + # can resolve most of the ties; just to be safe, we allow for up to 9 + # redundant values (i.e., more than 140 unique values), although there + # will probably be even fewer than that: + assert len(np.unique(decisions[:, class_idx])) > 140 def test_ovo_gridsearch():