
DOC Ensures BayesianGaussianMixture docstring passes numpydoc validation #20384


Merged
1 change: 0 additions & 1 deletion maint_tools/test_docstrings.py
@@ -17,7 +17,6 @@
"AgglomerativeClustering",
"BaggingClassifier",
"BaggingRegressor",
"BayesianGaussianMixture",
"BayesianRidge",
"BernoulliNB",
"BernoulliRBM",
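With "BayesianGaussianMixture" removed from the ignore list above, the numpydoc validation test in maint_tools/test_docstrings.py now covers the class. A minimal sketch of the check that test performs (assuming numpydoc >= 1.0 and scikit-learn are installed; the maintenance test wraps this in pytest):

    # Sketch of the validation the maintenance test applies, not the test itself.
    from numpydoc.validate import validate

    result = validate("sklearn.mixture.BayesianGaussianMixture")
    for code, message in result["errors"]:
        print(code, message)  # no output means the docstring passes validation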
17 changes: 13 additions & 4 deletions sklearn/mixture/_base.py
@@ -183,9 +183,13 @@ def fit(self, X, y=None):
List of n_features-dimensional data points. Each row
corresponds to a single data point.

y : Ignored
Not used, present for API consistency by convention.

Returns
-------
self
self : object
The fitted mixture.
"""
self.fit_predict(X, y)
return self
@@ -209,6 +213,9 @@ def fit_predict(self, X, y=None):
List of n_features-dimensional data points. Each row
corresponds to a single data point.

y : Ignored
Not used, present for API consistency by convention.

Returns
-------
labels : array, shape (n_samples,)
@@ -351,6 +358,9 @@ def score(self, X, y=None):
List of n_features-dimensional data points. Each row
corresponds to a single data point.

y : Ignored
Not used, present for API consistency by convention.

Returns
-------
log_likelihood : float
@@ -407,11 +417,10 @@ def sample(self, n_samples=1):
Returns
-------
X : array, shape (n_samples, n_features)
Randomly generated sample
Randomly generated sample.

y : array, shape (n_samples,)
Component labels

Component labels.
"""
check_is_fitted(self)

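The sklearn/mixture/_base.py changes are documentation-only, but the behaviour they describe is easy to demonstrate. A small usage sketch (illustrative data; GaussianMixture shares the same BaseMixture methods documented above):

    import numpy as np
    from sklearn.mixture import GaussianMixture

    X = np.array([[1.0, 2.0], [1.0, 4.0], [10.0, 7.0], [12.0, 4.0]])
    gm = GaussianMixture(n_components=2, random_state=0).fit(X)  # y is ignored
    labels = gm.fit_predict(X)      # labels : array, shape (n_samples,)
    score = gm.score(X)             # average log-likelihood; y again ignored
    X_new, y_new = gm.sample(n_samples=3)  # generated samples plus component labels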
34 changes: 17 additions & 17 deletions sklearn/mixture/_bayesian_mixture.py
@@ -133,7 +133,7 @@ class BayesianGaussianMixture(BaseMixture):
'dirichlet_process' (using the Stick-breaking representation),
'dirichlet_distribution' (can favor more uniform weights).

weight_concentration_prior : float | None, default=None.
weight_concentration_prior : float or None, default=None
The dirichlet concentration of each component on the weight
distribution (Dirichlet). This is commonly called gamma in the
literature. The higher concentration puts more mass in
@@ -142,22 +142,22 @@
mixture weights simplex. The value of the parameter must be greater
than 0. If it is None, it's set to ``1. / n_components``.

mean_precision_prior : float | None, default=None.
mean_precision_prior : float or None, default=None
The precision prior on the mean distribution (Gaussian).
Controls the extent of where means can be placed. Larger
values concentrate the cluster means around `mean_prior`.
The value of the parameter must be greater than 0.
If it is None, it is set to 1.

mean_prior : array-like, shape (n_features,), default=None.
mean_prior : array-like, shape (n_features,), default=None
The prior on the mean distribution (Gaussian).
If it is None, it is set to the mean of X.

degrees_of_freedom_prior : float | None, default=None.
degrees_of_freedom_prior : float or None, default=None
The prior of the number of degrees of freedom on the covariance
distributions (Wishart). If it is None, it's set to `n_features`.

covariance_prior : float or array-like, default=None.
covariance_prior : float or array-like, default=None
The prior on the covariance distribution (Wishart).
If it is None, the empirical covariance prior is initialized using the
covariance of X. The shape depends on `covariance_type`::
@@ -297,18 +297,6 @@ class BayesianGaussianMixture(BaseMixture):

.. versionadded:: 0.24

Examples
--------
>>> import numpy as np
>>> from sklearn.mixture import BayesianGaussianMixture
>>> X = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [12, 4], [10, 7]])
>>> bgm = BayesianGaussianMixture(n_components=2, random_state=42).fit(X)
>>> bgm.means_
array([[2.49... , 2.29...],
[8.45..., 4.52... ]])
>>> bgm.predict([[0, 0], [9, 3]])
array([0, 1])

See Also
--------
GaussianMixture : Finite Gaussian mixture fit with EM.
@@ -328,6 +316,18 @@
.. [3] `Blei, David M. and Michael I. Jordan. (2006). "Variational
inference for Dirichlet process mixtures". Bayesian analysis 1.1
<https://www.cs.princeton.edu/courses/archive/fall11/cos597C/reading/BleiJordan2005.pdf>`_

Examples
--------
>>> import numpy as np
>>> from sklearn.mixture import BayesianGaussianMixture
>>> X = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [12, 4], [10, 7]])
>>> bgm = BayesianGaussianMixture(n_components=2, random_state=42).fit(X)
>>> bgm.means_
array([[2.49... , 2.29...],
[8.45..., 4.52... ]])
>>> bgm.predict([[0, 0], [9, 3]])
array([0, 1])
"""

def __init__(
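For completeness, the prior parameters whose type descriptions were normalized above can also be set explicitly. An illustrative sketch (values chosen arbitrarily, not as recommendations):

    import numpy as np
    from sklearn.mixture import BayesianGaussianMixture

    X = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [12, 4], [10, 7]], dtype=float)
    bgm = BayesianGaussianMixture(
        n_components=2,
        weight_concentration_prior_type="dirichlet_process",
        weight_concentration_prior=0.5,  # None would mean 1. / n_components
        mean_precision_prior=1.0,        # None would mean 1
        random_state=42,
    ).fit(X)
    print(bgm.weights_.round(2))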