
[MRG] Add Dirichlet process prior to BayesianGaussianMixture #7386


Merged: 1 commit, Sep 10, 2016
277 changes: 133 additions & 144 deletions doc/modules/mixture.rst

Large diffs are not rendered by default.

14 changes: 12 additions & 2 deletions doc/whats_new.rst
@@ -419,10 +419,20 @@ API changes summary
:class:`isotonic.IsotonicRegression`. By `Jonathan Arfa`_.

- The old :class:`DPGMM` is deprecated in favor of the new
  :class:`BayesianGaussianMixture` (with the parameter
  ``weight_concentration_prior_type='dirichlet_process'``).
  The new class solves the computational
  problems of the old class and computes the Gaussian mixture with a
  Dirichlet process prior faster than before.
  (`#7295 <https://github.com/scikit-learn/scikit-learn/pull/7295>`_) by
  `Wei Xue`_ and `Thierry Guillemot`_.

- The old :class:`VBGMM` is deprecated in favor of the new
:class:`BayesianGaussianMixture` (with the parameter
``weight_concentration_prior_type='dirichlet_distribution'``).
The new class solves the computational
problems of the old class and computes the Variational Bayesian Gaussian
mixture faster than before.
  See :ref:`b` for more information.
(`#6651 <https://github.com/scikit-learn/scikit-learn/pull/6651>`_) by
`Wei Xue`_ and `Thierry Guillemot`_.
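For readers updating their code, a minimal migration sketch covering both deprecations above (the toy data, ``n_components``, and settings are illustrative assumptions, not from the PR; assumes scikit-learn >= 0.18):

import numpy as np
from sklearn.mixture import BayesianGaussianMixture

rng = np.random.RandomState(0)
X = np.vstack([rng.randn(100, 2), rng.randn(100, 2) + 4])

# replaces the deprecated mixture.DPGMM
dp = BayesianGaussianMixture(
    n_components=5, weight_concentration_prior_type='dirichlet_process')
dp.fit(X)

# replaces the deprecated mixture.VBGMM
vb = BayesianGaussianMixture(
    n_components=5, weight_concentration_prior_type='dirichlet_distribution')
vb.fit(X)

print(dp.weights_)  # pruned components get weights close to zero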

114 changes: 0 additions & 114 deletions examples/mixture/plot_bayesian_gaussian_mixture.py

This file was deleted.

135 changes: 135 additions & 0 deletions examples/mixture/plot_concentration_prior.py
@@ -0,0 +1,135 @@
"""
==========================================================================
Concentration Prior Type Analysis of Variational Bayesian Gaussian Mixture
==========================================================================

This example plots the ellipsoids obtained from a toy dataset (mixture of three
Gaussians) fitted by the ``BayesianGaussianMixture`` class models with a
Dirichlet distribution prior
(``weight_concentration_prior_type='dirichlet_distribution'``) and a Dirichlet
process prior (``weight_concentration_prior_type='dirichlet_process'``). On
each figure, we plot the results for three different values of the weight
concentration prior.

The ``BayesianGaussianMixture`` class can adapt its number of mixture
components automatically. The parameter ``weight_concentration_prior`` has a
direct link with the resulting number of components with non-zero weights.
Specifying a low value for the concentration prior will make the model put
most of the weight on a few components and set the remaining components'
weights very close to zero. High values of the concentration prior will allow
a larger number of components to be active in the mixture.

The Dirichlet process prior allows defining an infinite number of components
in principle and selects the correct number automatically: it activates a
component only when necessary.

In contrast, the classical finite mixture model with a Dirichlet distribution
prior will favor more uniformly weighted components and therefore tends to
divide natural clusters into unnecessary sub-components.
"""
# Author: Thierry Guillemot <thierry.guillemot.work@gmail.com>
# License: BSD 3 clause

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

from sklearn.mixture import BayesianGaussianMixture

print(__doc__)


def plot_ellipses(ax, weights, means, covars):
for n in range(means.shape[0]):
eig_vals, eig_vecs = np.linalg.eigh(covars[n])
unit_eig_vec = eig_vecs[0] / np.linalg.norm(eig_vecs[0])
angle = np.arctan2(unit_eig_vec[1], unit_eig_vec[0])
# Ellipse needs degrees
angle = 180 * angle / np.pi
        # convert eigenvalues to ellipse axis lengths (2 * sqrt(2) std devs)
eig_vals = 2 * np.sqrt(2) * np.sqrt(eig_vals)
ell = mpl.patches.Ellipse(means[n], eig_vals[0], eig_vals[1],
180 + angle)
ell.set_clip_box(ax.bbox)
ell.set_alpha(weights[n])
ell.set_facecolor('#56B4E9')
ax.add_artist(ell)


def plot_results(ax1, ax2, estimator, X, y, title, plot_title=False):
ax1.set_title(title)
ax1.scatter(X[:, 0], X[:, 1], s=5, marker='o', color=colors[y], alpha=0.8)
ax1.set_xlim(-2., 2.)
ax1.set_ylim(-3., 3.)
ax1.set_xticks(())
ax1.set_yticks(())
plot_ellipses(ax1, estimator.weights_, estimator.means_,
estimator.covariances_)

ax2.get_xaxis().set_tick_params(direction='out')
ax2.yaxis.grid(True, alpha=0.7)
for k, w in enumerate(estimator.weights_):
ax2.bar(k - .45, w, width=0.9, color='#56B4E9', zorder=3)
ax2.text(k, w + 0.007, "%.1f%%" % (w * 100.),
horizontalalignment='center')
ax2.set_xlim(-.6, 2 * n_components - .4)
ax2.set_ylim(0., 1.1)
    ax2.tick_params(axis='y', which='both', left=False,
                    right=False, labelleft=False)
    ax2.tick_params(axis='x', which='both', top=False)

if plot_title:
ax1.set_ylabel('Estimated Mixtures')
ax2.set_ylabel('Weight of each component')

# Parameters of the dataset
random_state, n_components, n_features = 2, 3, 2
colors = np.array(['#0072B2', '#F0E442', '#D55E00'])

covars = np.array([[[.7, .0], [.0, .1]],
[[.5, .0], [.0, .1]],
[[.5, .0], [.0, .1]]])
samples = np.array([200, 500, 200])
means = np.array([[.0, -.70],
[.0, .0],
[.0, .70]])

# Use mean_precision_prior=0.8 to minimize the influence of the prior
estimators = [
("Finite mixture with a Dirichlet distribution\nprior and "
r"$\gamma_0=$", BayesianGaussianMixture(
weight_concentration_prior_type="dirichlet_distribution",
n_components=2 * n_components, reg_covar=0, init_params='random',
max_iter=1500, mean_precision_prior=.8,
random_state=random_state), [0.001, 1, 1000]),
("Infinite mixture with a Dirichlet process\n prior and" r"$\gamma_0=$",
BayesianGaussianMixture(
weight_concentration_prior_type="dirichlet_process",
n_components=2 * n_components, reg_covar=0, init_params='random',
max_iter=1500, mean_precision_prior=.8,
random_state=random_state), [1, 1000, 100000])]

# Generate data
rng = np.random.RandomState(random_state)
X = np.vstack([
rng.multivariate_normal(means[j], covars[j], samples[j])
for j in range(n_components)])
y = np.concatenate([j * np.ones(samples[j], dtype=int)
for j in range(n_components)])

# Plot results in two different figures
for (title, estimator, concentrations_prior) in estimators:
plt.figure(figsize=(4.7 * 3, 8))
plt.subplots_adjust(bottom=.04, top=0.90, hspace=.05, wspace=.05,
left=.03, right=.99)

gs = gridspec.GridSpec(3, len(concentrations_prior))
for k, concentration in enumerate(concentrations_prior):
estimator.weight_concentration_prior = concentration
estimator.fit(X)
plot_results(plt.subplot(gs[0:2, k]), plt.subplot(gs[2, k]), estimator,
X, y, r"%s$%.1e$" % (title, concentration),
plot_title=k == 0)

plt.show()
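As a quick numeric check of the behaviour described in this example's docstring, a minimal sketch that counts components with non-negligible weight at a low and a high concentration value (the 1e-2 threshold, data, and settings are illustrative assumptions, not part of the PR):

import numpy as np
from sklearn.mixture import BayesianGaussianMixture

rng = np.random.RandomState(2)
X = np.vstack([rng.multivariate_normal(mean, np.diag([.5, .1]), 200)
               for mean in ([0, -.7], [0, 0], [0, .7])])

for gamma_0 in (0.001, 1000.):
    bgmm = BayesianGaussianMixture(
        n_components=6, weight_concentration_prior_type='dirichlet_process',
        weight_concentration_prior=gamma_0, random_state=2).fit(X)
    # components whose posterior weight stays above the (arbitrary) threshold
    n_active = np.sum(bgmm.weights_ > 1e-2)
    print('gamma_0=%g: %d components with non-negligible weight'
          % (gamma_0, n_active))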
31 changes: 17 additions & 14 deletions examples/mixture/plot_gmm.py
@@ -3,16 +3,18 @@
Gaussian Mixture Model Ellipsoids
=================================

Plot the confidence ellipsoids of a mixture of two Gaussians
obtained with Expectation Maximisation (``GaussianMixture`` class) and
Variational Inference (``BayesianGaussianMixture`` class models with
a Dirichlet process prior).

Both models have access to five components with which to fit the data. Note
that the Expectation Maximisation model will necessarily use all five
components while the Variational Inference model will effectively only use as
many as are needed for a good fit. Here we can see that the Expectation
Maximisation model splits some components arbitrarily, because it is trying to
fit too many components, while the Dirichlet Process model adapts its number
of states automatically.

This example doesn't show it, as we're in a low-dimensional space, but
another advantage of the Dirichlet process model is that it can fit
@@ -56,7 +58,7 @@ def plot_results(X, Y_, means, covariances, index, title):
ell.set_alpha(0.5)
splot.add_artist(ell)

    plt.xlim(-9., 5.)
plt.ylim(-3., 6.)
plt.xticks(())
plt.yticks(())
@@ -78,8 +80,9 @@ def plot_results(X, Y_, means, covariances, index, title):
'Gaussian Mixture')

# Fit a Dirichlet process Gaussian mixture using five components
dpgmm = mixture.BayesianGaussianMixture(n_components=5,
                                        covariance_type='full').fit(X)
plot_results(X, dpgmm.predict(X), dpgmm.means_, dpgmm.covariances_, 1,
             'Bayesian Gaussian Mixture with a Dirichlet process prior')

plt.show()
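To see the component-pruning contrast described in this example's docstring without plotting, a minimal sketch on two well-separated blobs (the data and rounding are illustrative assumptions; ``BayesianGaussianMixture`` defaults to the Dirichlet process prior in scikit-learn >= 0.18):

import numpy as np
from sklearn import mixture

rng = np.random.RandomState(0)
X = np.r_[.5 * rng.randn(300, 2), .5 * rng.randn(300, 2) + [3, 3]]

gmm = mixture.GaussianMixture(n_components=5, random_state=0).fit(X)
bgmm = mixture.BayesianGaussianMixture(n_components=5, random_state=0).fit(X)

# EM keeps all five weights well above zero; the Dirichlet process prior
# concentrates the weight on the components the data actually supports.
print(np.round(gmm.weights_, 3))
print(np.round(bgmm.weights_, 3))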