diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst
index 0f46f28cd340b..932ca1e01b6d6 100644
--- a/doc/whats_new/v1.3.rst
+++ b/doc/whats_new/v1.3.rst
@@ -619,6 +619,13 @@ Changelog
   `n_targets`, which is used to decide the number of outputs when sampling
   from the prior distributions. :pr:`23099` by :user:`Zhehao Liu `.
 
+:mod:`sklearn.mixture`
+......................
+
+- |Efficiency| :class:`GaussianMixture` is now more efficient and bypasses
+  unnecessary initialization when the weights, means, and precisions are
+  given by the user. :pr:`26021` by :user:`Jiawei Zhang `.
+
 :mod:`sklearn.model_selection`
 ..............................
 
diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py
index 9fcc791032c48..09e3674a6779f 100644
--- a/sklearn/mixture/_gaussian_mixture.py
+++ b/sklearn/mixture/_gaussian_mixture.py
@@ -754,6 +754,19 @@ def _check_parameters(self, X):
                 n_features,
             )
 
+    def _initialize_parameters(self, X, random_state):
+        # If all the initial parameters are provided, then there is no need to
+        # run the initialization.
+        compute_resp = (
+            self.weights_init is None
+            or self.means_init is None
+            or self.precisions_init is None
+        )
+        if compute_resp:
+            super()._initialize_parameters(X, random_state)
+        else:
+            self._initialize(X, None)
+
     def _initialize(self, X, resp):
         """Initialization of the Gaussian mixture parameters.
 
@@ -764,11 +777,13 @@ def _initialize(self, X, resp):
         resp : array-like of shape (n_samples, n_components)
         """
         n_samples, _ = X.shape
-
-        weights, means, covariances = _estimate_gaussian_parameters(
-            X, resp, self.reg_covar, self.covariance_type
-        )
-        weights /= n_samples
+        weights, means, covariances = None, None, None
+        if resp is not None:
+            weights, means, covariances = _estimate_gaussian_parameters(
+                X, resp, self.reg_covar, self.covariance_type
+            )
+            if self.weights_init is None:
+                weights /= n_samples
 
         self.weights_ = weights if self.weights_init is None else self.weights_init
         self.means_ = means if self.means_init is None else self.means_init
diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py
index 2f39033faed6b..e24a6af966374 100644
--- a/sklearn/mixture/tests/test_gaussian_mixture.py
+++ b/sklearn/mixture/tests/test_gaussian_mixture.py
@@ -8,11 +8,13 @@
 import sys
 import warnings
 from io import StringIO
+from unittest.mock import Mock
 
 import numpy as np
 import pytest
 from scipy import linalg, stats
 
+import sklearn
 from sklearn.cluster import KMeans
 from sklearn.covariance import EmpiricalCovariance
 from sklearn.datasets import make_spd_matrix
@@ -1387,3 +1389,34 @@ def test_gaussian_mixture_single_component_stable():
     X = rng.multivariate_normal(np.zeros(2), np.identity(2), size=3)
     gm = GaussianMixture(n_components=1)
     gm.fit(X).sample()
+
+
+def test_gaussian_mixture_all_init_does_not_estimate_gaussian_parameters(
+    monkeypatch,
+    global_random_seed,
+):
+    """When all init parameters are provided, the Gaussian parameters
+    are not estimated.
+
+    Non-regression test for gh-26015.
+    """
+
+    mock = Mock(side_effect=_estimate_gaussian_parameters)
+    monkeypatch.setattr(
+        sklearn.mixture._gaussian_mixture, "_estimate_gaussian_parameters", mock
+    )
+
+    rng = np.random.RandomState(global_random_seed)
+    rand_data = RandomData(rng)
+
+    gm = GaussianMixture(
+        n_components=rand_data.n_components,
+        weights_init=rand_data.weights,
+        means_init=rand_data.means,
+        precisions_init=rand_data.precisions["full"],
+        random_state=rng,
+    )
+    gm.fit(rand_data.X["full"])
+    # The initial Gaussian parameters are not estimated; they are only estimated
+    # once per M-step, so the call count equals the number of EM iterations.
+    assert mock.call_count == gm.n_iter_