scikit-learn · ogrisel · Apr 29, 2025 · Mar 31, 2025 · Mar 31, 2025 · Mar 31, 2025
diff --git a/doc/whats_new/upcoming_changes/sklearn.manifold/31117.enhancement.rst b/doc/whats_new/upcoming_changes/sklearn.manifold/31117.enhancement.rst
@@ -0,0 +1,3 @@
+:class:`manifold.MDS` will switch to use `n_init=1` by default,
+starting from version 1.9.
+By :user:`Dmitry Kobak <dkobak>`
diff --git a/doc/whats_new/upcoming_changes/sklearn.manifold/31117.fix.rst b/doc/whats_new/upcoming_changes/sklearn.manifold/31117.fix.rst
@@ -0,0 +1,5 @@
+:class:`manifold.MDS` now uses `eps=1e-6` by default and the convergence
+criterion was adjusted to make sense for both metric and non-metric MDS
+and to follow the reference R implementation. The formula for normalized
+stress was adjusted to follow the original definition by Kruskal.
+By :user:`Dmitry Kobak <dkobak>`
diff --git a/examples/manifold/plot_compare_methods.py b/examples/manifold/plot_compare_methods.py
@@ -166,7 +166,7 @@ def add_2d_scatter(ax, points, points_color, title=None):
 md_scaling = manifold.MDS(
     n_components=n_components,
     max_iter=50,
-    n_init=4,
+    n_init=1,
     random_state=0,
     normalized_stress=False,
 )

diff --git a/examples/manifold/plot_lle_digits.py b/examples/manifold/plot_lle_digits.py
@@ -130,7 +130,7 @@ def plot_embedding(X, title):
     "LTSA LLE embedding": LocallyLinearEmbedding(
         n_neighbors=n_neighbors, n_components=2, method="ltsa"
     ),
-    "MDS embedding": MDS(n_components=2, n_init=1, max_iter=120, n_jobs=2),
+    "MDS embedding": MDS(n_components=2, n_init=1, max_iter=120, eps=1e-6),
     "Random Trees embedding": make_pipeline(
         RandomTreesEmbedding(n_estimators=200, max_depth=5, random_state=0),
         TruncatedSVD(n_components=2),

diff --git a/examples/manifold/plot_mds.py b/examples/manifold/plot_mds.py
@@ -5,14 +5,17 @@
 
 An illustration of the metric and non-metric MDS on generated noisy data.
 
-The reconstructed points using the metric MDS and non metric MDS are slightly
-shifted to avoid overlapping.
-
 """
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
+# %%
+# Dataset preparation
+# -------------------
+#
+# We start by uniformly generating 20 points in a 2D space.
+
 import numpy as np
 from matplotlib import pyplot as plt
 from matplotlib.collections import LineCollection
@@ -31,6 +34,11 @@
 # Center the data
 X_true -= X_true.mean()
 
+# %%
+# Now we compute pairwise distances between all points and add
+# a small amount of noise to the distance matrix. We make sure
+# to keep the noisy distance matrix symmetric.
+
 # Compute pairwise Euclidean distances
 distances = euclidean_distances(X_true)
 
@@ -40,10 +48,14 @@
 np.fill_diagonal(noise, 0)
 distances += noise
 
+# %%
+# Here we compute metric and non-metric MDS of the noisy distance matrix.
+
 mds = manifold.MDS(
     n_components=2,
     max_iter=3000,
     eps=1e-9,
+    n_init=1,
     random_state=42,
     dissimilarity="precomputed",
     n_jobs=1,
@@ -62,10 +74,16 @@
 )
 X_nmds = nmds.fit_transform(distances)
 
-# Rescale the data
-X_mds *= np.sqrt((X_true**2).sum()) / np.sqrt((X_mds**2).sum())
+# %%
+# Rescaling the non-metric MDS solution to match the spread of the original data.
+
 X_nmds *= np.sqrt((X_true**2).sum()) / np.sqrt((X_nmds**2).sum())
 
+# %%
+# To make the visual comparisons easier, we rotate the original data and both MDS
+# solutions to their PCA axes. And flip horizontal and vertical MDS axes, if needed,
+# to match the original data orientation.
+
 # Rotate the data
 pca = PCA(n_components=2)
 X_true = pca.fit_transform(X_true)
@@ -79,6 +97,9 @@
     if np.corrcoef(X_nmds[:, i], X_true[:, i])[0, 1] < 0:
         X_nmds[:, i] *= -1
 
+# %%
+# Finally, we plot the original data and both MDS reconstructions.
+
 fig = plt.figure(1)
 ax = plt.axes([0.0, 0.0, 1.0, 1.0])
 

diff --git a/sklearn/manifold/_mds.py b/sklearn/manifold/_mds.py
@@ -27,7 +27,7 @@ def _smacof_single(
     init=None,
     max_iter=300,
     verbose=0,
-    eps=1e-3,
+    eps=1e-6,
     random_state=None,
     normalized_stress=False,
 ):
@@ -59,18 +59,21 @@ def _smacof_single(
     verbose : int, default=0
         Level of verbosity.
 
-    eps : float, default=1e-3
-        Relative tolerance with respect to stress at which to declare
-        convergence. The value of `eps` should be tuned separately depending
-        on whether or not `normalized_stress` is being used.
+    eps : float, default=1e-6
+        The tolerance with respect to stress (normalized by the sum of squared
+        embedding distances) at which to declare convergence.
+
+        .. versionchanged:: 1.7
+           The default value for `eps` has changed from 1e-3 to 1e-6, as a result
+           of a bugfix in the computation of the convergence criterion.
 
     random_state : int, RandomState instance or None, default=None
         Determines the random number generator used to initialize the centers.
         Pass an int for reproducible results across multiple function calls.
         See :term:`Glossary <random_state>`.
 
     normalized_stress : bool, default=False
-        Whether use and return normalized stress value (Stress-1) instead of raw
+        Whether to return normalized stress value (Stress-1) instead of raw
         stress.
 
         .. versionadded:: 1.2
@@ -168,29 +171,32 @@ def _smacof_single(
         # Compute stress
         distances = euclidean_distances(X)
         stress = ((distances.ravel() - disparities.ravel()) ** 2).sum() / 2
-        if normalized_stress:
-            stress = np.sqrt(stress / ((disparities.ravel() ** 2).sum() / 2))
 
-        normalization = np.sqrt((X**2).sum(axis=1)).sum()
         if verbose >= 2:  # pragma: no cover
             print(f"Iteration {it}, stress {stress:.4f}")
         if old_stress is not None:
-            if (old_stress - stress / normalization) < eps:
+            sum_squared_distances = (distances.ravel() ** 2).sum()
+            if ((old_stress - stress) / (sum_squared_distances / 2)) < eps:
                 if verbose:  # pragma: no cover
                     print("Convergence criterion reached.")
                 break
-        old_stress = stress / normalization
+        old_stress = stress
+
+    if normalized_stress:
+        sum_squared_distances = (distances.ravel() ** 2).sum()
+        stress = np.sqrt(stress / (sum_squared_distances / 2))
 
     return X, stress, it + 1
 
 
+# TODO(1.9): change default `n_init` to 1, see PR #31117
 @validate_params(
     {
         "dissimilarities": ["array-like"],
         "metric": ["boolean"],
         "n_components": [Interval(Integral, 1, None, closed="left")],
         "init": ["array-like", None],
-        "n_init": [Interval(Integral, 1, None, closed="left")],
+        "n_init": [Interval(Integral, 1, None, closed="left"), StrOptions({"warn"})],
         "n_jobs": [Integral, None],
         "max_iter": [Interval(Integral, 1, None, closed="left")],
         "verbose": ["verbose"],
@@ -207,11 +213,11 @@ def smacof(
     metric=True,
     n_components=2,
     init=None,
-    n_init=8,
+    n_init="warn",
     n_jobs=None,
     max_iter=300,
     verbose=0,
-    eps=1e-3,
+    eps=1e-6,
     random_state=None,
     return_n_iter=False,
     normalized_stress="auto",
@@ -262,6 +268,9 @@ def smacof(
         determined by the run with the smallest final stress. If ``init`` is
         provided, this option is overridden and a single run is performed.
 
+        .. versionchanged:: 1.9
+           The default value for `n_iter` will change from 8 to 1 in version 1.9.
+
     n_jobs : int, default=None
         The number of jobs to use for the computation. If multiple
         initializations are used (``n_init``), each run of the algorithm is
@@ -277,10 +286,13 @@ def smacof(
     verbose : int, default=0
         Level of verbosity.
 
-    eps : float, default=1e-3
-        Relative tolerance with respect to stress at which to declare
-        convergence. The value of `eps` should be tuned separately depending
-        on whether or not `normalized_stress` is being used.
+    eps : float, default=1e-6
+        The tolerance with respect to stress (normalized by the sum of squared
+        embedding distances) at which to declare convergence.
+
+        .. versionchanged:: 1.7
+           The default value for `eps` has changed from 1e-3 to 1e-6, as a result
+           of a bugfix in the computation of the convergence criterion.
 
     random_state : int, RandomState instance or None, default=None
         Determines the random number generator used to initialize the centers.
@@ -290,7 +302,7 @@ def smacof(
     return_n_iter : bool, default=False
         Whether or not to return the number of iterations.
 
-    normalized_stress : bool or "auto" default="auto"
+    normalized_stress : bool or "auto", default="auto"
         Whether to return normalized stress value (Stress-1) instead of raw
         stress. By default, metric MDS returns raw stress while non-metric MDS
         returns normalized stress.
@@ -335,17 +347,24 @@ def smacof(
     >>> import numpy as np
     >>> from sklearn.manifold import smacof
     >>> from sklearn.metrics import euclidean_distances
-    >>> X = np.array([[0, 1, 2], [1, 0, 3],[2, 3, 0]])
+    >>> X = np.array([[0, 1, 2], [1, 0, 3], [2, 3, 0]])
     >>> dissimilarities = euclidean_distances(X)
-    >>> mds_result, stress = smacof(dissimilarities, n_components=2, random_state=42)
-    >>> np.round(mds_result, 5)
-    array([[ 0.05352, -1.07253],
-           [ 1.74231, -0.75675],
-           [-1.79583,  1.82928]])
-    >>> np.round(stress, 5).item()
-    0.00128
+    >>> Z, stress = smacof(
+    ...     dissimilarities, n_components=2, n_init=1, eps=1e-6, random_state=42
+    ... )
+    >>> Z.shape
+    (3, 2)
+    >>> np.round(stress, 6).item()
+    3.2e-05
     """
 
+    if n_init == "warn":
+        warnings.warn(
+            "The default value of `n_init` will change from 8 to 1 in 1.9.",
+            FutureWarning,
+        )
+        n_init = 8
+
     dissimilarities = check_array(dissimilarities)
     random_state = check_random_state(random_state)
 
@@ -408,6 +427,7 @@ def smacof(
         return best_pos, best_stress
 
 
+# TODO(1.9): change default `n_init` to 1, see PR #31117
 class MDS(BaseEstimator):
     """Multidimensional scaling.
 
@@ -428,16 +448,22 @@ class MDS(BaseEstimator):
         initializations. The final results will be the best output of the runs,
         determined by the run with the smallest final stress.
 
+        .. versionchanged:: 1.9
+           The default value for `n_init` will change from 4 to 1 in version 1.9.
+
     max_iter : int, default=300
         Maximum number of iterations of the SMACOF algorithm for a single run.
 
     verbose : int, default=0
         Level of verbosity.
 
-    eps : float, default=1e-3
-        Relative tolerance with respect to stress at which to declare
-        convergence. The value of `eps` should be tuned separately depending
-        on whether or not `normalized_stress` is being used.
+    eps : float, default=1e-6
+        The tolerance with respect to stress (normalized by the sum of squared
+        embedding distances) at which to declare convergence.
+
+        .. versionchanged:: 1.7
+           The default value for `eps` has changed from 1e-3 to 1e-6, as a result
+           of a bugfix in the computation of the convergence criterion.
 
     n_jobs : int, default=None
         The number of jobs to use for the computation. If multiple
@@ -464,9 +490,9 @@ class MDS(BaseEstimator):
             ``fit_transform``.
 
     normalized_stress : bool or "auto" default="auto"
-        Whether use and return normalized stress value (Stress-1) instead of raw
-        stress. By default, metric MDS uses raw stress while non-metric MDS uses
-        normalized stress.
+        Whether to return normalized stress value (Stress-1) instead of raw
+        stress. By default, metric MDS returns raw stress while non-metric MDS
+        returns normalized stress.
 
         .. versionadded:: 1.2
 
@@ -539,7 +565,7 @@ class MDS(BaseEstimator):
     >>> X, _ = load_digits(return_X_y=True)
     >>> X.shape
     (1797, 64)
-    >>> embedding = MDS(n_components=2, normalized_stress='auto')
+    >>> embedding = MDS(n_components=2, n_init=1)
     >>> X_transformed = embedding.fit_transform(X[:100])
     >>> X_transformed.shape
     (100, 2)
@@ -554,7 +580,7 @@ class MDS(BaseEstimator):
     _parameter_constraints: dict = {
         "n_components": [Interval(Integral, 1, None, closed="left")],
         "metric": ["boolean"],
-        "n_init": [Interval(Integral, 1, None, closed="left")],
+        "n_init": [Interval(Integral, 1, None, closed="left"), StrOptions({"warn"})],
         "max_iter": [Interval(Integral, 1, None, closed="left")],
         "verbose": ["verbose"],
         "eps": [Interval(Real, 0.0, None, closed="left")],
@@ -569,10 +595,10 @@ def __init__(
         n_components=2,
         *,
         metric=True,
-        n_init=4,
+        n_init="warn",
         max_iter=300,
         verbose=0,
-        eps=1e-3,
+        eps=1e-6,
         n_jobs=None,
         random_state=None,
         dissimilarity="euclidean",
@@ -646,10 +672,20 @@ def fit_transform(self, X, y=None, init=None):
         X_new : ndarray of shape (n_samples, n_components)
             X transformed in the new space.
         """
+
+        if self.n_init == "warn":
+            warnings.warn(
+                "The default value of `n_init` will change from 4 to 1 in 1.9.",
+                FutureWarning,
+            )
+            self._n_init = 4
+        else:
+            self._n_init = self.n_init
+
         X = validate_data(self, X)
         if X.shape[0] == X.shape[1] and self.dissimilarity != "precomputed":
             warnings.warn(
-                "The MDS API has changed. ``fit`` now constructs an"
+                "The MDS API has changed. ``fit`` now constructs a"
                 " dissimilarity matrix from data. To use a custom "
                 "dissimilarity matrix, set "
                 "``dissimilarity='precomputed'``."
@@ -665,7 +701,7 @@ def fit_transform(self, X, y=None, init=None):
             metric=self.metric,
             n_components=self.n_components,
             init=init,
-            n_init=self.n_init,
+            n_init=self._n_init,
             n_jobs=self.n_jobs,
             max_iter=self.max_iter,
             verbose=self.verbose,