diff --git a/sklearn/cluster/_optics.py b/sklearn/cluster/_optics.py index 0cd32023de46c..4a1a80c9065c2 100644 --- a/sklearn/cluster/_optics.py +++ b/sklearn/cluster/_optics.py @@ -34,19 +34,21 @@ class OPTICS(ClusterMixin, BaseEstimator): """Estimate clustering structure from vector array. OPTICS (Ordering Points To Identify the Clustering Structure), closely - related to DBSCAN, finds core sample of high density and expands clusters - from them [1]_. Unlike DBSCAN, keeps cluster hierarchy for a variable + related to DBSCAN, finds core samples of high density and expands clusters + from them [1]_. Unlike DBSCAN, it keeps cluster hierarchy for a variable neighborhood radius. Better suited for usage on large datasets than the - current sklearn implementation of DBSCAN. + current scikit-learn implementation of DBSCAN. - Clusters are then extracted using a DBSCAN-like method - (cluster_method = 'dbscan') or an automatic + Clusters are then extracted from the cluster-order using a + DBSCAN-like method (cluster_method = 'dbscan') or an automatic technique proposed in [1]_ (cluster_method = 'xi'). This implementation deviates from the original OPTICS by first performing - k-nearest-neighborhood searches on all points to identify core sizes, then - computing only the distances to unprocessed points when constructing the - cluster order. Note that we do not employ a heap to manage the expansion + k-nearest-neighborhood searches on all points to identify core sizes of + all points (instead of computing neighbors while looping through points). + Reachability distances to only unprocessed points are then computed, to + construct the cluster order, similar to the original OPTICS. + Note that we do not employ a heap to manage the expansion candidates, so the time complexity will be O(n^2). Read more in the :ref:`User Guide <optics>`. @@ -68,9 +70,9 @@ class OPTICS(ClusterMixin, BaseEstimator): metric : str or callable, default='minkowski' Metric to use for distance computation. 
Any metric from scikit-learn - or scipy.spatial.distance can be used. + or :mod:`scipy.spatial.distance` can be used. - If metric is a callable function, it is called on each + If `metric` is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays as input and return one value indicating the distance between them. This works for Scipy's metrics, but is less @@ -90,8 +92,7 @@ class OPTICS(ClusterMixin, BaseEstimator): 'yule'] Sparse matrices are only supported by scikit-learn metrics. - See the documentation for scipy.spatial.distance for details on these - metrics. + See :mod:`scipy.spatial.distance` for details on these metrics. .. note:: `'kulsinski'` is deprecated from SciPy 1.9 and will be removed in SciPy 1.11. @@ -105,9 +106,9 @@ class OPTICS(ClusterMixin, BaseEstimator): metric_params : dict, default=None Additional keyword arguments for the metric function. - cluster_method : str, default='xi' + cluster_method : {'xi', 'dbscan'}, default='xi' The extraction method used to extract clusters using the calculated - reachability and ordering. Possible values are "xi" and "dbscan". + reachability and ordering. eps : float, default=None The maximum distance between two samples for one to be considered as