From 75548b7ab2be3af3e36aa077461a18bfd8ab68e0 Mon Sep 17 00:00:00 2001 From: Claire Savard Date: Sun, 1 Dec 2019 20:18:31 -0700 Subject: [PATCH 1/2] fixed default values in dbscan --- sklearn/cluster/_dbscan.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sklearn/cluster/_dbscan.py b/sklearn/cluster/_dbscan.py index 3b3ccb1fbe6dc..a47b242a19951 100644 --- a/sklearn/cluster/_dbscan.py +++ b/sklearn/cluster/_dbscan.py @@ -156,18 +156,18 @@ class DBSCAN(ClusterMixin, BaseEstimator): Parameters ---------- - eps : float, optional + eps : float, default=0.5 The maximum distance between two samples for one to be considered as in the neighborhood of the other. This is not a maximum bound on the distances of points within a cluster. This is the most important DBSCAN parameter to choose appropriately for your data set and distance function. - min_samples : int, optional + min_samples : int, default=5 The number of samples (or total weight) in a neighborhood for a point to be considered as a core point. This includes the point itself. - metric : string, or callable + metric : string, default='euclidean' The metric to use when calculating distance between instances in a feature array. If metric is a string or callable, it must be one of the options allowed by :func:`sklearn.metrics.pairwise_distances` for @@ -179,27 +179,27 @@ class DBSCAN(ClusterMixin, BaseEstimator): .. versionadded:: 0.17 metric *precomputed* to accept precomputed sparse matrix. - metric_params : dict, optional + metric_params : dict, default=None Additional keyword arguments for the metric function. .. versionadded:: 0.19 - algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional + algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto' The algorithm to be used by the NearestNeighbors module to compute pointwise distances and find nearest neighbors. See NearestNeighbors module documentation for details. 
- leaf_size : int, optional (default = 30) + leaf_size : int, default=30 Leaf size passed to BallTree or cKDTree. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem. - p : float, optional + p : float, default=None The power of the Minkowski metric to be used to calculate distance between points. - n_jobs : int or None, optional (default=None) + n_jobs : int or None, default=None The number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary <n_jobs>` From 7dde01234bdeb7eed0c28faee3e7667981ee6a38 Mon Sep 17 00:00:00 2001 From: Claire Savard Date: Sun, 1 Dec 2019 20:41:33 -0700 Subject: [PATCH 2/2] fixed metric default value for dbscan --- sklearn/cluster/_dbscan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/cluster/_dbscan.py b/sklearn/cluster/_dbscan.py index a47b242a19951..a464e3951673a 100644 --- a/sklearn/cluster/_dbscan.py +++ b/sklearn/cluster/_dbscan.py @@ -167,7 +167,7 @@ class DBSCAN(ClusterMixin, BaseEstimator): The number of samples (or total weight) in a neighborhood for a point to be considered as a core point. This includes the point itself. - metric : string, default='euclidean' + metric : string, or callable, default='euclidean' The metric to use when calculating distance between instances in a feature array. If metric is a string or callable, it must be one of the options allowed by :func:`sklearn.metrics.pairwise_distances` for