scikit-learn · qinhanmin2014 · Sep 3, 2018 · Sep 3, 2018
diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst
@@ -838,9 +838,9 @@ algorithm builds a *reachability* graph, which assigns each sample both a
 ``reachability_`` distance, and a spot within the cluster ``ordering_``
 attribute; these two attributes are assigned when the model is fitted, and are
 used to determine cluster membership. If OPTICS is run with the default value
-of *inf* set for ``max_bound``, then DBSCAN style cluster extraction can be
+of *inf* set for ``max_eps``, then DBSCAN style cluster extraction can be
 performed in linear time for any given ``eps`` value using the
-``extract_dbscan`` method. Setting ``max_bound`` to a lower value will result
+``extract_dbscan`` method. Setting ``max_eps`` to a lower value will result
 in shorter run times, and can be thought of as the maximum cluster object size
 (in diameter) that OPTICS will be able to extract.
 
@@ -892,10 +892,10 @@ larger parent cluster.
     shorter run time than OPTICS; however, for repeated runs at varying ``eps``
     values, a single run of OPTICS may require less cumulative runtime than
     DBSCAN. It is also important to note that OPTICS output can be unstable at
-    ``eps`` values very close to the initial ``max_bound`` value. OPTICS seems
+    ``eps`` values very close to the initial ``max_eps`` value. OPTICS seems
     to produce near identical results to DBSCAN provided that ``eps`` passed to
     ``extract_dbscan`` is a half order of magnitude less than the inital
-    ``max_bound`` that was used to fit; using a value close to ``max_bound``
+    ``max_eps`` that was used to fit; using a value close to ``max_eps``
     will throw a warning, and using a value larger will result in an exception. 
 
 .. topic:: Computational Complexity
@@ -909,7 +909,7 @@ larger parent cluster.
     multithreaded, and has better algorithmic runtime complexity than OPTICS--
     at the cost of worse memory scaling. For extremely large datasets that
     exhaust system memory using HDBSCAN, OPTICS will maintain *n* (as opposed
-    to *n^2* memory scaling); however, tuning of the ``max_bound`` parameter
+    to *n^2* memory scaling); however, tuning of the ``max_eps`` parameter
     will likely need to be used to give a solution in a reasonable amount of
     wall time.
 

diff --git a/sklearn/cluster/optics_.py b/sklearn/cluster/optics_.py
@@ -21,7 +21,7 @@
 from ._optics_inner import quick_scan
 
 
-def optics(X, min_samples=5, max_bound=np.inf, metric='euclidean',
+def optics(X, min_samples=5, max_eps=np.inf, metric='euclidean',
            p=2, metric_params=None, maxima_ratio=.75,
            rejection_ratio=.7, similarity_threshold=0.4,
            significant_min=.003, min_cluster_size_ratio=.005,
@@ -45,11 +45,11 @@ def optics(X, min_samples=5, max_bound=np.inf, metric='euclidean',
         The number of samples in a neighborhood for a point to be considered
         as a core point.
 
-    max_bound : float, optional
+    max_eps : float, optional
         The maximum distance between two samples for them to be considered
         as in the same neighborhood. This is also the largest object size
         expected within the dataset. Default value of "np.inf" will identify
-        clusters across all scales; reducing `max_bound` will result in
+        clusters across all scales; reducing `max_eps` will result in
         shorter run times.
 
     metric : string or callable, optional
@@ -147,7 +147,7 @@ def optics(X, min_samples=5, max_bound=np.inf, metric='euclidean',
     Record 28, no. 2 (1999): 49-60.
     """
 
-    clust = OPTICS(min_samples, max_bound, metric, p, metric_params,
+    clust = OPTICS(min_samples, max_eps, metric, p, metric_params,
                    maxima_ratio, rejection_ratio,
                    similarity_threshold, significant_min,
                    min_cluster_size_ratio, min_maxima_ratio,
@@ -172,11 +172,11 @@ class OPTICS(BaseEstimator, ClusterMixin):
         The number of samples in a neighborhood for a point to be considered
         as a core point.
 
-    max_bound : float, optional
+    max_eps : float, optional
         The maximum distance between two samples for them to be considered
         as in the same neighborhood. This is also the largest object size
         expected within the dataset. Default value of "np.inf" will identify
-        clusters across all scales; reducing `max_bound` will result in
+        clusters across all scales; reducing `max_eps` will result in
         shorter run times.
 
     metric : string or callable, optional
@@ -284,14 +284,14 @@ class OPTICS(BaseEstimator, ClusterMixin):
     Record 28, no. 2 (1999): 49-60.
     """
 
-    def __init__(self, min_samples=5, max_bound=np.inf, metric='euclidean',
+    def __init__(self, min_samples=5, max_eps=np.inf, metric='euclidean',
                  p=2, metric_params=None, maxima_ratio=.75,
                  rejection_ratio=.7, similarity_threshold=0.4,
                  significant_min=.003, min_cluster_size_ratio=.005,
                  min_maxima_ratio=0.001, algorithm='ball_tree',
                  leaf_size=30, n_jobs=None):
 
-        self.max_bound = max_bound
+        self.max_eps = max_eps
         self.min_samples = min_samples
         self.maxima_ratio = maxima_ratio
         self.rejection_ratio = rejection_ratio
@@ -310,7 +310,7 @@ def fit(self, X, y=None):
         """Perform OPTICS clustering
 
         Extracts an ordered list of points and reachability distances, and
-        performs initial clustering using `max_bound` distance specified at
+        performs initial clustering using `max_eps` distance specified at
         OPTICS object instantiation.
 
         Parameters
@@ -378,7 +378,7 @@ def fit(self, X, y=None):
     def _expand_cluster_order(self, point, X, nbrs):
         # As above, not parallelizable. Parallelizing would allow items in
         # the 'unprocessed' list to switch to 'processed'
-        if self.core_distances_[point] <= self.max_bound:
+        if self.core_distances_[point] <= self.max_eps:
             while not self._processed[point]:
                 self._processed[point] = True
                 self.ordering_.append(point)
@@ -389,7 +389,7 @@ def _expand_cluster_order(self, point, X, nbrs):
 
     def _set_reach_dist(self, point_index, X, nbrs):
         P = np.array(X[point_index]).reshape(1, -1)
-        indices = nbrs.radius_neighbors(P, radius=self.max_bound,
+        indices = nbrs.radius_neighbors(P, radius=self.max_eps,
                                         return_distance=False)[0]
 
         # Getting indices of neighbors that have not been processed
@@ -416,17 +416,17 @@ def _set_reach_dist(self, point_index, X, nbrs):
     def extract_dbscan(self, eps):
         """Performs DBSCAN extraction for an arbitrary epsilon.
 
-        Extraction runs in linear time. Note that if the `max_bound` OPTICS
+        Extraction runs in linear time. Note that if the `max_eps` OPTICS
         parameter was set to < inf for extracting reachability and ordering
         arrays, DBSCAN extractions will be unstable for `eps` values close to
-        `max_bound`. Setting `eps` < (`max_bound` / 5.0) will guarantee
+        `max_eps`. Setting `eps` < (`max_eps` / 5.0) will guarantee
         extraction parity with DBSCAN.
 
         Parameters
         ----------
         eps : float or int, required
-            DBSCAN `eps` parameter. Must be set to < `max_bound`. Equivalence
-            with DBSCAN algorithm is achieved if `eps` is < (`max_bound` / 5)
+            DBSCAN `eps` parameter. Must be set to < `max_eps`. Equivalence
+            with DBSCAN algorithm is achieved if `eps` is < (`max_eps` / 5)
 
         Returns
         -------
@@ -438,14 +438,14 @@ def extract_dbscan(self, eps):
         """
         check_is_fitted(self, 'reachability_')
 
-        if eps > self.max_bound:
+        if eps > self.max_eps:
             raise ValueError('Specify an epsilon smaller than %s. Got %s.'
-                             % (self.max_bound, eps))
+                             % (self.max_eps, eps))
 
-        if eps * 5.0 > (self.max_bound * 1.05):
+        if eps * 5.0 > (self.max_eps * 1.05):
             warnings.warn(
-                "Warning, max_bound (%s) is close to eps (%s): "
-                "Output may be unstable." % (self.max_bound, eps),
+                "Warning, max_eps (%s) is close to eps (%s): "
+                "Output may be unstable." % (self.max_eps, eps),
                 RuntimeWarning, stacklevel=2)
         # Stability warning is documented in _extract_dbscan method...
 

diff --git a/sklearn/cluster/tests/test_optics.py b/sklearn/cluster/tests/test_optics.py
@@ -27,7 +27,7 @@ def test_correct_number_of_clusters():
     X = generate_clustered_data(n_clusters=n_clusters)
     # Parameters chosen specifically for this task.
     # Compute OPTICS
-    clust = OPTICS(max_bound=5.0 * 6.0, min_samples=4, metric='euclidean')
+    clust = OPTICS(max_eps=5.0 * 6.0, min_samples=4, metric='euclidean')
     clust.fit(X)
     # number of clusters, ignoring noise if present
     n_clusters_1 = len(set(clust.labels_)) - int(-1 in clust.labels_)
@@ -41,7 +41,7 @@ def test_minimum_number_of_sample_check():
 
     # Compute OPTICS
     X = [[1, 1]]
-    clust = OPTICS(max_bound=5.0 * 0.3, min_samples=10)
+    clust = OPTICS(max_eps=5.0 * 0.3, min_samples=10)
 
     # Run the fit
     assert_raise_message(ValueError, msg, clust.fit, X)
@@ -51,7 +51,7 @@ def test_empty_extract():
     # Test extract where fit() has not yet been run.
     msg = ("This OPTICS instance is not fitted yet. Call 'fit' with "
            "appropriate arguments before using this method.")
-    clust = OPTICS(max_bound=5.0 * 0.3, min_samples=10)
+    clust = OPTICS(max_eps=5.0 * 0.3, min_samples=10)
     assert_raise_message(ValueError, msg, clust.extract_dbscan, 0.01)
 
 
@@ -63,7 +63,7 @@ def test_bad_extract():
                                 cluster_std=0.4, random_state=0)
 
     # Compute OPTICS
-    clust = OPTICS(max_bound=5.0 * 0.003, min_samples=10)
+    clust = OPTICS(max_eps=5.0 * 0.003, min_samples=10)
     clust2 = clust.fit(X)
     assert_raise_message(ValueError, msg, clust2.extract_dbscan, 0.3)
 
@@ -76,7 +76,7 @@ def test_close_extract():
                                 cluster_std=0.4, random_state=0)
 
     # Compute OPTICS
-    clust = OPTICS(max_bound=1.0, min_samples=10)
+    clust = OPTICS(max_eps=1.0, min_samples=10)
     clust3 = clust.fit(X)
     # check warning when centers are passed
     assert_warns(RuntimeWarning, clust3.extract_dbscan, .3)