Skip to content

Commit 941ffbd

Browse files
authored
MNT Correct typos and simplify logic (scikit-learn#29897)
1 parent 54d1ec9 commit 941ffbd

File tree

3 files changed

+29
-25
lines changed

3 files changed

+29
-25
lines changed

sklearn/cluster/_hdbscan/_linkage.pyx

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,8 @@ cpdef cnp.ndarray[MST_edge_t, ndim=1, mode='c'] mst_from_mutual_reachability(
7474
Returns
7575
-------
7676
mst : ndarray of shape (n_samples - 1,), dtype=MST_edge_dtype
77-
The MST representation of the mutual-reahability graph. The MST is
78-
represented as a collecteion of edges.
77+
The MST representation of the mutual-reachability graph. The MST is
78+
represented as a collection of edges.
7979
"""
8080
cdef:
8181
# Note: we utilize ndarrays over memory-views to make use of numpy
@@ -136,8 +136,8 @@ cpdef cnp.ndarray[MST_edge_t, ndim=1, mode='c'] mst_from_data_matrix(
136136
Returns
137137
-------
138138
mst : ndarray of shape (n_samples - 1,), dtype=MST_edge_dtype
139-
The MST representation of the mutual-reahability graph. The MST is
140-
represented as a collecteion of edges.
139+
The MST representation of the mutual-reachability graph. The MST is
140+
represented as a collection of edges.
141141
"""
142142

143143
cdef:
@@ -163,6 +163,8 @@ cpdef cnp.ndarray[MST_edge_t, ndim=1, mode='c'] mst_from_data_matrix(
163163

164164
current_node = 0
165165

166+
# The following loop dynamically updates minimum reachability node-by-node,
167+
# avoiding unnecessary computation where possible.
166168
for i in range(0, n_samples - 1):
167169

168170
in_tree[current_node] = 1
@@ -194,25 +196,27 @@ cpdef cnp.ndarray[MST_edge_t, ndim=1, mode='c'] mst_from_data_matrix(
194196
next_node_core_dist,
195197
pair_distance
196198
)
197-
if mutual_reachability_distance > next_node_min_reach:
198-
if next_node_min_reach < new_reachability:
199-
new_reachability = next_node_min_reach
200-
source_node = next_node_source
201-
new_node = j
202-
continue
203199

200+
# If MRD(current_node, j) is smaller than node j's min_reachability, we update
201+
# node j's min_reachability for future reference.
204202
if mutual_reachability_distance < next_node_min_reach:
205203
min_reachability[j] = mutual_reachability_distance
206204
current_sources[j] = current_node
205+
206+
# If MRD(current_node, j) is also smaller than new_reachability, the
207+
# best candidate found so far, we update it and set their edge as the
208+
# current MST edge candidate.
207209
if mutual_reachability_distance < new_reachability:
208210
new_reachability = mutual_reachability_distance
209211
source_node = current_node
210212
new_node = j
211-
else:
212-
if next_node_min_reach < new_reachability:
213-
new_reachability = next_node_min_reach
214-
source_node = next_node_source
215-
new_node = j
213+
214+
# If the node j is closer to another node already in the tree, we
215+
# make their edge the current MST candidate edge.
216+
elif next_node_min_reach < new_reachability:
217+
new_reachability = next_node_min_reach
218+
source_node = next_node_source
219+
new_node = j
216220

217221
mst[i].current_node = source_node
218222
mst[i].next_node = new_node
@@ -227,8 +231,8 @@ cpdef cnp.ndarray[HIERARCHY_t, ndim=1, mode="c"] make_single_linkage(const MST_e
227231
Parameters
228232
----------
229233
mst : ndarray of shape (n_samples - 1,), dtype=MST_edge_dtype
230-
The MST representation of the mutual-reahability graph. The MST is
231-
represented as a collecteion of edges.
234+
The MST representation of the mutual-reachability graph. The MST is
235+
represented as a collection of edges.
232236
233237
Returns
234238
-------

sklearn/cluster/_hdbscan/_reachability.pyx

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ def _dense_mutual_reachability_graph(
124124
"""
125125
cdef:
126126
intp_t i, j, n_samples = distance_matrix.shape[0]
127-
floating mutual_reachibility_distance
127+
floating mutual_reachability_distance
128128
floating[::1] core_distances
129129

130130
# We assume that the distance matrix is symmetric. We choose to sort every
@@ -141,12 +141,12 @@ def _dense_mutual_reachability_graph(
141141
# _openmp_effective_n_threads
142142
for i in range(n_samples):
143143
for j in range(n_samples):
144-
mutual_reachibility_distance = max(
144+
mutual_reachability_distance = max(
145145
core_distances[i],
146146
core_distances[j],
147147
distance_matrix[i, j],
148148
)
149-
distance_matrix[i, j] = mutual_reachibility_distance
149+
distance_matrix[i, j] = mutual_reachability_distance
150150

151151

152152
def _sparse_mutual_reachability_graph(
@@ -179,7 +179,7 @@ def _sparse_mutual_reachability_graph(
179179
"""
180180
cdef:
181181
integral i, col_ind, row_ind
182-
floating mutual_reachibility_distance
182+
floating mutual_reachability_distance
183183
floating[:] core_distances
184184
floating[:] row_data
185185

@@ -203,10 +203,10 @@ def _sparse_mutual_reachability_graph(
203203
for row_ind in range(n_samples):
204204
for i in range(indptr[row_ind], indptr[row_ind + 1]):
205205
col_ind = indices[i]
206-
mutual_reachibility_distance = max(
206+
mutual_reachability_distance = max(
207207
core_distances[row_ind], core_distances[col_ind], data[i]
208208
)
209-
if isfinite(mutual_reachibility_distance):
210-
data[i] = mutual_reachibility_distance
209+
if isfinite(mutual_reachability_distance):
210+
data[i] = mutual_reachability_distance
211211
elif max_distance > 0:
212212
data[i] = max_distance

sklearn/cluster/_hdbscan/_tree.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ cpdef cnp.ndarray[CONDENSED_t, ndim=1, mode='c'] _condense_tree(
184184
left_count = 1
185185

186186
if right >= n_samples:
187-
right_count = <cnp.intp_t> hierarchy[right - n_samples].cluster_size
187+
right_count = hierarchy[right - n_samples].cluster_size
188188
else:
189189
right_count = 1
190190

0 commit comments

Comments
 (0)