Skip to content

Commit b5f99d8

Browse files
rick2047 authored and jnothman committed
Fix for AttributeError thrown when calling metrics.pairwise_distances with binary metrics and Y is None (scikit-learn#13864)
1 parent 65b279e commit b5f99d8

File tree

3 files changed

+39
-7
lines changed

3 files changed

+39
-7
lines changed

doc/whats_new/v0.21.rst

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,29 @@
22

33
.. currentmodule:: sklearn
44

5+
.. _changes_0_21_1:
6+
7+
Version 0.21.1
8+
==============
9+
10+
**May 2019**
11+
12+
13+
This is a bug-fix release with some minor documentation improvements and
14+
enhancements to features released in 0.21.0.
15+
16+
Changelog
17+
---------
18+
19+
:mod:`sklearn.metrics`
20+
......................
21+
22+
- |Fix| Fixed a bug in :func:`metrics.pairwise_distances` where it would raise
23+
``AttributeError`` for boolean metrics when ``X`` had a boolean dtype and
24+
``Y is None``.
25+
:issue:`13864` by :user:`Paresh Mathur <rick2047>`.
26+
27+
528
.. _changes_0_21:
629

730
Version 0.21.0

sklearn/metrics/pairwise.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,7 @@ def _euclidean_distances_upcast(X, XX=None, Y=None, YY=None):
306306
maxmem = max(
307307
((x_density * n_samples_X + y_density * n_samples_Y) * n_features
308308
+ (x_density * n_samples_X * y_density * n_samples_Y)) / 10,
309-
10 * 2**17)
309+
10 * 2 ** 17)
310310

311311
# The increase amount of memory in 8-byte blocks is:
312312
# - x_density * batch_size * n_features (copy of chunk of X)
@@ -315,7 +315,7 @@ def _euclidean_distances_upcast(X, XX=None, Y=None, YY=None):
315315
# Hence x² + (xd+yd)kx = M, where x=batch_size, k=n_features, M=maxmem
316316
# xd=x_density and yd=y_density
317317
tmp = (x_density + y_density) * n_features
318-
batch_size = (-tmp + np.sqrt(tmp**2 + 4 * maxmem)) / 2
318+
batch_size = (-tmp + np.sqrt(tmp ** 2 + 4 * maxmem)) / 2
319319
batch_size = max(int(batch_size), 1)
320320

321321
x_batches = gen_batches(X.shape[0], batch_size)
@@ -916,7 +916,7 @@ def sigmoid_kernel(X, Y=None, gamma=None, coef0=1):
916916
K = safe_sparse_dot(X, Y.T, dense_output=True)
917917
K *= gamma
918918
K += coef0
919-
np.tanh(K, K) # compute tanh in-place
919+
np.tanh(K, K) # compute tanh in-place
920920
return K
921921

922922

@@ -949,7 +949,7 @@ def rbf_kernel(X, Y=None, gamma=None):
949949

950950
K = euclidean_distances(X, Y, squared=True)
951951
K *= -gamma
952-
np.exp(K, K) # exponentiate K in-place
952+
np.exp(K, K) # exponentiate K in-place
953953
return K
954954

955955

@@ -983,7 +983,7 @@ def laplacian_kernel(X, Y=None, gamma=None):
983983
gamma = 1.0 / X.shape[1]
984984

985985
K = -gamma * manhattan_distances(X, Y)
986-
np.exp(K, K) # exponentiate K in-place
986+
np.exp(K, K) # exponentiate K in-place
987987
return K
988988

989989

@@ -1561,7 +1561,8 @@ def pairwise_distances(X, Y=None, metric="euclidean", n_jobs=None, **kwds):
15611561

15621562
dtype = bool if metric in PAIRWISE_BOOLEAN_FUNCTIONS else None
15631563

1564-
if dtype == bool and (X.dtype != bool or Y.dtype != bool):
1564+
if (dtype == bool and
1565+
(X.dtype != bool or (Y is not None and Y.dtype != bool))):
15651566
msg = "Data was converted to boolean for metric %s" % metric
15661567
warnings.warn(msg, DataConversionWarning)
15671568

@@ -1592,7 +1593,6 @@ def pairwise_distances(X, Y=None, metric="euclidean", n_jobs=None, **kwds):
15921593
'yule',
15931594
]
15941595

1595-
15961596
# Helper functions - distance
15971597
PAIRWISE_KERNEL_FUNCTIONS = {
15981598
# If updating this dictionary, update the doc in both distance_metrics()

sklearn/metrics/tests/test_pairwise.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,15 @@ def test_pairwise_boolean_distance(metric):
173173
with pytest.warns(DataConversionWarning, match=msg):
174174
pairwise_distances(X, metric=metric)
175175

176+
# Check that the warning is raised if X is boolean but Y is not boolean:
177+
with pytest.warns(DataConversionWarning, match=msg):
178+
pairwise_distances(X.astype(bool), Y=Y, metric=metric)
179+
180+
# Check that no warning is raised if X is already boolean and Y is None:
181+
with pytest.warns(None) as records:
182+
pairwise_distances(X.astype(bool), metric=metric)
183+
assert len(records) == 0
184+
176185

177186
def test_no_data_conversion_warning():
178187
# No warnings issued if metric is not a boolean distance function

0 commit comments

Comments
 (0)