Skip to content

Commit c61a694

Browse files
committed
COSMIT Unix line endings
1 parent 413e035 commit c61a694

File tree

1 file changed

+178
-178
lines changed

1 file changed

+178
-178
lines changed
Lines changed: 178 additions & 178 deletions
Original file line numberDiff line numberDiff line change
@@ -1,178 +1,178 @@
1-
from functools import partial
2-
3-
import pytest
4-
import numpy as np
5-
6-
from sklearn.metrics.cluster import adjusted_mutual_info_score
7-
from sklearn.metrics.cluster import adjusted_rand_score
8-
from sklearn.metrics.cluster import completeness_score
9-
from sklearn.metrics.cluster import fowlkes_mallows_score
10-
from sklearn.metrics.cluster import homogeneity_score
11-
from sklearn.metrics.cluster import mutual_info_score
12-
from sklearn.metrics.cluster import normalized_mutual_info_score
13-
from sklearn.metrics.cluster import v_measure_score
14-
from sklearn.metrics.cluster import silhouette_score
15-
from sklearn.metrics.cluster import calinski_harabaz_score
16-
17-
from sklearn.utils.testing import assert_allclose
18-
19-
20-
# Dictionaries of metrics
21-
# ------------------------
22-
# The goal of having those dictionaries is to have an easy way to call a
23-
# particular metric and associate a name to each function:
24-
# - SUPERVISED_METRICS: all supervised cluster metrics - (when given a
25-
# ground truth value)
26-
# - UNSUPERVISED_METRICS: all unsupervised cluster metrics
27-
#
28-
# Those dictionaries will be used to test systematically some invariance
29-
# properties, e.g. invariance toward several input layout.
30-
#
31-
32-
SUPERVISED_METRICS = {
33-
"adjusted_mutual_info_score": adjusted_mutual_info_score,
34-
"adjusted_rand_score": adjusted_rand_score,
35-
"completeness_score": completeness_score,
36-
"homogeneity_score": homogeneity_score,
37-
"mutual_info_score": mutual_info_score,
38-
"normalized_mutual_info_score": normalized_mutual_info_score,
39-
"v_measure_score": v_measure_score,
40-
"fowlkes_mallows_score": fowlkes_mallows_score
41-
}
42-
43-
UNSUPERVISED_METRICS = {
44-
"silhouette_score": silhouette_score,
45-
"silhouette_manhattan": partial(silhouette_score, metric='manhattan'),
46-
"calinski_harabaz_score": calinski_harabaz_score
47-
}
48-
49-
# Lists of metrics with common properties
50-
# ---------------------------------------
51-
# Lists of metrics with common properties are used to test systematically some
52-
# functionalities and invariance, e.g. SYMMETRIC_METRICS lists all metrics
53-
# that are symmetric with respect to their input argument y_true and y_pred.
54-
#
55-
# --------------------------------------------------------------------
56-
# Symmetric with respect to their input arguments y_true and y_pred.
57-
# Symmetric metrics only apply to supervised clusters.
58-
SYMMETRIC_METRICS = [
59-
"adjusted_rand_score", "v_measure_score",
60-
"mutual_info_score", "adjusted_mutual_info_score",
61-
"normalized_mutual_info_score", "fowlkes_mallows_score"
62-
]
63-
64-
NON_SYMMETRIC_METRICS = ["homogeneity_score", "completeness_score"]
65-
66-
# Metrics whose upper bound is 1
67-
NORMALIZED_METRICS = [
68-
"adjusted_rand_score", "homogeneity_score", "completeness_score",
69-
"v_measure_score", "adjusted_mutual_info_score", "fowlkes_mallows_score",
70-
"normalized_mutual_info_score"
71-
]
72-
73-
74-
rng = np.random.RandomState(0)
75-
y1 = rng.randint(3, size=30)
76-
y2 = rng.randint(3, size=30)
77-
78-
79-
def test_symmetric_non_symmetric_union():
80-
assert (sorted(SYMMETRIC_METRICS + NON_SYMMETRIC_METRICS) ==
81-
sorted(SUPERVISED_METRICS))
82-
83-
84-
@pytest.mark.parametrize(
85-
'metric_name, y1, y2',
86-
[(name, y1, y2) for name in SYMMETRIC_METRICS]
87-
)
88-
def test_symmetry(metric_name, y1, y2):
89-
metric = SUPERVISED_METRICS[metric_name]
90-
assert metric(y1, y2) == pytest.approx(metric(y2, y1))
91-
92-
93-
@pytest.mark.parametrize(
94-
'metric_name, y1, y2',
95-
[(name, y1, y2) for name in NON_SYMMETRIC_METRICS]
96-
)
97-
def test_non_symmetry(metric_name, y1, y2):
98-
metric = SUPERVISED_METRICS[metric_name]
99-
assert metric(y1, y2) != pytest.approx(metric(y2, y1))
100-
101-
102-
@pytest.mark.parametrize(
103-
"metric_name",
104-
[name for name in NORMALIZED_METRICS]
105-
)
106-
def test_normalized_output(metric_name):
107-
upper_bound_1 = [0, 0, 0, 1, 1, 1]
108-
upper_bound_2 = [0, 0, 0, 1, 1, 1]
109-
metric = SUPERVISED_METRICS[metric_name]
110-
assert metric([0, 0, 0, 1, 1], [0, 0, 0, 1, 2]) > 0.0
111-
assert metric([0, 0, 1, 1, 2], [0, 0, 1, 1, 1]) > 0.0
112-
assert metric([0, 0, 0, 1, 2], [0, 1, 1, 1, 1]) < 1.0
113-
assert metric([0, 0, 0, 1, 2], [0, 1, 1, 1, 1]) < 1.0
114-
assert metric(upper_bound_1, upper_bound_2) == pytest.approx(1.0)
115-
116-
lower_bound_1 = [0, 0, 0, 0, 0, 0]
117-
lower_bound_2 = [0, 1, 2, 3, 4, 5]
118-
score = np.array([metric(lower_bound_1, lower_bound_2),
119-
metric(lower_bound_2, lower_bound_1)])
120-
assert not (score < 0).any()
121-
122-
123-
# All clustering metrics do not change score due to permutations of labels
124-
# that is when 0 and 1 exchanged.
125-
@pytest.mark.parametrize(
126-
"metric_name",
127-
[name for name in dict(SUPERVISED_METRICS, **UNSUPERVISED_METRICS)]
128-
)
129-
def test_permute_labels(metric_name):
130-
y_label = np.array([0, 0, 0, 1, 1, 0, 1])
131-
y_pred = np.array([1, 0, 1, 0, 1, 1, 0])
132-
if metric_name in SUPERVISED_METRICS:
133-
metric = SUPERVISED_METRICS[metric_name]
134-
score_1 = metric(y_pred, y_label)
135-
assert_allclose(score_1, metric(1 - y_pred, y_label))
136-
assert_allclose(score_1, metric(1 - y_pred, 1 - y_label))
137-
assert_allclose(score_1, metric(y_pred, 1 - y_label))
138-
else:
139-
metric = UNSUPERVISED_METRICS[metric_name]
140-
X = np.random.randint(10, size=(7, 10))
141-
score_1 = metric(X, y_pred)
142-
assert_allclose(score_1, metric(X, 1 - y_pred))
143-
144-
145-
# For all clustering metrics Input parameters can be both
146-
@pytest.mark.parametrize(
147-
"metric_name",
148-
[name for name in dict(SUPERVISED_METRICS, **UNSUPERVISED_METRICS)]
149-
)
150-
# in the form of arrays or lists, with positive, negative or string labels
151-
def test_format_invariance(metric_name):
152-
y_true = [0, 0, 0, 0, 1, 1, 1, 1]
153-
y_pred = [0, 1, 2, 3, 4, 5, 6, 7]
154-
155-
def generate_formats(y):
156-
y = np.array(y)
157-
yield y, 'array of ints'
158-
yield y.tolist(), 'list of ints'
159-
yield [str(x) for x in y.tolist()], 'list of strs'
160-
yield y - 1, 'including negative ints'
161-
yield y + 1, 'strictly positive ints'
162-
163-
if metric_name in SUPERVISED_METRICS:
164-
metric = SUPERVISED_METRICS[metric_name]
165-
score_1 = metric(y_true, y_pred)
166-
y_true_gen = generate_formats(y_true)
167-
y_pred_gen = generate_formats(y_pred)
168-
for (y_true_fmt, fmt_name), (y_pred_fmt, _) in zip(y_true_gen,
169-
y_pred_gen):
170-
assert score_1 == metric(y_true_fmt, y_pred_fmt)
171-
else:
172-
metric = UNSUPERVISED_METRICS[metric_name]
173-
X = np.random.randint(10, size=(8, 10))
174-
score_1 = metric(X, y_true)
175-
assert score_1 == metric(X.astype(float), y_true)
176-
y_true_gen = generate_formats(y_true)
177-
for (y_true_fmt, fmt_name) in y_true_gen:
178-
assert score_1 == metric(X, y_true_fmt)
1+
from functools import partial
2+
3+
import pytest
4+
import numpy as np
5+
6+
from sklearn.metrics.cluster import adjusted_mutual_info_score
7+
from sklearn.metrics.cluster import adjusted_rand_score
8+
from sklearn.metrics.cluster import completeness_score
9+
from sklearn.metrics.cluster import fowlkes_mallows_score
10+
from sklearn.metrics.cluster import homogeneity_score
11+
from sklearn.metrics.cluster import mutual_info_score
12+
from sklearn.metrics.cluster import normalized_mutual_info_score
13+
from sklearn.metrics.cluster import v_measure_score
14+
from sklearn.metrics.cluster import silhouette_score
15+
from sklearn.metrics.cluster import calinski_harabaz_score
16+
17+
from sklearn.utils.testing import assert_allclose
18+
19+
20+
# Dictionaries of metrics
21+
# ------------------------
22+
# The goal of having those dictionaries is to have an easy way to call a
23+
# particular metric and associate a name to each function:
24+
# - SUPERVISED_METRICS: all supervised cluster metrics - (when given a
25+
# ground truth value)
26+
# - UNSUPERVISED_METRICS: all unsupervised cluster metrics
27+
#
28+
# Those dictionaries will be used to test systematically some invariance
29+
# properties, e.g. invariance toward several input layout.
30+
#
31+
32+
# Supervised clustering metrics, i.e. metrics that compare a predicted
# labelling against a ground-truth labelling, keyed by a readable name.
SUPERVISED_METRICS = dict(
    adjusted_mutual_info_score=adjusted_mutual_info_score,
    adjusted_rand_score=adjusted_rand_score,
    completeness_score=completeness_score,
    homogeneity_score=homogeneity_score,
    mutual_info_score=mutual_info_score,
    normalized_mutual_info_score=normalized_mutual_info_score,
    v_measure_score=v_measure_score,
    fowlkes_mallows_score=fowlkes_mallows_score,
)
42+
43+
# Unsupervised clustering metrics: they score a labelling against the data
# matrix X rather than against ground-truth labels.
UNSUPERVISED_METRICS = dict(
    silhouette_score=silhouette_score,
    silhouette_manhattan=partial(silhouette_score, metric='manhattan'),
    calinski_harabaz_score=calinski_harabaz_score,
)
48+
49+
# Lists of metrics with common properties
50+
# ---------------------------------------
51+
# Lists of metrics with common properties are used to test systematically some
52+
# functionalities and invariance, e.g. SYMMETRIC_METRICS lists all metrics
53+
# that are symmetric with respect to their input argument y_true and y_pred.
54+
#
55+
# --------------------------------------------------------------------
56+
# Symmetric with respect to their input arguments y_true and y_pred.
57+
# Symmetric metrics only apply to supervised clusters.
58+
# Supervised metrics satisfying metric(y1, y2) == metric(y2, y1), one per line.
SYMMETRIC_METRICS = [
    "adjusted_rand_score",
    "v_measure_score",
    "mutual_info_score",
    "adjusted_mutual_info_score",
    "normalized_mutual_info_score",
    "fowlkes_mallows_score",
]
63+
64+
NON_SYMMETRIC_METRICS = ["homogeneity_score", "completeness_score"]
65+
66+
# Metrics whose upper bound is 1
67+
# Metrics whose upper bound is 1 (perfect agreement scores exactly 1).
NORMALIZED_METRICS = [
    "adjusted_rand_score",
    "homogeneity_score",
    "completeness_score",
    "v_measure_score",
    "adjusted_mutual_info_score",
    "fowlkes_mallows_score",
    "normalized_mutual_info_score",
]
72+
73+
74+
# Deterministic pair of random labellings (3 clusters, 30 samples) used as
# the default inputs of the symmetry tests below; seeded for reproducibility.
rng = np.random.RandomState(0)
y1 = rng.randint(3, size=30)
y2 = rng.randint(3, size=30)
77+
78+
79+
def test_symmetric_non_symmetric_union():
    """The symmetric and non-symmetric lists must exactly cover the
    supervised metrics, with no metric missing or listed twice."""
    combined = sorted(SYMMETRIC_METRICS + NON_SYMMETRIC_METRICS)
    assert combined == sorted(SUPERVISED_METRICS)
82+
83+
84+
@pytest.mark.parametrize(
    'metric_name, y1, y2',
    [(name, y1, y2) for name in SYMMETRIC_METRICS]
)
def test_symmetry(metric_name, y1, y2):
    """A symmetric metric must give the same score when its two label
    arguments are swapped."""
    metric = SUPERVISED_METRICS[metric_name]
    forward = metric(y1, y2)
    backward = metric(y2, y1)
    assert forward == pytest.approx(backward)
91+
92+
93+
@pytest.mark.parametrize(
    'metric_name, y1, y2',
    [(name, y1, y2) for name in NON_SYMMETRIC_METRICS]
)
def test_non_symmetry(metric_name, y1, y2):
    """A non-symmetric metric must change when its two label arguments
    are swapped (for these generic random labellings)."""
    metric = SUPERVISED_METRICS[metric_name]
    forward = metric(y1, y2)
    backward = metric(y2, y1)
    assert forward != pytest.approx(backward)
100+
101+
102+
@pytest.mark.parametrize("metric_name", NORMALIZED_METRICS)
def test_normalized_output(metric_name):
    """Normalized metrics stay within their bounds: strictly positive for
    partial agreement, strictly below 1 for imperfect agreement, exactly 1
    for a perfect match, and never negative.
    """
    upper_bound_1 = [0, 0, 0, 1, 1, 1]
    upper_bound_2 = [0, 0, 0, 1, 1, 1]
    metric = SUPERVISED_METRICS[metric_name]
    assert metric([0, 0, 0, 1, 1], [0, 0, 0, 1, 2]) > 0.0
    assert metric([0, 0, 1, 1, 2], [0, 0, 1, 1, 1]) > 0.0
    # Imperfect agreement must score strictly below the upper bound of 1.
    # (The original repeated this exact assertion twice; once is enough.)
    assert metric([0, 0, 0, 1, 2], [0, 1, 1, 1, 1]) < 1.0
    assert metric(upper_bound_1, upper_bound_2) == pytest.approx(1.0)

    # Maximally disagreeing labellings must still never go below 0.
    lower_bound_1 = [0, 0, 0, 0, 0, 0]
    lower_bound_2 = [0, 1, 2, 3, 4, 5]
    score = np.array([metric(lower_bound_1, lower_bound_2),
                      metric(lower_bound_2, lower_bound_1)])
    assert not (score < 0).any()
121+
122+
123+
# All clustering metrics must not change their score under a permutation of
# the label values, e.g. when labels 0 and 1 are exchanged.
@pytest.mark.parametrize(
    "metric_name",
    list(SUPERVISED_METRICS) + list(UNSUPERVISED_METRICS)
)
def test_permute_labels(metric_name):
    """Relabelling clusters (0 <-> 1) must leave every metric unchanged."""
    y_label = np.array([0, 0, 0, 1, 1, 0, 1])
    y_pred = np.array([1, 0, 1, 0, 1, 1, 0])
    if metric_name in SUPERVISED_METRICS:
        metric = SUPERVISED_METRICS[metric_name]
        score_1 = metric(y_pred, y_label)
        # Flipping either argument (or both) must not change the score.
        assert_allclose(score_1, metric(1 - y_pred, y_label))
        assert_allclose(score_1, metric(1 - y_pred, 1 - y_label))
        assert_allclose(score_1, metric(y_pred, 1 - y_label))
    else:
        metric = UNSUPERVISED_METRICS[metric_name]
        # Seeded RNG so the test data (and any failure) is reproducible;
        # the original used the unseeded global np.random state.
        X = np.random.RandomState(0).randint(10, size=(7, 10))
        score_1 = metric(X, y_pred)
        assert_allclose(score_1, metric(X, 1 - y_pred))
143+
144+
145+
@pytest.mark.parametrize(
    "metric_name",
    list(SUPERVISED_METRICS) + list(UNSUPERVISED_METRICS)
)
def test_format_invariance(metric_name):
    """All clustering metrics must be invariant to the label container and
    encoding: arrays or lists, negative, strictly positive or string labels.

    (The original had this note split into two comments around the
    decorator, with a typo: "negetive".)
    """
    y_true = [0, 0, 0, 0, 1, 1, 1, 1]
    y_pred = [0, 1, 2, 3, 4, 5, 6, 7]

    def generate_formats(y):
        # Yield (labels, description) pairs that all encode the same
        # clustering as ``y`` under different containers/encodings.
        y = np.array(y)
        yield y, 'array of ints'
        yield y.tolist(), 'list of ints'
        yield [str(x) for x in y.tolist()], 'list of strs'
        yield y - 1, 'including negative ints'
        yield y + 1, 'strictly positive ints'

    if metric_name in SUPERVISED_METRICS:
        metric = SUPERVISED_METRICS[metric_name]
        score_1 = metric(y_true, y_pred)
        y_true_gen = generate_formats(y_true)
        y_pred_gen = generate_formats(y_pred)
        # The description is unused here; underscore both (the original
        # bound an unused ``fmt_name``).
        for (y_true_fmt, _), (y_pred_fmt, _) in zip(y_true_gen, y_pred_gen):
            assert score_1 == metric(y_true_fmt, y_pred_fmt)
    else:
        metric = UNSUPERVISED_METRICS[metric_name]
        # Seeded RNG so the test data is reproducible; the original used
        # the unseeded global np.random state.
        X = np.random.RandomState(0).randint(10, size=(8, 10))
        score_1 = metric(X, y_true)
        assert score_1 == metric(X.astype(float), y_true)
        for y_true_fmt, _ in generate_formats(y_true):
            assert score_1 == metric(X, y_true_fmt)

0 commit comments

Comments
 (0)