From a848ed79cbe0a054d76897d9e458af131a72fe13 Mon Sep 17 00:00:00 2001 From: zhaowei Date: Mon, 25 Jan 2021 20:07:34 +0800 Subject: [PATCH 01/10] Bug fix: Label propagation sometimes produces label_distributions that contain Nan.(#9292) --- sklearn/semi_supervised/_label_propagation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/semi_supervised/_label_propagation.py b/sklearn/semi_supervised/_label_propagation.py index 1a8f1a75bda38..9578cd0d1a801 100644 --- a/sklearn/semi_supervised/_label_propagation.py +++ b/sklearn/semi_supervised/_label_propagation.py @@ -278,6 +278,7 @@ def fit(self, X, y): if self._variant == 'propagation': normalizer = np.sum( self.label_distributions_, axis=1)[:, np.newaxis] + normalizer[normalizer == 0] = 1 self.label_distributions_ /= normalizer self.label_distributions_ = np.where(unlabeled, self.label_distributions_, From 1c3da3f32fdf54ec53e036a57303b5927257210e Mon Sep 17 00:00:00 2001 From: zhaowei Date: Wed, 27 Jan 2021 15:14:32 +0800 Subject: [PATCH 02/10] add label propagation nan test --- sklearn/semi_supervised/tests/test_label_propagation.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index 015f6fa191853..f4ef231c0d486 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -203,3 +203,12 @@ def topk_rbf(X, Y=None, n_neighbors=10, gamma=1e-5): model = label_propagation.LabelPropagation(kernel=topk_rbf) model.fit(X_train, y_train) assert model.score(X_test, y_test) >= 0.9 + +def test_label_propagation_non_zero_normalizer_during_iter(): + # https://github.com/scikit-learn/scikit-learn/pull/19271 + X = np.array([[100., 100.], [100., 100.], [0., 0.], [0., 0.]]) + y = np.array([0, 1, -1, -1]) + mdl = label_propagation.LabelPropagation(kernel='knn', + max_iter=100, + n_neighbors=1) + assert_no_warnings(mdl.fit, X, y) From bf402e6e86b6b2c0b33b7e7c72cadc61109cfbf3 Mon Sep 17 00:00:00 2001 From: zhaowei Date: Wed, 27 Jan 2021 15:20:50 +0800 Subject: [PATCH 03/10] pass lint --- sklearn/semi_supervised/tests/test_label_propagation.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index f4ef231c0d486..1a4fa31eda156 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -204,11 +204,12 @@ def topk_rbf(X, Y=None, n_neighbors=10, gamma=1e-5): model.fit(X_train, y_train) assert model.score(X_test, y_test) >= 0.9 + def test_label_propagation_non_zero_normalizer_during_iter(): # https://github.com/scikit-learn/scikit-learn/pull/19271 X = np.array([[100., 100.], [100., 100.], [0., 0.], [0., 0.]]) y = np.array([0, 1, -1, -1]) mdl = label_propagation.LabelPropagation(kernel='knn', - max_iter=100, - n_neighbors=1) + max_iter=100, + n_neighbors=1) assert_no_warnings(mdl.fit, X, y) From 86ec5b755c7db9a36e1ba5f8cdfbee302e98d374 Mon Sep 17 00:00:00 2001 From: zhaowei Date: Wed, 27 Jan 2021 20:34:41 +0800 Subject: [PATCH 04/10] merge test --- .../tests/test_label_propagation.py | 22 +++++++------------ 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index 1a4fa31eda156..6d7f808bb99ee 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -157,15 +157,19 @@ def test_convergence_warning(): assert_no_warnings(mdl.fit, X, y) -def test_label_propagation_non_zero_normalizer(): +@pytest.mark.parametrize("label_propagation_class", + [label_propagation.LabelSpreading, + label_propagation.LabelPropagation]) +def test_label_propagation_non_zero_normalizer(label_propagation_class): # check that we don't divide by zero in case of null normalizer # non-regression test for # https://github.com/scikit-learn/scikit-learn/pull/15946 + # https://github.com/scikit-learn/scikit-learn/pull/19271 X = np.array([[100., 100.], [100., 100.], [0., 0.], [0., 0.]]) y = np.array([0, 1, -1, -1]) - mdl = label_propagation.LabelSpreading(kernel='knn', - max_iter=100, - n_neighbors=1) + mdl = label_propagation_class(kernel='knn', + max_iter=100, + n_neighbors=1) assert_no_warnings(mdl.fit, X, y) @@ -203,13 +207,3 @@ def topk_rbf(X, Y=None, n_neighbors=10, gamma=1e-5): model = label_propagation.LabelPropagation(kernel=topk_rbf) model.fit(X_train, y_train) assert model.score(X_test, y_test) >= 0.9 - - -def test_label_propagation_non_zero_normalizer_during_iter(): - # https://github.com/scikit-learn/scikit-learn/pull/19271 - X = np.array([[100., 100.], [100., 100.], [0., 0.], [0., 0.]]) - y = np.array([0, 1, -1, -1]) - mdl = label_propagation.LabelPropagation(kernel='knn', - max_iter=100, - n_neighbors=1) - assert_no_warnings(mdl.fit, X, y) From 0249155ff0c8db2f3767195cae3b4b15c40e38e4 Mon Sep 17 00:00:00 2001 From: ZhaoweiWang Date: Sat, 30 Jan 2021 11:49:22 +0800 Subject: [PATCH 05/10] Update sklearn/semi_supervised/tests/test_label_propagation.py Co-authored-by: Thomas J. Fan --- sklearn/semi_supervised/tests/test_label_propagation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index 6d7f808bb99ee..75749ddbf8933 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -160,7 +160,7 @@ def test_convergence_warning(): @pytest.mark.parametrize("label_propagation_class", [label_propagation.LabelSpreading, label_propagation.LabelPropagation]) -def test_label_propagation_non_zero_normalizer(label_propagation_class): +def test_label_propagation_non_zero_normalizer(LabelPropagationCls): # check that we don't divide by zero in case of null normalizer # non-regression test for # https://github.com/scikit-learn/scikit-learn/pull/15946 From b6ebeed5b5c323caa8a6217e9488dd8e7bc7e21f Mon Sep 17 00:00:00 2001 From: ZhaoweiWang Date: Sat, 30 Jan 2021 11:49:30 +0800 Subject: [PATCH 06/10] Update sklearn/semi_supervised/tests/test_label_propagation.py Co-authored-by: Thomas J. Fan --- sklearn/semi_supervised/tests/test_label_propagation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index 75749ddbf8933..e3546deb66e22 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -164,7 +164,7 @@ def test_label_propagation_non_zero_normalizer(LabelPropagationCls): # check that we don't divide by zero in case of null normalizer # non-regression test for # https://github.com/scikit-learn/scikit-learn/pull/15946 - # https://github.com/scikit-learn/scikit-learn/pull/19271 + # https://github.com/scikit-learn/scikit-learn/issues/9292 X = np.array([[100., 100.], [100., 100.], [0., 0.], [0., 0.]]) y = np.array([0, 1, -1, -1]) mdl = label_propagation_class(kernel='knn', From 25bdbfd90ac63fb6f8de9fde66dc769917126cf6 Mon Sep 17 00:00:00 2001 From: ZhaoweiWang Date: Sat, 30 Jan 2021 11:49:39 +0800 Subject: [PATCH 07/10] Update sklearn/semi_supervised/tests/test_label_propagation.py Co-authored-by: Thomas J. Fan --- sklearn/semi_supervised/tests/test_label_propagation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index e3546deb66e22..6148c650e84b1 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -157,7 +157,7 @@ def test_convergence_warning(): assert_no_warnings(mdl.fit, X, y) -@pytest.mark.parametrize("label_propagation_class", +@pytest.mark.parametrize("LabelPropagationCls", [label_propagation.LabelSpreading, label_propagation.LabelPropagation]) def test_label_propagation_non_zero_normalizer(LabelPropagationCls): From 16e771accd259c3b045a85c73b6f5557706acb2b Mon Sep 17 00:00:00 2001 From: ZhaoweiWang Date: Sat, 30 Jan 2021 22:44:26 +0800 Subject: [PATCH 08/10] Update sklearn/semi_supervised/tests/test_label_propagation.py Co-authored-by: Thomas J. Fan --- sklearn/semi_supervised/tests/test_label_propagation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index 6148c650e84b1..a3fdfb853e57b 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -167,7 +167,7 @@ def test_label_propagation_non_zero_normalizer(LabelPropagationCls): # https://github.com/scikit-learn/scikit-learn/issues/9292 X = np.array([[100., 100.], [100., 100.], [0., 0.], [0., 0.]]) y = np.array([0, 1, -1, -1]) - mdl = label_propagation_class(kernel='knn', + mdl = LabelPropagationCls(kernel='knn', max_iter=100, n_neighbors=1) assert_no_warnings(mdl.fit, X, y) From 8339918d1b4766c84c9b2571428ec043e00a76cd Mon Sep 17 00:00:00 2001 From: zhaowei Date: Sat, 30 Jan 2021 23:14:43 +0800 Subject: [PATCH 09/10] pass lint --- sklearn/semi_supervised/tests/test_label_propagation.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index 6d7f808bb99ee..4060233a9c53b 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -167,9 +167,7 @@ def test_label_propagation_non_zero_normalizer(label_propagation_class): # https://github.com/scikit-learn/scikit-learn/pull/19271 X = np.array([[100., 100.], [100., 100.], [0., 0.], [0., 0.]]) y = np.array([0, 1, -1, -1]) - mdl = label_propagation_class(kernel='knn', - max_iter=100, - n_neighbors=1) + mdl = label_propagation_class(kernel='knn', max_iter=100, n_neighbors=1) assert_no_warnings(mdl.fit, X, y) From ef0aa0fa3b4c7f70efdef3806d63ceab51b5ab88 Mon Sep 17 00:00:00 2001 From: zhaowei Date: Sat, 30 Jan 2021 23:57:31 +0800 Subject: [PATCH 10/10] doc --- doc/whats_new/v1.0.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 382ff363e0db7..215403fe61620 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -146,6 +146,13 @@ Changelog for non-English characters. :pr:`18959` by :user:`Zero ` and :user:`wstates `. +:mod:`sklearn.semi_supervised` +................................. + +- |Fix| Avoid NaN during label propagation in + :class:`~sklearn.semi_supervised.LabelPropagation`. + :pr:`19271` by :user:`Zhaowei Wang `. + Code and Documentation Contributors -----------------------------------