From 2706b2e70e55a130b6671e85eea78c6ea9108e0b Mon Sep 17 00:00:00 2001 From: ngshya Date: Sat, 21 Dec 2019 20:17:07 +0100 Subject: [PATCH 01/11] Managed the case where the sum by row is zero. --- sklearn/semi_supervised/_label_propagation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/semi_supervised/_label_propagation.py b/sklearn/semi_supervised/_label_propagation.py index 0ec687aae7d20..157938322bff1 100644 --- a/sklearn/semi_supervised/_label_propagation.py +++ b/sklearn/semi_supervised/_label_propagation.py @@ -288,6 +288,7 @@ def fit(self, X, y): ) self.n_iter_ += 1 + self.label_distributions_[np.sum(self.label_distributions_, axis=1) == 0, :] = 1 normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis] self.label_distributions_ /= normalizer From 2fd9ccb2ecf8f6c7f6c5cff4d73b824f12092371 Mon Sep 17 00:00:00 2001 From: ngshya Date: Sat, 21 Dec 2019 20:30:21 +0100 Subject: [PATCH 02/11] Managed the case where the sum by row is zero. --- sklearn/semi_supervised/_label_propagation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/semi_supervised/_label_propagation.py b/sklearn/semi_supervised/_label_propagation.py index 157938322bff1..176bdbc8c9264 100644 --- a/sklearn/semi_supervised/_label_propagation.py +++ b/sklearn/semi_supervised/_label_propagation.py @@ -288,7 +288,8 @@ def fit(self, X, y): ) self.n_iter_ += 1 - self.label_distributions_[np.sum(self.label_distributions_, axis=1) == 0, :] = 1 + self.label_distributions_[np.sum(self.label_distributions_, + axis=1) == 0, :] = 1 normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis] self.label_distributions_ /= normalizer From 101e125ee7b9c4153d2677a72e1623866ace78c7 Mon Sep 17 00:00:00 2001 From: ngshya Date: Sat, 21 Dec 2019 20:38:01 +0100 Subject: [PATCH 03/11] Managed the case where the sum by row is zero. --- sklearn/semi_supervised/_label_propagation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/semi_supervised/_label_propagation.py b/sklearn/semi_supervised/_label_propagation.py index 176bdbc8c9264..b96a016513c55 100644 --- a/sklearn/semi_supervised/_label_propagation.py +++ b/sklearn/semi_supervised/_label_propagation.py @@ -288,8 +288,8 @@ def fit(self, X, y): ) self.n_iter_ += 1 - self.label_distributions_[np.sum(self.label_distributions_, - axis=1) == 0, :] = 1 + l_bool_zeros = np.sum(self.label_distributions_, axis=1) == 0 + self.label_distributions_[l_bool_zeros, :] = 1 normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis] self.label_distributions_ /= normalizer From f99bf29e7bca6ab0ce74a12c2452187528e32043 Mon Sep 17 00:00:00 2001 From: ngshya Date: Sun, 22 Dec 2019 10:58:52 +0100 Subject: [PATCH 04/11] Added the non regression test for #15946. --- sklearn/semi_supervised/tests/test_label_propagation.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index 7e20350b20b2f..d9788a65d641e 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -152,3 +152,10 @@ def test_convergence_warning(): mdl = label_propagation.LabelPropagation(kernel='rbf', max_iter=500) assert_no_warnings(mdl.fit, X, y) + +def test_non_zero_normalizer(): + # This is a non-regression test for #15946 + X = np.array([[100., 100.], [100., 100.], [0., 0.], [0., 0.]]) + y = np.array([0, 1, -1, -1]) + mdl = label_propagation.LabelSpreading(kernel='knn', max_iter=100, n_neighbors=1) + assert_no_warnings(mdl.fit, X, y) From bce6500da7e1667aa4483fbb57be4e7cdfff5184 Mon Sep 17 00:00:00 2001 From: ngshya Date: Sun, 22 Dec 2019 11:03:10 +0100 Subject: [PATCH 05/11] Solved the issue of long line. --- sklearn/semi_supervised/tests/test_label_propagation.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index d9788a65d641e..2213b82f24335 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -157,5 +157,7 @@ def test_non_zero_normalizer(): # This is a non-regression test for #15946 X = np.array([[100., 100.], [100., 100.], [0., 0.], [0., 0.]]) y = np.array([0, 1, -1, -1]) - mdl = label_propagation.LabelSpreading(kernel='knn', max_iter=100, n_neighbors=1) + mdl = label_propagation.LabelSpreading(kernel='knn', + max_iter=100, + n_neighbors=1) assert_no_warnings(mdl.fit, X, y) From 607f31f4ed2a5ef6fa9ba1c0639a84c39e5d8e80 Mon Sep 17 00:00:00 2001 From: ngshya Date: Sun, 22 Dec 2019 11:05:28 +0100 Subject: [PATCH 06/11] Removed trailing space. --- sklearn/semi_supervised/tests/test_label_propagation.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index 2213b82f24335..eefffdfdb0dd3 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -152,12 +152,13 @@ def test_convergence_warning(): mdl = label_propagation.LabelPropagation(kernel='rbf', max_iter=500) assert_no_warnings(mdl.fit, X, y) - + + def test_non_zero_normalizer(): # This is a non-regression test for #15946 X = np.array([[100., 100.], [100., 100.], [0., 0.], [0., 0.]]) y = np.array([0, 1, -1, -1]) - mdl = label_propagation.LabelSpreading(kernel='knn', - max_iter=100, + mdl = label_propagation.LabelSpreading(kernel='knn', + max_iter=100, n_neighbors=1) assert_no_warnings(mdl.fit, X, y) From 8876b7403db09f9318f396d5e27b4816b469ebee Mon Sep 17 00:00:00 2001 From: ngshya Date: Sat, 28 Dec 2019 07:49:38 +0100 Subject: [PATCH 07/11] removed blank line at the end of the file --- sklearn/semi_supervised/tests/test_label_propagation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index 0246086bf6d5b..35207ac1303b4 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -201,4 +201,3 @@ def topk_rbf(X, Y=None, n_neighbors=10, gamma=1e-5): model = label_propagation.LabelPropagation(kernel=topk_rbf) model.fit(X_train, y_train) assert model.score(X_test, y_test) >= 0.9 - From 38b1e583df861d66211b7d38521e5d462131f69b Mon Sep 17 00:00:00 2001 From: ngshya Date: Sat, 28 Dec 2019 07:56:16 +0100 Subject: [PATCH 08/11] remove blank lines with spaces --- sklearn/semi_supervised/tests/test_label_propagation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index 35207ac1303b4..f3d0aa3f3d71b 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -166,7 +166,7 @@ def test_non_zero_normalizer(): n_neighbors=1) assert_no_warnings(mdl.fit, X, y) - + def test_predict_sparse_callable_kernel(): # This is a non-regression test for #15866 From 855e9883bb55fbe543e01d03365442bd709650fc Mon Sep 17 00:00:00 2001 From: ngshya Date: Wed, 8 Jan 2020 07:48:54 +0100 Subject: [PATCH 09/11] modified normalizer instead of label_distributions_ --- sklearn/semi_supervised/_label_propagation.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/semi_supervised/_label_propagation.py b/sklearn/semi_supervised/_label_propagation.py index d388f91e0a60b..0d88f10ca7c24 100644 --- a/sklearn/semi_supervised/_label_propagation.py +++ b/sklearn/semi_supervised/_label_propagation.py @@ -289,9 +289,8 @@ def fit(self, X, y): ) self.n_iter_ += 1 - l_bool_zeros = np.sum(self.label_distributions_, axis=1) == 0 - self.label_distributions_[l_bool_zeros, :] = 1 normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis] + normalizer = np.array([[1] if x[0] == 0 else x for x in normalizer]) self.label_distributions_ /= normalizer # set the transduction item From a70d944196f995f0466621873a9e25a1438023f0 Mon Sep 17 00:00:00 2001 From: ngshya Date: Wed, 8 Jan 2020 22:54:06 +0100 Subject: [PATCH 10/11] more idiomatic numpy way to manage normalizer == 0 --- sklearn/semi_supervised/_label_propagation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/semi_supervised/_label_propagation.py b/sklearn/semi_supervised/_label_propagation.py index 0d88f10ca7c24..a07b717d6f932 100644 --- a/sklearn/semi_supervised/_label_propagation.py +++ b/sklearn/semi_supervised/_label_propagation.py @@ -290,7 +290,7 @@ def fit(self, X, y): self.n_iter_ += 1 normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis] - normalizer = np.array([[1] if x[0] == 0 else x for x in normalizer]) + normalizer[normalizer == 0] = 1 self.label_distributions_ /= normalizer # set the transduction item From 2db2541073471962cb5ac9b0f898a59425513e10 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 9 Jan 2020 19:10:22 +0100 Subject: [PATCH 11/11] nitpicks --- sklearn/semi_supervised/tests/test_label_propagation.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index f3d0aa3f3d71b..015f6fa191853 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -157,8 +157,10 @@ def test_convergence_warning(): assert_no_warnings(mdl.fit, X, y) -def test_non_zero_normalizer(): - # This is a non-regression test for #15946 +def test_label_propagation_non_zero_normalizer(): + # check that we don't divide by zero in case of null normalizer + # non-regression test for + # https://github.com/scikit-learn/scikit-learn/pull/15946 X = np.array([[100., 100.], [100., 100.], [0., 0.], [0., 0.]]) y = np.array([0, 1, -1, -1]) mdl = label_propagation.LabelSpreading(kernel='knn',