From bdd5b93506f61c417c280ce93f49869ab5496997 Mon Sep 17 00:00:00 2001 From: "Samuel O. Ronsin" Date: Mon, 18 Feb 2019 18:48:10 +0100 Subject: [PATCH 01/18] Replace n_node_samples by weighted_n_node_samples in partial dependence computation --- sklearn/ensemble/_gradient_boosting.pyx | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sklearn/ensemble/_gradient_boosting.pyx b/sklearn/ensemble/_gradient_boosting.pyx index 2902502927cea..120de8b3abeb8 100644 --- a/sklearn/ensemble/_gradient_boosting.pyx +++ b/sklearn/ensemble/_gradient_boosting.pyx @@ -341,15 +341,15 @@ cpdef _partial_dependence_tree(Tree tree, DTYPE_t[:, ::1] X, # push left child node_stack[stack_size] = root_node + current_node.left_child current_weight = weight_stack[stack_size] - left_sample_frac = root_node[current_node.left_child].n_node_samples / \ - current_node.n_node_samples + left_sample_frac = root_node[current_node.left_child].weighted_n_node_samples / \ + current_node.weighted_n_node_samples if left_sample_frac <= 0.0 or left_sample_frac >= 1.0: - raise ValueError("left_sample_frac:%f, " - "n_samples current: %d, " - "n_samples left: %d" + raise ValueError("left_sample_frac:%f, " + "weighted_n_node_samples current: %f, " + "weighted_n_node_samples left: %f" % (left_sample_frac, - current_node.n_node_samples, - root_node[current_node.left_child].n_node_samples)) + current_node.weighted_n_node_samples, + root_node[current_node.left_child].weighted_n_node_samples)) weight_stack[stack_size] = current_weight * left_sample_frac stack_size +=1 From 74b1290a693a7931047734fa1c84c3cfeda1152c Mon Sep 17 00:00:00 2001 From: "Samuel O. 
Ronsin" Date: Thu, 28 Feb 2019 16:02:41 +0100 Subject: [PATCH 02/18] Add tests for both no-op and real sample weights --- .../ensemble/tests/test_partial_dependence.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/sklearn/ensemble/tests/test_partial_dependence.py b/sklearn/ensemble/tests/test_partial_dependence.py index 5bdb563199ebf..3d4a55c7b5339 100644 --- a/sklearn/ensemble/tests/test_partial_dependence.py +++ b/sklearn/ensemble/tests/test_partial_dependence.py @@ -18,6 +18,7 @@ # toy sample X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] y = [-1, -1, -1, 1, 1, 1] +w = [1.5, 2, 3.5, 4, 3, 2.5] T = [[-1, -1], [2, 2], [3, 2]] true_result = [-1, 1, 1] @@ -47,6 +48,25 @@ def test_partial_dependence_classifier(): assert axes is None assert_array_equal(pdp, pdp_2) + # with trivial (noop) sample weights + clf.fit(X, y, sample_weight=np.ones(len(y))) + + pdp_w1, axes_w1 = partial_dependence(clf, [0], X=X, grid_resolution=5) + + assert pdp_w1.shape == (1, 4) + assert axes_w1[0].shape[0] == 4 + assert_array_equal(pdp_w1, pdp) + + # with non-trivial sample weights + clf.fit(X, y, sample_weight=w) + + pdp_w2, axes_w2 = partial_dependence(clf, [0], X=X, grid_resolution=5) + + # only 4 grid points instead of 5 because only 4 unique X[:,0] vals + assert pdp_w2.shape == (1, 4) + assert axes_w2[0].shape[0] == 4 + assert pdp_w2[0, 0] != pdp_w1[0, 0] + def test_partial_dependence_multiclass(): # Test partial dependence for multi-class classifier From 4da517eda2f3bea41538a9af87a837bb953de8f7 Mon Sep 17 00:00:00 2001 From: "Samuel O. 
Ronsin" Date: Thu, 28 Feb 2019 16:10:05 +0100 Subject: [PATCH 03/18] Improve naming and remove useless comment --- sklearn/ensemble/tests/test_partial_dependence.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/sklearn/ensemble/tests/test_partial_dependence.py b/sklearn/ensemble/tests/test_partial_dependence.py index 3d4a55c7b5339..4cadfb674fd7e 100644 --- a/sklearn/ensemble/tests/test_partial_dependence.py +++ b/sklearn/ensemble/tests/test_partial_dependence.py @@ -48,24 +48,23 @@ def test_partial_dependence_classifier(): assert axes is None assert_array_equal(pdp, pdp_2) - # with trivial (noop) sample weights + # with trivial (no-op) sample weights clf.fit(X, y, sample_weight=np.ones(len(y))) - pdp_w1, axes_w1 = partial_dependence(clf, [0], X=X, grid_resolution=5) + pdp_w, axes_w = partial_dependence(clf, [0], X=X, grid_resolution=5) - assert pdp_w1.shape == (1, 4) - assert axes_w1[0].shape[0] == 4 - assert_array_equal(pdp_w1, pdp) + assert pdp_w.shape == (1, 4) + assert axes_w[0].shape[0] == 4 + assert_array_equal(pdp_w, pdp) # with non-trivial sample weights clf.fit(X, y, sample_weight=w) pdp_w2, axes_w2 = partial_dependence(clf, [0], X=X, grid_resolution=5) - # only 4 grid points instead of 5 because only 4 unique X[:,0] vals assert pdp_w2.shape == (1, 4) assert axes_w2[0].shape[0] == 4 - assert pdp_w2[0, 0] != pdp_w1[0, 0] + assert pdp_w2[0, 0] != pdp_w[0, 0] def test_partial_dependence_multiclass(): From 0017f901d0fcadce5a806970e5384e0ec956e645 Mon Sep 17 00:00:00 2001 From: "Samuel O. 
Ronsin" Date: Tue, 12 Mar 2019 22:48:59 +0100 Subject: [PATCH 04/18] Fix small test issues --- sklearn/ensemble/tests/test_partial_dependence.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/sklearn/ensemble/tests/test_partial_dependence.py b/sklearn/ensemble/tests/test_partial_dependence.py index 4cadfb674fd7e..d92b4eff0afc3 100644 --- a/sklearn/ensemble/tests/test_partial_dependence.py +++ b/sklearn/ensemble/tests/test_partial_dependence.py @@ -4,7 +4,7 @@ import pytest import numpy as np -from numpy.testing import assert_array_equal +from numpy.testing import assert_array_equal, assert_allclose from sklearn.utils.testing import assert_raises from sklearn.utils.testing import if_matplotlib @@ -18,9 +18,7 @@ # toy sample X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] y = [-1, -1, -1, 1, 1, 1] -w = [1.5, 2, 3.5, 4, 3, 2.5] -T = [[-1, -1], [2, 2], [3, 2]] -true_result = [-1, 1, 1] +sample_weight = [1.5, 2, 3.5, 4, 3, 2.5] # also load the boston dataset boston = datasets.load_boston() @@ -55,16 +53,16 @@ def test_partial_dependence_classifier(): assert pdp_w.shape == (1, 4) assert axes_w[0].shape[0] == 4 - assert_array_equal(pdp_w, pdp) + assert_allclose(pdp_w, pdp) # with non-trivial sample weights - clf.fit(X, y, sample_weight=w) + clf.fit(X, y, sample_weight=sample_weight) pdp_w2, axes_w2 = partial_dependence(clf, [0], X=X, grid_resolution=5) assert pdp_w2.shape == (1, 4) assert axes_w2[0].shape[0] == 4 - assert pdp_w2[0, 0] != pdp_w[0, 0] + assert np.sum(np.abs(pdp_w2 - pdp_w)) / np.sum(np.abs(pdp_w)) > 0.2 def test_partial_dependence_multiclass(): From 8bc07baa7c8e22e3fca8dc709469ff5a2a5a179d Mon Sep 17 00:00:00 2001 From: "Samuel O. 
Ronsin" Date: Wed, 13 Mar 2019 00:16:11 +0100 Subject: [PATCH 05/18] Fix test for binary classification --- sklearn/ensemble/tests/test_partial_dependence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/ensemble/tests/test_partial_dependence.py b/sklearn/ensemble/tests/test_partial_dependence.py index d92b4eff0afc3..11fa79a8c6508 100644 --- a/sklearn/ensemble/tests/test_partial_dependence.py +++ b/sklearn/ensemble/tests/test_partial_dependence.py @@ -62,7 +62,7 @@ def test_partial_dependence_classifier(): assert pdp_w2.shape == (1, 4) assert axes_w2[0].shape[0] == 4 - assert np.sum(np.abs(pdp_w2 - pdp_w)) / np.sum(np.abs(pdp_w)) > 0.2 + assert np.sum(np.abs(pdp_w2 - pdp_w)) / np.sum(np.abs(pdp_w)) > 0.05 def test_partial_dependence_multiclass(): From bef200fcf412b6dfcb395984e54d11277fbcf11f Mon Sep 17 00:00:00 2001 From: "Samuel O. Ronsin" Date: Wed, 13 Mar 2019 00:29:38 +0100 Subject: [PATCH 06/18] Add test for regressions based on example from initial issue --- .../ensemble/tests/test_partial_dependence.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/sklearn/ensemble/tests/test_partial_dependence.py b/sklearn/ensemble/tests/test_partial_dependence.py index 11fa79a8c6508..aad1bc5d1f52f 100644 --- a/sklearn/ensemble/tests/test_partial_dependence.py +++ b/sklearn/ensemble/tests/test_partial_dependence.py @@ -92,6 +92,31 @@ def test_partial_dependence_regressor(): assert pdp.shape == (1, grid_resolution) assert axes[0].shape[0] == grid_resolution + # Test near perfect correlation between pdp and diagonal when sample weights + # emphasize y = x predictions + N = 1000 + rng = np.random.RandomState(123456) + X_ = np.vstack((rng.randint(2, size=(1, N)), rng.rand(N, 1).T)).T + + mask_0 = np.where(X_[:, 0] == 0) + mask_1 = np.where(X_[:, 0] == 1) + + y_ = np.zeros(N) + y_[mask_0] = X_[:, 1][mask_0] + y_[mask_1] = -X_[:, 1][mask_1] + + sample_weight_ = np.zeros(N) + sample_weight_[mask_0] = 1000. 
+ sample_weight_[mask_1] = 1. + + gbt = GradientBoostingRegressor() + gbt.fit(X_, y_, sample_weight=sample_weight_) + + grid = np.arange(0, 1, 0.01) + pdp = partial_dependence(gbt, [1], grid=grid) + + assert np.corrcoef(np.ravel(pdp[0]), grid)[0, 1] > 0.999 + def test_partial_dependecy_input(): # Test input validation of partial dependence. From 76a828371372cdb9f76f095d6f9052373cf89df9 Mon Sep 17 00:00:00 2001 From: "Samuel O. Ronsin" Date: Wed, 13 Mar 2019 00:40:00 +0100 Subject: [PATCH 07/18] Edit whats_new --- doc/whats_new/v0.21.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index 0525a40467e74..1733f19f7fb2d 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -177,6 +177,11 @@ Support for Python 3.4 and below has been officially dropped. with the document and the caller functions. :issue:`6463` by :user:`movelikeriver `. +- |Fix| :func:`ensemble._gradient_boosting._partial_dependence_tree` now takes + sample weights into account for the partial dependence computation when the + gradient boosting model has been trained with sample weights. + :issue:`13192` by :user:`Samuel O. Ronsin `. + :mod:`sklearn.externals` ........................ From e4728c9438e218338ae8b82569f708181be4dec9 Mon Sep 17 00:00:00 2001 From: "Samuel O. 
Ronsin" Date: Wed, 13 Mar 2019 00:45:22 +0100 Subject: [PATCH 08/18] Wrap test comment to 79 characters --- sklearn/ensemble/tests/test_partial_dependence.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/tests/test_partial_dependence.py b/sklearn/ensemble/tests/test_partial_dependence.py index aad1bc5d1f52f..42f9275386058 100644 --- a/sklearn/ensemble/tests/test_partial_dependence.py +++ b/sklearn/ensemble/tests/test_partial_dependence.py @@ -92,8 +92,8 @@ def test_partial_dependence_regressor(): assert pdp.shape == (1, grid_resolution) assert axes[0].shape[0] == grid_resolution - # Test near perfect correlation between pdp and diagonal when sample weights - # emphasize y = x predictions + # Test near perfect correlation between partial dependence and diagonal + # when sample weights emphasize y = x predictions N = 1000 rng = np.random.RandomState(123456) X_ = np.vstack((rng.randint(2, size=(1, N)), rng.rand(N, 1).T)).T From bb1766083928fd4d5328403025706d9b57e003d9 Mon Sep 17 00:00:00 2001 From: "Samuel O. 
Ronsin" Date: Wed, 13 Mar 2019 10:30:11 +0100 Subject: [PATCH 09/18] Simplify test code for regression partial dependence --- .../ensemble/tests/test_partial_dependence.py | 27 +++++++++---------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/sklearn/ensemble/tests/test_partial_dependence.py b/sklearn/ensemble/tests/test_partial_dependence.py index 42f9275386058..79310bb097de5 100644 --- a/sklearn/ensemble/tests/test_partial_dependence.py +++ b/sklearn/ensemble/tests/test_partial_dependence.py @@ -96,26 +96,23 @@ def test_partial_dependence_regressor(): # when sample weights emphasize y = x predictions N = 1000 rng = np.random.RandomState(123456) - X_ = np.vstack((rng.randint(2, size=(1, N)), rng.rand(N, 1).T)).T + mask = rng.randint(2, size=N, dtype=bool) - mask_0 = np.where(X_[:, 0] == 0) - mask_1 = np.where(X_[:, 0] == 1) + x = rng.rand(N, 1) + # set y = x on mask and y = -x outside + y = np.ravel(x.copy()) + y[~mask] = -y[~mask] + X = np.hstack((mask[:,np.newaxis], x)) + # sample weights to emphasize data points where y = x + sample_weight = np.ones(N) + sample_weight[mask] = 1000. - y_ = np.zeros(N) - y_[mask_0] = X_[:, 1][mask_0] - y_[mask_1] = -X_[:, 1][mask_1] - - sample_weight_ = np.zeros(N) - sample_weight_[mask_0] = 1000. - sample_weight_[mask_1] = 1. - - gbt = GradientBoostingRegressor() - gbt.fit(X_, y_, sample_weight=sample_weight_) + clf.fit(X, y, sample_weight=None) grid = np.arange(0, 1, 0.01) - pdp = partial_dependence(gbt, [1], grid=grid) + pdp = partial_dependence(clf, [1], grid=grid) - assert np.corrcoef(np.ravel(pdp[0]), grid)[0, 1] > 0.999 + assert np.corrcoef(np.ravel(pdp[0]), grid)[0, 1] > 0.99 def test_partial_dependecy_input(): From 7c32edb8096750367b2931f27ba6823b7be6f2f6 Mon Sep 17 00:00:00 2001 From: "Samuel O. 
Ronsin" Date: Wed, 13 Mar 2019 10:32:56 +0100 Subject: [PATCH 10/18] PEP8 --- sklearn/ensemble/tests/test_partial_dependence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/ensemble/tests/test_partial_dependence.py b/sklearn/ensemble/tests/test_partial_dependence.py index 79310bb097de5..e00d8cb0c9942 100644 --- a/sklearn/ensemble/tests/test_partial_dependence.py +++ b/sklearn/ensemble/tests/test_partial_dependence.py @@ -102,7 +102,7 @@ def test_partial_dependence_regressor(): # set y = x on mask and y = -x outside y = np.ravel(x.copy()) y[~mask] = -y[~mask] - X = np.hstack((mask[:,np.newaxis], x)) + X = np.hstack((mask[:, np.newaxis], x)) # sample weights to emphasize data points where y = x sample_weight = np.ones(N) sample_weight[mask] = 1000. From 8e9a86db25cfca3f8974ecd02152ab6404793e51 Mon Sep 17 00:00:00 2001 From: "Samuel O. Ronsin" Date: Wed, 13 Mar 2019 10:51:40 +0100 Subject: [PATCH 11/18] Pass sample_weight to fit in sample-weight regression test --- sklearn/ensemble/tests/test_partial_dependence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/ensemble/tests/test_partial_dependence.py b/sklearn/ensemble/tests/test_partial_dependence.py index e00d8cb0c9942..27155b6ceffd9 100644 --- a/sklearn/ensemble/tests/test_partial_dependence.py +++ b/sklearn/ensemble/tests/test_partial_dependence.py @@ -107,7 +107,7 @@ def test_partial_dependence_regressor(): sample_weight = np.ones(N) sample_weight[mask] = 1000. - clf.fit(X, y, sample_weight=None) + clf.fit(X, y, sample_weight=sample_weight) grid = np.arange(0, 1, 0.01) pdp = partial_dependence(clf, [1], grid=grid) From 00372bf85a098cdd3f37860a5283ab34ac951a58 Mon Sep 17 00:00:00 2001 From: "Samuel O. 
Ronsin" Date: Wed, 13 Mar 2019 12:11:14 +0100 Subject: [PATCH 12/18] Refer to the public function in whats_new --- doc/whats_new/v0.21.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index 1733f19f7fb2d..f0da564b763ef 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -177,8 +177,8 @@ Support for Python 3.4 and below has been officially dropped. with the document and the caller functions. :issue:`6463` by :user:`movelikeriver `. -- |Fix| :func:`ensemble._gradient_boosting._partial_dependence_tree` now takes - sample weights into account for the partial dependence computation when the +- |Fix| :func:`ensemble.partial_dependence` now takes sample weights into + account for the partial dependence computation when the gradient boosting model has been trained with sample weights. :issue:`13192` by :user:`Samuel O. Ronsin `. From de7efa55d363297bb5746d5864f3af242129819a Mon Sep 17 00:00:00 2001 From: "Samuel O. 
Ronsin" Date: Wed, 13 Mar 2019 16:32:02 +0100 Subject: [PATCH 13/18] Make the sample weight test standalone for further reuse --- sklearn/ensemble/tests/test_partial_dependence.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sklearn/ensemble/tests/test_partial_dependence.py b/sklearn/ensemble/tests/test_partial_dependence.py index 27155b6ceffd9..d3c1b7f7c6a2b 100644 --- a/sklearn/ensemble/tests/test_partial_dependence.py +++ b/sklearn/ensemble/tests/test_partial_dependence.py @@ -62,7 +62,7 @@ def test_partial_dependence_classifier(): assert pdp_w2.shape == (1, 4) assert axes_w2[0].shape[0] == 4 - assert np.sum(np.abs(pdp_w2 - pdp_w)) / np.sum(np.abs(pdp_w)) > 0.05 + assert np.min(np.abs(pdp_w2 - pdp_w)) / np.mean(np.abs(pdp_w)) > 0.25 def test_partial_dependence_multiclass(): @@ -92,6 +92,8 @@ def test_partial_dependence_regressor(): assert pdp.shape == (1, grid_resolution) assert axes[0].shape[0] == grid_resolution + +def test_partial_dependence_sample_weight(): # Test near perfect correlation between partial dependence and diagonal # when sample weights emphasize y = x predictions N = 1000 @@ -107,6 +109,7 @@ def test_partial_dependence_regressor(): sample_weight = np.ones(N) sample_weight[mask] = 1000. + clf = GradientBoostingRegressor(n_estimators=10, random_state=1) clf.fit(X, y, sample_weight=sample_weight) grid = np.arange(0, 1, 0.01) From fb7aa996f8abc5320bd40b5ea2de825d93642baa Mon Sep 17 00:00:00 2001 From: "Samuel O. Ronsin" Date: Wed, 13 Mar 2019 16:37:52 +0100 Subject: [PATCH 14/18] Fix PR number --- doc/whats_new/v0.21.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index f0da564b763ef..0df4b34421581 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -180,7 +180,7 @@ Support for Python 3.4 and below has been officially dropped. 
- |Fix| :func:`ensemble.partial_dependence` now takes sample weights into account for the partial dependence computation when the gradient boosting model has been trained with sample weights. - :issue:`13192` by :user:`Samuel O. Ronsin `. + :issue:`13193` by :user:`Samuel O. Ronsin `. :mod:`sklearn.externals` ........................ From a3198c0a6798c11e3babdb0d18b7abee1c642804 Mon Sep 17 00:00:00 2001 From: "Samuel O. Ronsin" Date: Wed, 13 Mar 2019 17:29:58 +0100 Subject: [PATCH 15/18] Testing with L1 relative distance computed as averages --- sklearn/ensemble/tests/test_partial_dependence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/ensemble/tests/test_partial_dependence.py b/sklearn/ensemble/tests/test_partial_dependence.py index d3c1b7f7c6a2b..2f844954a3093 100644 --- a/sklearn/ensemble/tests/test_partial_dependence.py +++ b/sklearn/ensemble/tests/test_partial_dependence.py @@ -62,7 +62,7 @@ def test_partial_dependence_classifier(): assert pdp_w2.shape == (1, 4) assert axes_w2[0].shape[0] == 4 - assert np.min(np.abs(pdp_w2 - pdp_w)) / np.mean(np.abs(pdp_w)) > 0.25 + assert np.mean(np.abs(pdp_w2 - pdp_w)) / np.mean(np.abs(pdp_w)) > 0.25 def test_partial_dependence_multiclass(): From 7da303890d68ea8a19d457324c7a39ce94905809 Mon Sep 17 00:00:00 2001 From: "Samuel O. 
Ronsin" Date: Wed, 13 Mar 2019 19:23:13 +0100 Subject: [PATCH 16/18] Testing element-wise --- sklearn/ensemble/tests/test_partial_dependence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/ensemble/tests/test_partial_dependence.py b/sklearn/ensemble/tests/test_partial_dependence.py index 2f844954a3093..2053ff75088a8 100644 --- a/sklearn/ensemble/tests/test_partial_dependence.py +++ b/sklearn/ensemble/tests/test_partial_dependence.py @@ -62,7 +62,7 @@ def test_partial_dependence_classifier(): assert pdp_w2.shape == (1, 4) assert axes_w2[0].shape[0] == 4 - assert np.mean(np.abs(pdp_w2 - pdp_w)) / np.mean(np.abs(pdp_w)) > 0.25 + assert np.all(np.abs(pdp_w2 - pdp_w) / np.abs(pdp_w)) > 0.2 def test_partial_dependence_multiclass(): From 1db74272ca6232a38408a0debb76b9f5eccfa1b1 Mon Sep 17 00:00:00 2001 From: "Samuel O. Ronsin" Date: Wed, 27 Mar 2019 16:27:16 +0100 Subject: [PATCH 17/18] Fix and simplify unit test for binary classification --- sklearn/ensemble/tests/test_partial_dependence.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/tests/test_partial_dependence.py b/sklearn/ensemble/tests/test_partial_dependence.py index 2053ff75088a8..953231e2bdce5 100644 --- a/sklearn/ensemble/tests/test_partial_dependence.py +++ b/sklearn/ensemble/tests/test_partial_dependence.py @@ -18,7 +18,7 @@ # toy sample X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] y = [-1, -1, -1, 1, 1, 1] -sample_weight = [1.5, 2, 3.5, 4, 3, 2.5] +sample_weight = [1, 1, 1, 2, 2, 2] # also load the boston dataset boston = datasets.load_boston() @@ -62,7 +62,7 @@ def test_partial_dependence_classifier(): assert pdp_w2.shape == (1, 4) assert axes_w2[0].shape[0] == 4 - assert np.all(np.abs(pdp_w2 - pdp_w) / np.abs(pdp_w)) > 0.2 + assert np.all(np.abs(pdp_w2 - pdp_w) / np.abs(pdp_w) > 0.1) def test_partial_dependence_multiclass(): From a9ac018e45a8beecf789cb85e842c8aa5532a93b Mon Sep 17 00:00:00 2001 From: "Samuel O. 
Ronsin" Date: Wed, 27 Mar 2019 16:29:33 +0100 Subject: [PATCH 18/18] Clarify functional test --- sklearn/ensemble/tests/test_partial_dependence.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/ensemble/tests/test_partial_dependence.py b/sklearn/ensemble/tests/test_partial_dependence.py index 953231e2bdce5..2321d455aa4e3 100644 --- a/sklearn/ensemble/tests/test_partial_dependence.py +++ b/sklearn/ensemble/tests/test_partial_dependence.py @@ -100,11 +100,11 @@ def test_partial_dependence_sample_weight(): rng = np.random.RandomState(123456) mask = rng.randint(2, size=N, dtype=bool) - x = rng.rand(N, 1) + x = rng.rand(N) # set y = x on mask and y = -x outside - y = np.ravel(x.copy()) + y = x.copy() y[~mask] = -y[~mask] - X = np.hstack((mask[:, np.newaxis], x)) + X = np.c_[mask, x] # sample weights to emphasize data points where y = x sample_weight = np.ones(N) sample_weight[mask] = 1000.