From 16729aca12eed17bb2cb1242423ce265ae4934b8 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Sat, 11 Mar 2017 23:02:21 -0500 Subject: [PATCH 01/29] Fixed depth formula in iforest --- sklearn/ensemble/iforest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/ensemble/iforest.py b/sklearn/ensemble/iforest.py index d34093c2acb8b..fbcd0252673b8 100644 --- a/sklearn/ensemble/iforest.py +++ b/sklearn/ensemble/iforest.py @@ -300,7 +300,7 @@ def _average_path_length(n_samples_leaf): if n_samples_leaf <= 1: return 1. else: - return 2. * (np.log(n_samples_leaf) + 0.5772156649) - 2. * ( + return 2. * (np.log(n_samples_leaf - 1.) + 0.5772156649) - 2. * ( n_samples_leaf - 1.) / n_samples_leaf else: From 86ab1262542c42c3e071309005bcbaf3d3282eec Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Sun, 12 Mar 2017 18:49:04 -0400 Subject: [PATCH 02/29] Added non-regression test for issue #8549 --- sklearn/ensemble/tests/test_iforest.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py index 767444f923f77..5608a655e5840 100644 --- a/sklearn/ensemble/tests/test_iforest.py +++ b/sklearn/ensemble/tests/test_iforest.py @@ -8,6 +8,7 @@ import numpy as np +from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_raises @@ -19,6 +20,7 @@ from sklearn.model_selection import ParameterGrid from sklearn.ensemble import IsolationForest +from sklearn.ensemble.iforest import _average_path_length from sklearn.model_selection import train_test_split from sklearn.datasets import load_boston, load_iris from sklearn.utils import check_random_state @@ -42,7 +44,6 @@ boston.data = boston.data[perm] boston.target = boston.target[perm] - def test_iforest(): """Check Isolation Forest for various parameter settings.""" X_train = np.array([[0, 1], [1, 2]]) @@ -194,7 +195,6 @@ def test_iforest_works(): assert_greater(np.min(decision_func[-2:]), np.max(decision_func[:-2])) assert_array_equal(pred, 6 * [1] + 2 * [-1]) - def test_max_samples_consistency(): # Make sure validated max_samples in iforest and BaseBagging are identical X = iris.data @@ -211,3 +211,11 @@ def test_iforest_subsampled_features(): clf = IsolationForest(max_features=0.8) clf.fit(X_train, y_train) clf.predict(X_test) + +def test_iforest_average_path_length(): + """ It tests non-regression for #8549 which used the wrong formula for average path length """ + assert_almost_equal(_average_path_length(1), 1., decimal=10) + assert_almost_equal(_average_path_length(5), 2.327020052, decimal=10) + assert_almost_equal(_average_path_length(999), 12.9679398844, decimal=10) + + From 6de409d809ddf706d3f229d9ebf2377226970479 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Sun, 12 Mar 2017 18:52:02 -0400 Subject: [PATCH 03/29] reverted some whitespace changes --- sklearn/ensemble/tests/test_iforest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py index 5608a655e5840..02ae90fe451d6 100644 --- a/sklearn/ensemble/tests/test_iforest.py +++ b/sklearn/ensemble/tests/test_iforest.py @@ -44,6 +44,7 @@ boston.data = boston.data[perm] boston.target = boston.target[perm] + def test_iforest(): """Check Isolation Forest for various parameter settings.""" X_train = np.array([[0, 1], [1, 2]]) @@ -195,6 +196,7 @@ def test_iforest_works(): assert_greater(np.min(decision_func[-2:]), np.max(decision_func[:-2])) assert_array_equal(pred, 6 * [1] + 2 * [-1]) + def test_max_samples_consistency(): # Make sure validated max_samples in iforest and BaseBagging are identical X = iris.data @@ -217,5 +219,3 @@ def test_iforest_average_path_length(): assert_almost_equal(_average_path_length(1), 1., decimal=10) assert_almost_equal(_average_path_length(5), 2.327020052, decimal=10) assert_almost_equal(_average_path_length(999), 12.9679398844, decimal=10) - - From 29fa2c0b567cffa6e5068a0fc4ef98303998489a Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Sun, 12 Mar 2017 19:40:03 -0400 Subject: [PATCH 04/29] Made changes to what's new and whitespace changes --- doc/whats_new.rst | 21 ++++++++++++--------- sklearn/ensemble/tests/test_iforest.py | 4 +++- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 11d7add579e8b..07adac548a2db 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -28,7 +28,6 @@ cannot assure that this list is complete.) Changelog --------- -New features ............ - Added the :class:`neighbors.LocalOutlierFactor` class for anomaly @@ -156,7 +155,11 @@ Enhancements Bug fixes ......... - - Fixed a bug where :class:`sklearn.cluster.DBSCAN` gives incorrect + - Fix a bug regarding fitting :class:`sklearn.ensemble.IsolationForest` with + an incorrect formula for the _average_path_length method + :issue:`8549` by `Peter Wang `_. + + - Fixed a bug where :class:`sklearn.cluster.DBSCAN` gives incorrect result when input is a precomputed sparse matrix with initial rows all zero. :issue:`8306` by :user:`Akshay Gupta ` @@ -167,7 +170,7 @@ Bug fixes - Fixed a bug where :func:`sklearn.model_selection.BaseSearchCV.inverse_transform` returns self.best_estimator_.transform() instead of self.best_estimator_.inverse_transform() - :issue:`8344` by :user:`Akshay Gupta ` + :issue:`8344` by :user:`Akshay Gupta ` - Fixed a bug where :class:`sklearn.linear_model.RandomizedLasso` and :class:`sklearn.linear_model.RandomizedLogisticRegression` breaks for @@ -274,13 +277,13 @@ API changes summary selection classes to be used with tools such as :func:`sklearn.model_selection.cross_val_predict`. :issue:`2879` by :user:`Stephen Hoover `. - - - Estimators with both methods ``decision_function`` and ``predict_proba`` - are now required to have a monotonic relation between them. The - method ``check_decision_proba_consistency`` has been added in - **sklearn.utils.estimator_checks** to check their consistency. + + - Estimators with both methods ``decision_function`` and ``predict_proba`` + are now required to have a monotonic relation between them. The + method ``check_decision_proba_consistency`` has been added in + **sklearn.utils.estimator_checks** to check their consistency. :issue:`7578` by :user:`Shubham Bhardwaj ` - + .. _changes_0_18_1: diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py index 02ae90fe451d6..5062b38106a09 100644 --- a/sklearn/ensemble/tests/test_iforest.py +++ b/sklearn/ensemble/tests/test_iforest.py @@ -215,7 +215,9 @@ def test_iforest_subsampled_features(): clf.predict(X_test) def test_iforest_average_path_length(): - """ It tests non-regression for #8549 which used the wrong formula for average path length """ + # It tests non-regression for #8549 which used the wrong formula + # for average path length + assert_almost_equal(_average_path_length(1), 1., decimal=10) assert_almost_equal(_average_path_length(5), 2.327020052, decimal=10) assert_almost_equal(_average_path_length(999), 12.9679398844, decimal=10) From 0832c73125754a9761f3f7fe8433d6e9bb13a4a4 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Sun, 12 Mar 2017 19:46:10 -0400 Subject: [PATCH 05/29] Update whats_new.rst --- doc/whats_new.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 07adac548a2db..f5926616a4590 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -28,6 +28,7 @@ cannot assure that this list is complete.) Changelog --------- +-New features ............ - Added the :class:`neighbors.LocalOutlierFactor` class for anomaly From e5e40b3a9b9e403f5f5c35761a2d316d7ffb8b16 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Sun, 12 Mar 2017 19:50:56 -0400 Subject: [PATCH 06/29] Update whats_new.rst --- doc/whats_new.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index f5926616a4590..561f6fdafb44a 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -18,7 +18,9 @@ parameters, may produce different models from the previous version. This often occurs due to changes in the modelling logic (bug fixes or enhancements), or in random sampling procedures. -* *to be listed* + - Made a change to :class:`sklearn.ensemble.IsolationForest` by + correcting a formula for the _average_path_length method + :issue:`8549` by `Peter Wang `_. Details are listed in the changelog below. @@ -28,7 +30,7 @@ cannot assure that this list is complete.) Changelog --------- --New features +- New features ............ - Added the :class:`neighbors.LocalOutlierFactor` class for anomaly From 5df8e14257bc8fdb2840f4dda7ef6da538876d8a Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Sun, 12 Mar 2017 20:20:58 -0400 Subject: [PATCH 07/29] fixed faulty whitespace --- sklearn/ensemble/tests/test_iforest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py index 5062b38106a09..716311ed95668 100644 --- a/sklearn/ensemble/tests/test_iforest.py +++ b/sklearn/ensemble/tests/test_iforest.py @@ -214,6 +214,7 @@ def test_iforest_subsampled_features(): clf.fit(X_train, y_train) clf.predict(X_test) + def test_iforest_average_path_length(): # It tests non-regression for #8549 which used the wrong formula # for average path length From df5acc470475693fb7c5bbf03f0abee83e8b2d15 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Sun, 12 Mar 2017 20:22:18 -0400 Subject: [PATCH 08/29] faulty whitespace fix and change to whats new --- doc/whats_new.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 561f6fdafb44a..82372d406498a 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -18,9 +18,7 @@ parameters, may produce different models from the previous version. This often occurs due to changes in the modelling logic (bug fixes or enhancements), or in random sampling procedures. - - Made a change to :class:`sklearn.ensemble.IsolationForest` by - correcting a formula for the _average_path_length method - :issue:`8549` by `Peter Wang `_. + * :class:`sklearn.ensemble.IsolationForest` (bug fix) Details are listed in the changelog below. From 06225a956130a21d7590c0a5924a5dcf9cdd06ed Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Sun, 12 Mar 2017 22:34:19 -0400 Subject: [PATCH 09/29] added constants to iforest average_path_length and the according non regression test --- sklearn/ensemble/iforest.py | 4 ++-- sklearn/ensemble/tests/test_iforest.py | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/sklearn/ensemble/iforest.py b/sklearn/ensemble/iforest.py index fbcd0252673b8..d06890cd25856 100644 --- a/sklearn/ensemble/iforest.py +++ b/sklearn/ensemble/iforest.py @@ -300,7 +300,7 @@ def _average_path_length(n_samples_leaf): if n_samples_leaf <= 1: return 1. else: - return 2. * (np.log(n_samples_leaf - 1.) + 0.5772156649) - 2. * ( + return 2. * (np.log(n_samples_leaf - 1.) + np.euler_gamma) - 2. * ( n_samples_leaf - 1.) / n_samples_leaf else: @@ -314,7 +314,7 @@ def _average_path_length(n_samples_leaf): average_path_length[mask] = 1. average_path_length[not_mask] = 2. * ( - np.log(n_samples_leaf[not_mask]) + 0.5772156649) - 2. * ( + np.log(n_samples_leaf[not_mask]) + np.euler_gamma) - 2. * ( n_samples_leaf[not_mask] - 1.) / n_samples_leaf[not_mask] return average_path_length.reshape(n_samples_leaf_shape) diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py index 716311ed95668..fbbd84505600e 100644 --- a/sklearn/ensemble/tests/test_iforest.py +++ b/sklearn/ensemble/tests/test_iforest.py @@ -219,6 +219,8 @@ def test_iforest_average_path_length(): # It tests non-regression for #8549 which used the wrong formula # for average path length - assert_almost_equal(_average_path_length(1), 1., decimal=10) - assert_almost_equal(_average_path_length(5), 2.327020052, decimal=10) - assert_almost_equal(_average_path_length(999), 12.9679398844, decimal=10) + result_one = 2. * (np.log(4.) + np.euler_gamma) - 2. * (4. / 5.) + result_two = 2. * (np.log(998.) + np.euler_gamma) - 2. * ( 998./999.) + assert_almost_equal(_average_path_length(1), 1., decimal=10, ) + assert_almost_equal(_average_path_length(5), result_one, decimal=10) + assert_almost_equal(_average_path_length(999), result_two, decimal=10) From aaaea54c9c78b69987a9901d5ccecc2049af4737 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Mon, 13 Mar 2017 13:41:04 +1100 Subject: [PATCH 10/29] COSMIT --- sklearn/ensemble/tests/test_iforest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py index fbbd84505600e..04547b76cf640 100644 --- a/sklearn/ensemble/tests/test_iforest.py +++ b/sklearn/ensemble/tests/test_iforest.py @@ -219,8 +219,8 @@ def test_iforest_average_path_length(): # It tests non-regression for #8549 which used the wrong formula # for average path length - result_one = 2. * (np.log(4.) + np.euler_gamma) - 2. * (4. / 5.) - result_two = 2. * (np.log(998.) + np.euler_gamma) - 2. * ( 998./999.) - assert_almost_equal(_average_path_length(1), 1., decimal=10, ) + result_one = 2. * (np.log(4.) + np.euler_gamma) - 2. * 4. / 5. + result_two = 2. * (np.log(998.) + np.euler_gamma) - 2. * 998. / 999. + assert_almost_equal(_average_path_length(1), 1., decimal=10) assert_almost_equal(_average_path_length(5), result_one, decimal=10) assert_almost_equal(_average_path_length(999), result_two, decimal=10) From c9256be6b6d9a14aed09962b129645fa951c940a Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Sun, 12 Mar 2017 22:42:26 -0400 Subject: [PATCH 11/29] Update whats_new.rst --- doc/whats_new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 82372d406498a..6af88cffc793b 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -28,7 +28,7 @@ cannot assure that this list is complete.) Changelog --------- -- New features +-New features ............ - Added the :class:`neighbors.LocalOutlierFactor` class for anomaly From fd62429f4b620bffc05dc946d8c50034b2a47b50 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Sun, 12 Mar 2017 23:35:58 -0400 Subject: [PATCH 12/29] Corrected IsolationForest average path formula and added integer array equiv test --- sklearn/ensemble/iforest.py | 2 +- sklearn/ensemble/tests/test_iforest.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/iforest.py b/sklearn/ensemble/iforest.py index d06890cd25856..fecb94d0aca8d 100644 --- a/sklearn/ensemble/iforest.py +++ b/sklearn/ensemble/iforest.py @@ -314,7 +314,7 @@ def _average_path_length(n_samples_leaf): average_path_length[mask] = 1. average_path_length[not_mask] = 2. * ( - np.log(n_samples_leaf[not_mask]) + np.euler_gamma) - 2. * ( + np.log(n_samples_leaf[not_mask] - 1.) + np.euler_gamma) - 2. * ( n_samples_leaf[not_mask] - 1.) / n_samples_leaf[not_mask] return average_path_length.reshape(n_samples_leaf_shape) diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py index fbbd84505600e..4b03027e9c79c 100644 --- a/sklearn/ensemble/tests/test_iforest.py +++ b/sklearn/ensemble/tests/test_iforest.py @@ -217,10 +217,16 @@ def test_iforest_subsampled_features(): def test_iforest_average_path_length(): # It tests non-regression for #8549 which used the wrong formula - # for average path length + # for average path length, strictly for the integer case result_one = 2. * (np.log(4.) + np.euler_gamma) - 2. * (4. / 5.) result_two = 2. * (np.log(998.) + np.euler_gamma) - 2. * ( 998./999.) assert_almost_equal(_average_path_length(1), 1., decimal=10, ) assert_almost_equal(_average_path_length(5), result_one, decimal=10) assert_almost_equal(_average_path_length(999), result_two, decimal=10) + + +def test_average_path_length_arr_int(): + # It tests non-regression for #8549 for integer array equivalence + + assert_almost_equal(_average_path_length(5), _average_path_length(np.array([5])), decimal=10) From 7e3a0c5d85c3b686f1975067dc9046f45f127d8c Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Sun, 12 Mar 2017 23:49:46 -0400 Subject: [PATCH 13/29] changed line to under 80 char --- sklearn/ensemble/tests/test_iforest.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py index d0f4ae9243def..546f161958fe7 100644 --- a/sklearn/ensemble/tests/test_iforest.py +++ b/sklearn/ensemble/tests/test_iforest.py @@ -229,4 +229,6 @@ def test_iforest_average_path_length(): def test_average_path_length_arr_int(): # It tests non-regression for #8549 for integer array equivalence - assert_almost_equal(_average_path_length(5), _average_path_length(np.array([5])), decimal=10) + assert_almost_equal(_average_path_length(5), + _average_path_length(np.array([5])), + decimal=10) From 96d0489856c618eb1aae1a2048ca2fb37d6b7507 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Mon, 13 Mar 2017 00:21:31 -0400 Subject: [PATCH 14/29] Update whats_new.rst --- doc/whats_new.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 6af88cffc793b..554e662a76975 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -28,7 +28,7 @@ cannot assure that this list is complete.) Changelog --------- --New features +New features ............ - Added the :class:`neighbors.LocalOutlierFactor` class for anomaly @@ -156,8 +156,8 @@ Enhancements Bug fixes ......... - - Fix a bug regarding fitting :class:`sklearn.ensemble.IsolationForest` with - an incorrect formula for the _average_path_length method + - Fixex a bug where :class:`sklearn.ensemble.IsolationForest` used an + an incorrect formula for the average path length :issue:`8549` by `Peter Wang `_. - Fixed a bug where :class:`sklearn.cluster.DBSCAN` gives incorrect From 55f40018c4e2613c615f78fff0100e3bf71a9b28 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Mon, 13 Mar 2017 00:22:22 -0400 Subject: [PATCH 15/29] Update whats_new.rst --- doc/whats_new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 554e662a76975..4d94c6f31e402 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -156,7 +156,7 @@ Enhancements Bug fixes ......... - - Fixex a bug where :class:`sklearn.ensemble.IsolationForest` used an + - Fixed a bug where :class:`sklearn.ensemble.IsolationForest` used an an incorrect formula for the average path length :issue:`8549` by `Peter Wang `_. From 3b4c5115932d54ca9bd25b4de22b3b6c6828c7ba Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Mon, 13 Mar 2017 01:14:30 -0400 Subject: [PATCH 16/29] reran tests --- doc/whats_new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 4d94c6f31e402..ebbed9261b355 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -156,7 +156,7 @@ Enhancements Bug fixes ......... - - Fixed a bug where :class:`sklearn.ensemble.IsolationForest` used an + - Fixed a bug where :class:`sklearn.ensemble.IsolationForest` uses an an incorrect formula for the average path length :issue:`8549` by `Peter Wang `_. From b85c25bd58e49033203fcd014934a96cd9ff84b9 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Mon, 13 Mar 2017 04:53:58 -0400 Subject: [PATCH 17/29] redefine np.euler_gamma --- sklearn/utils/fixes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index 7f1fe8eb964ab..3c4b5bbfadb34 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -36,6 +36,7 @@ def _parse_version(version_string): version.append(x) return tuple(version) +euler_gamma = getattr(np, 'euler_gamma', 0.577215664901532860606512090082402431) np_version = _parse_version(np.__version__) sp_version = _parse_version(scipy.__version__) From 477f50e887c119758a919adef1a7ffd0cafd8613 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Mon, 13 Mar 2017 04:59:15 -0400 Subject: [PATCH 18/29] added import statement for euler_gammma in iforest and test_iforest --- sklearn/ensemble/iforest.py | 1 + sklearn/ensemble/tests/test_iforest.py | 1 + 2 files changed, 2 insertions(+) diff --git a/sklearn/ensemble/iforest.py b/sklearn/ensemble/iforest.py index fecb94d0aca8d..77249d273a776 100644 --- a/sklearn/ensemble/iforest.py +++ b/sklearn/ensemble/iforest.py @@ -7,6 +7,7 @@ import numpy as np import scipy as sp from warnings import warn +from sklearn.utils.fixes import euler_gamma from scipy.sparse import issparse diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py index 546f161958fe7..4d4fd9a5a0d80 100644 --- a/sklearn/ensemble/tests/test_iforest.py +++ b/sklearn/ensemble/tests/test_iforest.py @@ -8,6 +8,7 @@ import numpy as np +from sklearn.utils.fixes import euler_gamma from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal From 9a37bada09b08a75a315ca52b557c92229df247d Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Mon, 13 Mar 2017 09:31:30 -0400 Subject: [PATCH 19/29] changed np.euler_gamma to euler_gamma --- sklearn/ensemble/iforest.py | 4 ++-- sklearn/ensemble/tests/test_iforest.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/ensemble/iforest.py b/sklearn/ensemble/iforest.py index 77249d273a776..ce379243e21d0 100644 --- a/sklearn/ensemble/iforest.py +++ b/sklearn/ensemble/iforest.py @@ -301,7 +301,7 @@ def _average_path_length(n_samples_leaf): if n_samples_leaf <= 1: return 1. else: - return 2. * (np.log(n_samples_leaf - 1.) + np.euler_gamma) - 2. * ( + return 2. * (np.log(n_samples_leaf - 1.) + euler_gamma) - 2. * ( n_samples_leaf - 1.) / n_samples_leaf else: @@ -315,7 +315,7 @@ def _average_path_length(n_samples_leaf): average_path_length[mask] = 1. average_path_length[not_mask] = 2. * ( - np.log(n_samples_leaf[not_mask] - 1.) + np.euler_gamma) - 2. * ( + np.log(n_samples_leaf[not_mask] - 1.) + euler_gamma) - 2. * ( n_samples_leaf[not_mask] - 1.) / n_samples_leaf[not_mask] return average_path_length.reshape(n_samples_leaf_shape) diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py index 4d4fd9a5a0d80..518b18c544bb9 100644 --- a/sklearn/ensemble/tests/test_iforest.py +++ b/sklearn/ensemble/tests/test_iforest.py @@ -220,8 +220,8 @@ def test_iforest_average_path_length(): # It tests non-regression for #8549 which used the wrong formula # for average path length, strictly for the integer case - result_one = 2. * (np.log(4.) + np.euler_gamma) - 2. * 4. / 5. - result_two = 2. * (np.log(998.) + np.euler_gamma) - 2. * 998. / 999. + result_one = 2. * (np.log(4.) + euler_gamma) - 2. * 4. / 5. + result_two = 2. * (np.log(998.) + euler_gamma) - 2. * 998. / 999. assert_almost_equal(_average_path_length(1), 1., decimal=10) assert_almost_equal(_average_path_length(5), result_one, decimal=10) assert_almost_equal(_average_path_length(999), result_two, decimal=10) From 41a4a32449635bc919def848170b2319d2f94c2c Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Mon, 13 Mar 2017 09:48:55 -0400 Subject: [PATCH 20/29] fix small formatting issue --- sklearn/utils/fixes.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index 3c4b5bbfadb34..bb0c2e1f7a1e9 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -36,7 +36,9 @@ def _parse_version(version_string): version.append(x) return tuple(version) -euler_gamma = getattr(np, 'euler_gamma', 0.577215664901532860606512090082402431) +euler_gamma = getattr(np, + 'euler_gamma', + 0.577215664901532860606512090082402431) np_version = _parse_version(np.__version__) sp_version = _parse_version(scipy.__version__) From 9cbae33747deca751c3710171b8bcdc30579ab22 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Mon, 13 Mar 2017 10:18:40 -0400 Subject: [PATCH 21/29] fix small formatting issue --- sklearn/utils/fixes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index bb0c2e1f7a1e9..cbd944c76d74c 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -37,7 +37,7 @@ def _parse_version(version_string): return tuple(version) euler_gamma = getattr(np, - 'euler_gamma', + 'euler_gamma', 0.577215664901532860606512090082402431) np_version = _parse_version(np.__version__) From c9bba59b976192ee95416bcfeab8fb5a20c97632 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Mon, 13 Mar 2017 19:38:31 -0400 Subject: [PATCH 22/29] modified average_path_length tests --- sklearn/ensemble/tests/test_iforest.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py index 518b18c544bb9..8b46eb927baf6 100644 --- a/sklearn/ensemble/tests/test_iforest.py +++ b/sklearn/ensemble/tests/test_iforest.py @@ -225,11 +225,9 @@ def test_iforest_average_path_length(): assert_almost_equal(_average_path_length(1), 1., decimal=10) assert_almost_equal(_average_path_length(5), result_one, decimal=10) assert_almost_equal(_average_path_length(999), result_two, decimal=10) - - -def test_average_path_length_arr_int(): - # It tests non-regression for #8549 for integer array equivalence - assert_almost_equal(_average_path_length(5), _average_path_length(np.array([5])), decimal=10) + assert_array_almost_equal(_average_path_length(np.array([1,5,999])), + [1., result_one, result_two], + decimal=10) From a36870fb7e6c42a7e716dad261f2d9dfc12bbc38 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Mon, 13 Mar 2017 20:02:43 -0400 Subject: [PATCH 23/29] formatting fix + removed redundant tests --- sklearn/ensemble/tests/test_iforest.py | 8 +------- sklearn/utils/fixes.py | 3 +-- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py index 8b46eb927baf6..bd75dfa110eaf 100644 --- a/sklearn/ensemble/tests/test_iforest.py +++ b/sklearn/ensemble/tests/test_iforest.py @@ -222,12 +222,6 @@ def test_iforest_average_path_length(): result_one = 2. * (np.log(4.) + euler_gamma) - 2. * 4. / 5. result_two = 2. * (np.log(998.) + euler_gamma) - 2. * 998. / 999. - assert_almost_equal(_average_path_length(1), 1., decimal=10) - assert_almost_equal(_average_path_length(5), result_one, decimal=10) - assert_almost_equal(_average_path_length(999), result_two, decimal=10) - assert_almost_equal(_average_path_length(5), - _average_path_length(np.array([5])), - decimal=10) - assert_array_almost_equal(_average_path_length(np.array([1,5,999])), + assert_array_almost_equal(_average_path_length(np.array([1, 5, 999])), [1., result_one, result_two], decimal=10) diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index cbd944c76d74c..d789d5f525cd4 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -36,8 +36,7 @@ def _parse_version(version_string): version.append(x) return tuple(version) -euler_gamma = getattr(np, - 'euler_gamma', +euler_gamma = getattr(np, 'euler_gamma', 0.577215664901532860606512090082402431) np_version = _parse_version(np.__version__) From 6d887f41c817d221c400946bb1a5b37f54c76671 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Mon, 13 Mar 2017 20:24:58 -0400 Subject: [PATCH 24/29] fix import error --- sklearn/ensemble/tests/test_iforest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py index bd75dfa110eaf..c81dbf5b74814 100644 --- a/sklearn/ensemble/tests/test_iforest.py +++ b/sklearn/ensemble/tests/test_iforest.py @@ -9,7 +9,6 @@ import numpy as np from sklearn.utils.fixes import euler_gamma -from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_raises From e7f98a89c068b8676055dede83db41f57b29c373 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Mon, 13 Mar 2017 23:11:13 -0400 Subject: [PATCH 25/29] retry remote server error --- sklearn/ensemble/tests/test_iforest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py index c81dbf5b74814..cec2740fa5341 100644 --- a/sklearn/ensemble/tests/test_iforest.py +++ b/sklearn/ensemble/tests/test_iforest.py @@ -8,7 +8,7 @@ import numpy as np -from sklearn.utils.fixes import euler_gamma +from sklearn.utils.fixes import euler_gamm from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_raises From 68b40a7232da19272f3cb6e68c8cff2910daf097 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Mon, 13 Mar 2017 23:11:21 -0400 Subject: [PATCH 26/29] retry remote server error --- sklearn/ensemble/tests/test_iforest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py index cec2740fa5341..c81dbf5b74814 100644 --- a/sklearn/ensemble/tests/test_iforest.py +++ b/sklearn/ensemble/tests/test_iforest.py @@ -8,7 +8,7 @@ import numpy as np -from sklearn.utils.fixes import euler_gamm +from sklearn.utils.fixes import euler_gamma from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_raises From d3dc54389e1b8998c02fa43098c976fce33e243c Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Mon, 13 Mar 2017 23:30:30 -0400 Subject: [PATCH 27/29] retry remote server error --- sklearn/ensemble/tests/test_iforest.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py index c81dbf5b74814..d6c2fcd0c6c25 100644 --- a/sklearn/ensemble/tests/test_iforest.py +++ b/sklearn/ensemble/tests/test_iforest.py @@ -222,5 +222,4 @@ def test_iforest_average_path_length(): result_one = 2. * (np.log(4.) + euler_gamma) - 2. * 4. / 5. result_two = 2. * (np.log(998.) + euler_gamma) - 2. * 998. / 999. assert_array_almost_equal(_average_path_length(np.array([1, 5, 999])), - [1., result_one, result_two], - decimal=10) + [1., result_one, result_two], decimal=10) From 2e040dccf0a873f33080798a2389d84f5288a629 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Tue, 14 Mar 2017 10:55:34 -0400 Subject: [PATCH 28/29] re-added some iforest tests --- sklearn/ensemble/tests/test_iforest.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py index d6c2fcd0c6c25..40f290597f7cb 100644 --- a/sklearn/ensemble/tests/test_iforest.py +++ b/sklearn/ensemble/tests/test_iforest.py @@ -9,6 +9,7 @@ import numpy as np from sklearn.utils.fixes import euler_gamma +from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_raises @@ -221,5 +222,7 @@ def test_iforest_average_path_length(): result_one = 2. * (np.log(4.) + euler_gamma) - 2. * 4. / 5. result_two = 2. * (np.log(998.) + euler_gamma) - 2. * 998. / 999. + assert_almost_equal(_average_path_length(5), result_one, decimal=10) + assert_almost_equal(_average_path_length(999), result_two, decimal=10) assert_array_almost_equal(_average_path_length(np.array([1, 5, 999])), [1., result_one, result_two], decimal=10) From d2084b43375f849b64f2ae254f9834bf169eebd2 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Tue, 14 Mar 2017 10:57:05 -0400 Subject: [PATCH 29/29] re-added some iforest tests --- sklearn/ensemble/tests/test_iforest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py index 40f290597f7cb..0ade6195c618e 100644 --- a/sklearn/ensemble/tests/test_iforest.py +++ b/sklearn/ensemble/tests/test_iforest.py @@ -222,6 +222,7 @@ def test_iforest_average_path_length(): result_one = 2. * (np.log(4.) + euler_gamma) - 2. * 4. / 5. result_two = 2. * (np.log(998.) + euler_gamma) - 2. * 998. / 999. + assert_almost_equal(_average_path_length(1), 1., decimal=10) assert_almost_equal(_average_path_length(5), result_one, decimal=10) assert_almost_equal(_average_path_length(999), result_two, decimal=10) assert_array_almost_equal(_average_path_length(np.array([1, 5, 999])),