From ebe4d4464b07bb62aaf190fe6dc80af91bafa0c9 Mon Sep 17 00:00:00 2001 From: Brandon Carter Date: Sat, 17 Dec 2016 17:57:22 -0500 Subject: [PATCH 1/4] update reference based on #7861 --- sklearn/datasets/kddcup99.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sklearn/datasets/kddcup99.py b/sklearn/datasets/kddcup99.py index 03bf3f8d8fdef..82e12982db67f 100644 --- a/sklearn/datasets/kddcup99.py +++ b/sklearn/datasets/kddcup99.py @@ -155,9 +155,11 @@ def fetch_kddcup99(subset=None, shuffle=False, random_state=None, Detection Evaluation Richard Lippmann, Joshua W. Haines, David J. Fried, Jonathan Korba, Kumar Das - .. [2] A Geometric Framework for Unsupervised Anomaly Detection: Detecting - Intrusions in Unlabeled Data (2002) by Eleazar Eskin, Andrew Arnold, - Michael Prerau, Leonid Portnoy, Sal Stolfo + .. [2] K. Yamanishi, J.-I. Takeuchi, G. Williams, and P. Milne. Online + unsupervised outlier detection using finite mixtures with + discounting learning algorithms. In Proceedings of the sixth + ACM SIGKDD international conference on Knowledge discovery + and data mining, pages 320–324. ACM Press, 2000. """ kddcup99 = _fetch_brute_kddcup99(shuffle=shuffle, percent10=percent10, From 226991885233234a3155ef8f206fc1bf67730909 Mon Sep 17 00:00:00 2001 From: Brandon Carter Date: Sat, 17 Dec 2016 18:52:38 -0500 Subject: [PATCH 2/4] replace a non-ascii character --- sklearn/datasets/kddcup99.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/datasets/kddcup99.py b/sklearn/datasets/kddcup99.py index 82e12982db67f..aa84ae5795d85 100644 --- a/sklearn/datasets/kddcup99.py +++ b/sklearn/datasets/kddcup99.py @@ -159,7 +159,7 @@ def fetch_kddcup99(subset=None, shuffle=False, random_state=None, unsupervised outlier detection using finite mixtures with discounting learning algorithms. In Proceedings of the sixth ACM SIGKDD international conference on Knowledge discovery - and data mining, pages 320–324. ACM Press, 2000. + and data mining, pages 320-324. ACM Press, 2000. """ kddcup99 = _fetch_brute_kddcup99(shuffle=shuffle, percent10=percent10, From 1fb3eb3cd3ec9b4d8a64a9ece44d995b6aa066e2 Mon Sep 17 00:00:00 2001 From: Brandon Carter Date: Mon, 19 Dec 2016 00:09:53 -0500 Subject: [PATCH 3/4] fix default parameter value for percent10 in fetch_kddcup99 --- sklearn/datasets/kddcup99.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/datasets/kddcup99.py b/sklearn/datasets/kddcup99.py index aa84ae5795d85..e59b19c5be900 100644 --- a/sklearn/datasets/kddcup99.py +++ b/sklearn/datasets/kddcup99.py @@ -40,7 +40,7 @@ def fetch_kddcup99(subset=None, shuffle=False, random_state=None, - percent10=True, download_if_missing=True): + percent10=False, download_if_missing=True): """Load and return the kddcup 99 dataset (classification). The KDD Cup '99 dataset was created by processing the tcpdump portions From 02bdd113b3ff22f7b822b48338f00acf62222484 Mon Sep 17 00:00:00 2001 From: Brandon Carter Date: Mon, 19 Dec 2016 13:14:43 -0500 Subject: [PATCH 4/4] percent10 now consistently true in helper and documentation for fetch_kddcup99 --- sklearn/datasets/kddcup99.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/datasets/kddcup99.py b/sklearn/datasets/kddcup99.py index e59b19c5be900..c2ed39caa10a6 100644 --- a/sklearn/datasets/kddcup99.py +++ b/sklearn/datasets/kddcup99.py @@ -40,12 +40,12 @@ def fetch_kddcup99(subset=None, shuffle=False, random_state=None, - percent10=False, download_if_missing=True): + percent10=True, download_if_missing=True): """Load and return the kddcup 99 dataset (classification). The KDD Cup '99 dataset was created by processing the tcpdump portions of the 1998 DARPA Intrusion Detection System (IDS) Evaluation dataset, - created by MIT Lincoln Lab [1] . The artificial data was generated using + created by MIT Lincoln Lab [1]. The artificial data was generated using a closed network and hand-injected attacks to produce a large number of different types of attack with normal activity in the background. As the initial goal was to produce a large training set for supervised @@ -134,7 +134,7 @@ def fetch_kddcup99(subset=None, shuffle=False, random_state=None, shuffle : bool, default=False Whether to shuffle dataset. - percent10 : bool, default=False + percent10 : bool, default=True Whether to load only 10 percent of the data. download_if_missing : bool, default=True @@ -216,7 +216,7 @@ def fetch_kddcup99(subset=None, shuffle=False, random_state=None, def _fetch_brute_kddcup99(subset=None, data_home=None, download_if_missing=True, random_state=None, - shuffle=False, percent10=False): + shuffle=False, percent10=True): """Load the kddcup99 dataset, downloading it if necessary. @@ -244,7 +244,7 @@ def _fetch_brute_kddcup99(subset=None, data_home=None, shuffle : bool, default=False Whether to shuffle dataset. - percent10 : bool, default=False + percent10 : bool, default=True Whether to load only 10 percent of the data. Returns