From 14bae4411b29f1375c76212ecce94b6b04036d81 Mon Sep 17 00:00:00 2001 From: Emmanuel Arias Date: Fri, 14 Dec 2018 13:32:55 -0300 Subject: [PATCH 1/8] [WIP] DOC: fix the documentation of fetch_20newsgroups Fixes #12777. Related: https://github.com/scikit-learn/scikit-learn/pull/12770/files#r241655146 --- sklearn/datasets/twenty_newsgroups.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/sklearn/datasets/twenty_newsgroups.py b/sklearn/datasets/twenty_newsgroups.py index 2a30efac03572..96f405b684287 100644 --- a/sklearn/datasets/twenty_newsgroups.py +++ b/sklearn/datasets/twenty_newsgroups.py @@ -216,11 +216,18 @@ def fetch_20newsgroups(data_home=None, subset='train', categories=None, Returns ------- - bunch : Bunch object - bunch.data: list, length [n_samples] - bunch.target: array, shape [n_samples] - bunch.filenames: list, length [n_classes] - bunch.DESCR: a description of the dataset. + bunch : Bunch object with the following attribute: + + bunch.data: list, length [n_samples] + + bunch.target: array, shape [n_samples] + + bunch.filenames: list, length [n_classes] + + bunch.DESCR: a description of the dataset. + + bunch.target_names: a list of categories containing in the dataset, + length [n_categories]. This depends of the `categories` parameter. 
""" data_home = get_data_home(data_home=data_home) From d4b7c02d0a186ba3d5869ccb219d0c22ecb4dec1 Mon Sep 17 00:00:00 2001 From: Emmanuel Arias Date: Fri, 14 Dec 2018 16:15:28 -0300 Subject: [PATCH 2/8] improve docstring --- sklearn/datasets/twenty_newsgroups.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/sklearn/datasets/twenty_newsgroups.py b/sklearn/datasets/twenty_newsgroups.py index 96f405b684287..7e9c3253cd7a8 100644 --- a/sklearn/datasets/twenty_newsgroups.py +++ b/sklearn/datasets/twenty_newsgroups.py @@ -218,16 +218,16 @@ def fetch_20newsgroups(data_home=None, subset='train', categories=None, ------- bunch : Bunch object with the following attribute: - bunch.data: list, length [n_samples] + - bunch.data: list, length [n_samples] - bunch.target: array, shape [n_samples] + - bunch.target: array, shape [n_samples] - bunch.filenames: list, length [n_classes] + - bunch.filenames: list, length [n_classes] - bunch.DESCR: a description of the dataset. + - bunch.DESCR: a description of the dataset. - bunch.target_names: a list of categories containing in the dataset, - length [n_categories]. This depends of the `categories` parameter. + - bunch.target_names: a list of categories of the returned data, + length [n_classes]. This depends of the `categories` parameter. """ data_home = get_data_home(data_home=data_home) @@ -376,11 +376,15 @@ def fetch_20newsgroups_vectorized(subset="train", remove=(), data_home=None, Returns ------- - bunch : Bunch object - bunch.data: sparse matrix, shape [n_samples, n_features] - bunch.target: array, shape [n_samples] - bunch.target_names: list, length [n_classes] - bunch.DESCR: a description of the dataset. + bunch : Bunch object with the following attribute: + + - bunch.data: sparse matrix, shape [n_samples, n_features] + + - bunch.target: array, shape [n_samples] + + - bunch.target_names: list, length [n_classes] + + - bunch.DESCR: a description of the dataset. 
(data, target) : tuple if ``return_X_y`` is True From aaeaebf34a227afd15587fa373764525609585d5 Mon Sep 17 00:00:00 2001 From: Emmanuel Arias Date: Sat, 15 Dec 2018 22:00:40 -0300 Subject: [PATCH 3/8] fix docs --- sklearn/datasets/twenty_newsgroups.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sklearn/datasets/twenty_newsgroups.py b/sklearn/datasets/twenty_newsgroups.py index 7e9c3253cd7a8..19e396a4dbbca 100644 --- a/sklearn/datasets/twenty_newsgroups.py +++ b/sklearn/datasets/twenty_newsgroups.py @@ -217,7 +217,6 @@ def fetch_20newsgroups(data_home=None, subset='train', categories=None, Returns ------- bunch : Bunch object with the following attribute: - - bunch.data: list, length [n_samples] - bunch.target: array, shape [n_samples] @@ -377,7 +376,6 @@ def fetch_20newsgroups_vectorized(subset="train", remove=(), data_home=None, Returns ------- bunch : Bunch object with the following attribute: - - bunch.data: sparse matrix, shape [n_samples, n_features] - bunch.target: array, shape [n_samples] From 0f6dac5823db6e3f6c4c22cdd42b6ea786acfff5 Mon Sep 17 00:00:00 2001 From: Emmanuel Arias Date: Sun, 16 Dec 2018 11:30:49 -0300 Subject: [PATCH 4/8] remove blank lines between bullets --- sklearn/datasets/twenty_newsgroups.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/sklearn/datasets/twenty_newsgroups.py b/sklearn/datasets/twenty_newsgroups.py index 19e396a4dbbca..3bac3801a8fa6 100644 --- a/sklearn/datasets/twenty_newsgroups.py +++ b/sklearn/datasets/twenty_newsgroups.py @@ -218,13 +218,9 @@ def fetch_20newsgroups(data_home=None, subset='train', categories=None, ------- bunch : Bunch object with the following attribute: - bunch.data: list, length [n_samples] - - bunch.target: array, shape [n_samples] - - bunch.filenames: list, length [n_classes] - - bunch.DESCR: a description of the dataset. - - bunch.target_names: a list of categories of the returned data, length [n_classes]. This depends of the `categories` parameter. 
""" @@ -377,11 +373,8 @@ def fetch_20newsgroups_vectorized(subset="train", remove=(), data_home=None, ------- bunch : Bunch object with the following attribute: - bunch.data: sparse matrix, shape [n_samples, n_features] - - bunch.target: array, shape [n_samples] - - bunch.target_names: list, length [n_classes] - - bunch.DESCR: a description of the dataset. (data, target) : tuple if ``return_X_y`` is True From 580b3fc108b14516d78b87866dd38f03a8feb758 Mon Sep 17 00:00:00 2001 From: Emmanuel Arias Date: Sun, 16 Dec 2018 13:38:20 -0300 Subject: [PATCH 5/8] fix doc --- sklearn/datasets/twenty_newsgroups.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/datasets/twenty_newsgroups.py b/sklearn/datasets/twenty_newsgroups.py index 3bac3801a8fa6..fb9c395f86ed9 100644 --- a/sklearn/datasets/twenty_newsgroups.py +++ b/sklearn/datasets/twenty_newsgroups.py @@ -222,7 +222,7 @@ def fetch_20newsgroups(data_home=None, subset='train', categories=None, - bunch.filenames: list, length [n_classes] - bunch.DESCR: a description of the dataset. - bunch.target_names: a list of categories of the returned data, - length [n_classes]. This depends of the `categories` parameter. + length [n_classes]. This depends on the `categories` parameter. 
""" data_home = get_data_home(data_home=data_home) From 082fdc6d6abc48acd763650b476b2af837028f98 Mon Sep 17 00:00:00 2001 From: Emmanuel Arias Date: Sun, 16 Dec 2018 13:41:23 -0300 Subject: [PATCH 6/8] fix docs --- sklearn/datasets/twenty_newsgroups.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/datasets/twenty_newsgroups.py b/sklearn/datasets/twenty_newsgroups.py index fb9c395f86ed9..3714bdf3ade23 100644 --- a/sklearn/datasets/twenty_newsgroups.py +++ b/sklearn/datasets/twenty_newsgroups.py @@ -374,7 +374,7 @@ def fetch_20newsgroups_vectorized(subset="train", remove=(), data_home=None, bunch : Bunch object with the following attribute: - bunch.data: sparse matrix, shape [n_samples, n_features] - bunch.target: array, shape [n_samples] - - bunch.target_names: list, length [n_classes] + - bunch.target_names: list, length [n_samples] - bunch.DESCR: a description of the dataset. (data, target) : tuple if ``return_X_y`` is True From 0275d02e1eb51d1675473b6dc23994eac0927276 Mon Sep 17 00:00:00 2001 From: Emmanuel Arias Date: Sun, 16 Dec 2018 23:38:48 -0300 Subject: [PATCH 7/8] fix doc --- sklearn/datasets/twenty_newsgroups.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/datasets/twenty_newsgroups.py b/sklearn/datasets/twenty_newsgroups.py index 3714bdf3ade23..1e92e7770e65a 100644 --- a/sklearn/datasets/twenty_newsgroups.py +++ b/sklearn/datasets/twenty_newsgroups.py @@ -374,7 +374,8 @@ def fetch_20newsgroups_vectorized(subset="train", remove=(), data_home=None, bunch : Bunch object with the following attribute: - bunch.data: sparse matrix, shape [n_samples, n_features] - bunch.target: array, shape [n_samples] - - bunch.target_names: list, length [n_samples] + - bunch.target_names: a list of categories of the returned data, + length [n_classes]. - bunch.DESCR: a description of the dataset. 
(data, target) : tuple if ``return_X_y`` is True From 22f259ff57a5e563b78f75ba9b6ab7b39cda4af6 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Sun, 23 Dec 2018 09:53:06 +0800 Subject: [PATCH 8/8] correction --- sklearn/datasets/twenty_newsgroups.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/datasets/twenty_newsgroups.py b/sklearn/datasets/twenty_newsgroups.py index 1e92e7770e65a..dd47d9962c42f 100644 --- a/sklearn/datasets/twenty_newsgroups.py +++ b/sklearn/datasets/twenty_newsgroups.py @@ -219,7 +219,7 @@ def fetch_20newsgroups(data_home=None, subset='train', categories=None, bunch : Bunch object with the following attribute: - bunch.data: list, length [n_samples] - bunch.target: array, shape [n_samples] - - bunch.filenames: list, length [n_classes] + - bunch.filenames: list, length [n_samples] - bunch.DESCR: a description of the dataset. - bunch.target_names: a list of categories of the returned data, length [n_classes]. This depends on the `categories` parameter.