From 60d08b92601756efa11fd18e5c60312f1309a36d Mon Sep 17 00:00:00 2001
From: Joel Nothman
Date: Wed, 15 Aug 2018 17:23:03 +1000
Subject: [PATCH 1/6] API Deprecate fetch_mldata and update examples

---
 doc/datasets/index.rst                        | 83 -------------------
 doc/modules/classes.rst                       |  2 +-
 doc/whats_new/v0.20.rst                       |  4 +
 examples/gaussian_process/plot_gpr_co2.py     | 49 +++++++++--
 .../linear_model/plot_sgd_early_stopping.py   |  6 +-
 .../plot_sparse_logistic_regression_mnist.py  |  7 +-
 .../plot_classifier_chain_yeast.py            | 12 +--
 .../neural_networks/plot_mnist_filters.py     | 12 +--
 sklearn/datasets/mldata.py                    | 13 +++
 sklearn/datasets/tests/test_mldata.py         | 12 ++-
 sklearn/utils/testing.py                      |  8 ++
 11 files changed, 97 insertions(+), 111 deletions(-)

diff --git a/doc/datasets/index.rst b/doc/datasets/index.rst
index 2f061aabe8d30..947e55f0c4c37 100644
--- a/doc/datasets/index.rst
+++ b/doc/datasets/index.rst
@@ -351,89 +351,6 @@ features::
 
   _`Faster API-compatible implementation`: https://github.com/mblondel/svmlight-loader
 
-..
-    For doctests:
-
-    >>> import numpy as np
-    >>> import os
-    >>> import tempfile
-    >>> # Create a temporary folder for the data fetcher
-    >>> custom_data_home = tempfile.mkdtemp()
-    >>> os.makedirs(os.path.join(custom_data_home, 'mldata'))
-
-
-.. _mldata:
-
-Downloading datasets from the mldata.org repository
----------------------------------------------------
-
-`mldata.org <http://mldata.org>`_ is a public repository for machine learning
-data, supported by the `PASCAL network <http://www.pascal-network.org>`_ .
-
-The ``sklearn.datasets`` package is able to directly download data
-sets from the repository using the function
-:func:`sklearn.datasets.fetch_mldata`.
-
-For example, to download the MNIST digit recognition database::
-
-  >>> from sklearn.datasets import fetch_mldata
-  >>> mnist = fetch_mldata('MNIST original', data_home=custom_data_home)
-
-The MNIST database contains a total of 70000 examples of handwritten digits
-of size 28x28 pixels, labeled from 0 to 9::
-
-  >>> mnist.data.shape
-  (70000, 784)
-  >>> mnist.target.shape
-  (70000,)
-  >>> np.unique(mnist.target)
-  array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])
-
-After the first download, the dataset is cached locally in the path
-specified by the ``data_home`` keyword argument, which defaults to
-``~/scikit_learn_data/``::
-
-  >>> os.listdir(os.path.join(custom_data_home, 'mldata'))
-  ['mnist-original.mat']
-
-Data sets in `mldata.org <http://mldata.org>`_ do not adhere to a strict
-naming or formatting convention. :func:`sklearn.datasets.fetch_mldata` is
-able to make sense of the most common cases, but allows to tailor the
-defaults to individual datasets:
-
-* The data arrays in `mldata.org <http://mldata.org>`_ are most often
-  shaped as ``(n_features, n_samples)``. This is the opposite of the
-  ``scikit-learn`` convention, so :func:`sklearn.datasets.fetch_mldata`
-  transposes the matrix by default. The ``transpose_data`` keyword controls
-  this behavior::
-
-    >>> iris = fetch_mldata('iris', data_home=custom_data_home)
-    >>> iris.data.shape
-    (150, 4)
-    >>> iris = fetch_mldata('iris', transpose_data=False,
-    ...                     data_home=custom_data_home)
-    >>> iris.data.shape
-    (4, 150)
-
-* For datasets with multiple columns, :func:`sklearn.datasets.fetch_mldata`
-  tries to identify the target and data columns and rename them to ``target``
-  and ``data``. This is done by looking for arrays named ``label`` and
-  ``data`` in the dataset, and failing that by choosing the first array to be
-  ``target`` and the second to be ``data``. This behavior can be changed with
-  the ``target_name`` and ``data_name`` keywords, setting them to a specific
-  name or index number (the name and order of the columns in the datasets
-  can be found at its `mldata.org <http://mldata.org>`_ under the tab "Data"::
-
-    >>> iris2 = fetch_mldata('datasets-UCI iris', target_name=1, data_name=0,
-    ...                      data_home=custom_data_home)
-    >>> iris3 = fetch_mldata('datasets-UCI iris', target_name='class',
-    ...                      data_name='double0', data_home=custom_data_home)
-
-
-..
-    >>> import shutil
-    >>> shutil.rmtree(custom_data_home)
-
 .. _external_datasets:
 
 Loading from external datasets
diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst
index fb38eab8fde0d..57ccfb5cff704 100644
--- a/doc/modules/classes.rst
+++ b/doc/modules/classes.rst
@@ -257,7 +257,6 @@ Loaders
    datasets.fetch_kddcup99
    datasets.fetch_lfw_pairs
    datasets.fetch_lfw_people
-   datasets.fetch_mldata
    datasets.fetch_olivetti_faces
    datasets.fetch_openml
    datasets.fetch_rcv1
@@ -1513,6 +1512,7 @@ To be removed in 0.22
    :template: deprecated_function.rst
 
    covariance.graph_lasso
+   datasets.fetch_mldata
 
 
 To be removed in 0.21
diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst
index df346696734a0..d854d3cbc28fa 100644
--- a/doc/whats_new/v0.20.rst
+++ b/doc/whats_new/v0.20.rst
@@ -209,6 +209,10 @@ Support for Python 3.3 has been officially dropped.
   data points could be generated. :issue:`10045` by :user:`Christian Braune
   <christianbraune79>`.
 
+- |API| Deprecated :func:`sklearn.datasets.fetch_mldata` to be removed in
+  version 0.22. MLData.org is no longer operational. Until removal it will
+  remain possible to load cached datasets. :issue:`11466` by `Joel Nothman`_.
+
 :mod:`sklearn.decomposition`
 ............................
diff --git a/examples/gaussian_process/plot_gpr_co2.py b/examples/gaussian_process/plot_gpr_co2.py
index b0b271a3644a2..8170de01898dc 100644
--- a/examples/gaussian_process/plot_gpr_co2.py
+++ b/examples/gaussian_process/plot_gpr_co2.py
@@ -8,7 +8,7 @@
 hyperparameter optimization using gradient ascent on the
 log-marginal-likelihood. The data consists of the monthly average atmospheric
 CO2 concentrations (in parts per million by volume (ppmv)) collected at the
-Mauna Loa Observatory in Hawaii, between 1958 and 1997. The objective is to
+Mauna Loa Observatory in Hawaii, between 1958 and 2001. The objective is to
 model the CO2 concentration as a function of the time t.
 
 The kernel is composed of several terms that are responsible for explaining
@@ -57,12 +57,12 @@
 explained by the model. The figure shows also that the model makes very
 confident predictions until around 2015.
""" -print(__doc__) - # Authors: Jan Hendrik Metzen # # License: BSD 3 clause +from __future__ import division, print_function + import numpy as np from matplotlib import pyplot as plt @@ -70,11 +70,46 @@ from sklearn.gaussian_process import GaussianProcessRegressor from sklearn.gaussian_process.kernels \ import RBF, WhiteKernel, RationalQuadratic, ExpSineSquared -from sklearn.datasets import fetch_mldata +try: + from urllib.request import urlopen +except ImportError: + # Python 2 + from urllib2 import urlopen + +print(__doc__) + -data = fetch_mldata('mauna-loa-atmospheric-co2').data -X = data[:, [1]] -y = data[:, 0] +def load_mauna_loa_atmospheric_c02(): + url = ('http://cdiac.ess-dive.lbl.gov/' + 'ftp/trends/co2/sio-keel-flask/maunaloa_c.dat') + months = [] + ppmv_sums = [] + counts = [] + for line in urlopen(url): + line = line.decode('utf8') + if not line.startswith('MLO'): + # ignore headers + continue + station, date, weight, flag, ppmv = line.split() + y = date[:2] + m = date[2:4] + month_float = (int(('20' if y < '20' else '19') + y) + + (int(m) - 1) / 12) + if not months or month_float != months[-1]: + months.append(month_float) + ppmv_sums.append(float(ppmv)) + counts.append(1) + else: + # aggregate monthly sum to produce average + ppmv_sums[-1] += float(ppmv) + counts[-1] += 1 + + months = np.asarray(months).reshape(-1, 1) + avg_ppmvs = np.asarray(ppmv_sums) / counts + return months, avg_ppmvs + + +X, y = load_mauna_loa_atmospheric_c02() # Kernel with parameters given in GPML book k1 = 66.0**2 * RBF(length_scale=67.0) # long term smooth rising trend diff --git a/examples/linear_model/plot_sgd_early_stopping.py b/examples/linear_model/plot_sgd_early_stopping.py index 31ce61f39d22c..3786a7aaef9b7 100644 --- a/examples/linear_model/plot_sgd_early_stopping.py +++ b/examples/linear_model/plot_sgd_early_stopping.py @@ -47,7 +47,7 @@ import matplotlib.pyplot as plt from sklearn import linear_model -from sklearn.datasets import fetch_mldata +from sklearn.datasets import fetch_openml from sklearn.model_selection import train_test_split from sklearn.utils.testing import ignore_warnings from sklearn.exceptions import ConvergenceWarning @@ -56,9 +56,9 @@ print(__doc__) -def load_mnist(n_samples=None, class_0=0, class_1=8): +def load_mnist(n_samples=None, class_0='0', class_1='8'): """Load MNIST, select two classes, shuffle and return only n_samples.""" - mnist = fetch_mldata('MNIST original') + mnist = fetch_openml('mnist_784', version=1) # take only two classes for binary classification mask = np.logical_or(mnist.target == class_0, mnist.target == class_1) diff --git a/examples/linear_model/plot_sparse_logistic_regression_mnist.py b/examples/linear_model/plot_sparse_logistic_regression_mnist.py index 5610f471b5d05..05fd976f5752f 100644 --- a/examples/linear_model/plot_sparse_logistic_regression_mnist.py +++ b/examples/linear_model/plot_sparse_logistic_regression_mnist.py @@ -20,7 +20,7 @@ import matplotlib.pyplot as plt import numpy as np -from sklearn.datasets import fetch_mldata +from sklearn.datasets import fetch_openml from sklearn.linear_model import LogisticRegression from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler @@ -35,9 +35,10 @@ t0 = time.time() train_samples = 5000 -mnist = fetch_mldata('MNIST original') -X = mnist.data.astype('float64') +mnist = fetch_openml('mnist_784', version=1) +X = mnist.data y = mnist.target + random_state = check_random_state(0) permutation = random_state.permutation(X.shape[0]) X = X[permutation] 
diff --git a/examples/multioutput/plot_classifier_chain_yeast.py b/examples/multioutput/plot_classifier_chain_yeast.py
index 6a90e14dfc379..414652500f2d7 100644
--- a/examples/multioutput/plot_classifier_chain_yeast.py
+++ b/examples/multioutput/plot_classifier_chain_yeast.py
@@ -32,24 +32,24 @@
 with randomly ordered chains).
 """
 
-print(__doc__)
-
 # Author: Adam Kleczewski
 # License: BSD 3 clause
 
 import numpy as np
 import matplotlib.pyplot as plt
+from sklearn.datasets import fetch_openml
 from sklearn.multioutput import ClassifierChain
 from sklearn.model_selection import train_test_split
 from sklearn.multiclass import OneVsRestClassifier
 from sklearn.metrics import jaccard_similarity_score
 from sklearn.linear_model import LogisticRegression
-from sklearn.datasets import fetch_mldata
+
+print(__doc__)
 
 # Load a multi-label dataset
-yeast = fetch_mldata('yeast')
-X = yeast['data']
-Y = yeast['target'].transpose().toarray()
+yeast = fetch_openml('yeast', version=4)
+X = yeast.data
+Y = yeast.target == 'TRUE'
 X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.2,
                                                     random_state=0)
diff --git a/examples/neural_networks/plot_mnist_filters.py b/examples/neural_networks/plot_mnist_filters.py
index 6c3b8b2284ea2..84af6eb26219f 100644
--- a/examples/neural_networks/plot_mnist_filters.py
+++ b/examples/neural_networks/plot_mnist_filters.py
@@ -20,15 +20,17 @@
 for a very short time. Training longer would result in weights with a much
 smoother spatial appearance.
 """
-print(__doc__)
-
 import matplotlib.pyplot as plt
-from sklearn.datasets import fetch_mldata
+from sklearn.datasets import fetch_openml
 from sklearn.neural_network import MLPClassifier
 
-mnist = fetch_mldata("MNIST original")
+print(__doc__)
+
+mnist = fetch_openml('mnist_784', version=1)
+X = mnist.data
+y = mnist.target
+
 # rescale the data, use the traditional train/test split
-X, y = mnist.data / 255., mnist.target
 X_train, X_test = X[:60000], X[60000:]
 y_train, y_test = y[:60000], y[60000:]
diff --git a/sklearn/datasets/mldata.py b/sklearn/datasets/mldata.py
index 1416208584634..8c2684988a182 100644
--- a/sklearn/datasets/mldata.py
+++ b/sklearn/datasets/mldata.py
@@ -25,13 +25,19 @@
 
 from .base import get_data_home
 from ..utils import Bunch
+from ..utils import deprecated
 
 MLDATA_BASE_URL = "http://mldata.org/repository/data/download/matlab/%s"
 
 
+@deprecated('mldata_filename was deprecated in version 0.20 and will be '
+            'removed in version 0.22')
 def mldata_filename(dataname):
     """Convert a raw name for a data set in a mldata.org filename.
 
+    .. deprecated:: 0.20
+        Will be removed in version 0.22
+
     Parameters
     ----------
     dataname : str
@@ -46,10 +52,14 @@ def mldata_filename(dataname):
     return re.sub(r'[().]', '', dataname)
 
 
+@deprecated('fetch_mldata was deprecated in version 0.20 and will be removed '
+            'in version 0.22')
 def fetch_mldata(dataname, target_name='label', data_name='data',
                  transpose_data=True, data_home=None):
     """Fetch an mldata.org data set
 
+    mldata.org is no longer operational.
+
     If the file does not exist yet, it is downloaded from mldata.org .
 
     mldata.org does not have an enforced convention for storing data or
@@ -70,6 +80,9 @@ def fetch_mldata(dataname, target_name='label', data_name='data',
     mldata.org data sets may have multiple columns, which are stored in the
     Bunch object with their original name.
 
+    .. deprecated:: 0.20
+        Will be removed in version 0.22
+
     Parameters
     ----------
diff --git a/sklearn/datasets/tests/test_mldata.py b/sklearn/datasets/tests/test_mldata.py
index 65e10a87818fa..3b956ecfa64b3 100644
--- a/sklearn/datasets/tests/test_mldata.py
+++ b/sklearn/datasets/tests/test_mldata.py
@@ -3,6 +3,7 @@
 import os
 import scipy as sp
 import shutil
+import warnings
 
 from sklearn import datasets
 from sklearn.datasets import mldata_filename, fetch_mldata
@@ -13,6 +14,7 @@
 from sklearn.utils.testing import assert_equal
 from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import assert_array_equal
+from sklearn.utils.testing import assert_warns
 
 import pytest
 
@@ -46,7 +48,8 @@ def test_download(tmpdata):
         },
     })
     try:
-        mock = fetch_mldata('mock', data_home=tmpdata)
+        mock = assert_warns(DeprecationWarning, fetch_mldata,
+                            'mock', data_home=tmpdata)
 
         for n in ["COL_NAMES", "DESCR", "target", "data"]:
             assert_in(n, mock)
 
@@ -54,12 +57,14 @@ def test_download(tmpdata):
         assert_equal(mock.data.shape, (150, 4))
 
         assert_raises(datasets.mldata.HTTPError,
+                      assert_warns, DeprecationWarning,
                       fetch_mldata, 'not_existing_name')
     finally:
         datasets.mldata.urlopen = _urlopen_ref
 
 
-def test_fetch_one_column(tmpdata):
+def test_fetch_one_column(tmpdata, recwarn):
+    warnings.simplefilter('ignore', DeprecationWarning)
     _urlopen_ref = datasets.mldata.urlopen
     try:
         dataname = 'onecol'
@@ -82,7 +87,8 @@ def test_fetch_one_column(tmpdata):
         datasets.mldata.urlopen = _urlopen_ref
 
 
-def test_fetch_multiple_column(tmpdata):
+def test_fetch_multiple_column(tmpdata, recwarn):
+    warnings.simplefilter('ignore', DeprecationWarning)
     _urlopen_ref = datasets.mldata.urlopen
     try:
         # create fake data set in cache
diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py
index 8dc1188cfe037..c28d729883b4b 100644
--- a/sklearn/utils/testing.py
+++ b/sklearn/utils/testing.py
@@ -468,9 +468,13 @@ def assert_allclose_dense_sparse(x, y, rtol=1e-07, atol=1e-9, err_msg=''):
                          " not a sparse matrix and an array.")
 
 
+@deprecated('deprecated in version 0.20 to be removed in version 0.22')
 def fake_mldata(columns_dict, dataname, matfile, ordering=None):
     """Create a fake mldata data set.
 
+    .. deprecated:: 0.20
+        Will be removed in version 0.22
+
     Parameters
     ----------
     columns_dict : dict, keys=str, values=ndarray
@@ -508,6 +512,7 @@ def fake_mldata(columns_dict, dataname, matfile, ordering=None):
     scipy.io.savemat(matfile, datasets, oned_as='column')
 
 
+@deprecated('deprecated in version 0.20 to be removed in version 0.22')
 class mock_mldata_urlopen(object):
     """Object that mocks the urlopen function to fake requests to mldata.
 
@@ -515,6 +520,9 @@ class mock_mldata_urlopen(object):
     creates a fake dataset in a StringIO object and returns it. Otherwise,
     it raises an HTTPError.
 
+    .. deprecated:: 0.20
+        Will be removed in version 0.22
+
     Parameters
     ----------
     mock_datasets : dict

From 52491a1725e917e9242be6634a7af8f7799a4adc Mon Sep 17 00:00:00 2001
From: Joel Nothman
Date: Thu, 16 Aug 2018 00:00:23 +1000
Subject: [PATCH 2/6] Use pytest's filterwarnings

---
 sklearn/datasets/tests/test_mldata.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/sklearn/datasets/tests/test_mldata.py b/sklearn/datasets/tests/test_mldata.py
index 3b956ecfa64b3..fb27ea1c0e700 100644
--- a/sklearn/datasets/tests/test_mldata.py
+++ b/sklearn/datasets/tests/test_mldata.py
@@ -28,6 +28,7 @@ def tmpdata(tmpdir_factory):
     shutil.rmtree(str(tmpdir))
 
 
+@pytest.mark.filterwarnings('ignore::DeprecationWarning')
 def test_mldata_filename():
     cases = [('datasets-UCI iris', 'datasets-uci-iris'),
              ('news20.binary', 'news20binary'),
@@ -38,6 +39,7 @@ def test_mldata_filename():
         assert_equal(mldata_filename(name), desired)
 
 
+@pytest.mark.filterwarnings('ignore::DeprecationWarning')
 def test_download(tmpdata):
     """Test that fetch_mldata is able to download and cache a data set."""
     _urlopen_ref = datasets.mldata.urlopen
@@ -63,8 +65,8 @@ def test_download(tmpdata):
         datasets.mldata.urlopen = _urlopen_ref
 
 
-def test_fetch_one_column(tmpdata, recwarn):
-    warnings.simplefilter('ignore', DeprecationWarning)
+@pytest.mark.filterwarnings('ignore::DeprecationWarning')
+def test_fetch_one_column(tmpdata):
     _urlopen_ref = datasets.mldata.urlopen
     try:
         dataname = 'onecol'
@@ -87,8 +89,8 @@ def test_fetch_one_column(tmpdata):
         datasets.mldata.urlopen = _urlopen_ref
 
 
-def test_fetch_multiple_column(tmpdata, recwarn):
-    warnings.simplefilter('ignore', DeprecationWarning)
+@pytest.mark.filterwarnings('ignore::DeprecationWarning')
+def test_fetch_multiple_column(tmpdata):
     _urlopen_ref = datasets.mldata.urlopen
     try:
         # create fake data set in cache

From ea8f31b75de44e539ae96aef9bafc2931312f9f3 Mon Sep 17 00:00:00 2001
From: Joel Nothman
Date: Fri, 17 Aug 2018 11:28:12 +1000
Subject: [PATCH 3/6] Rm unused import

---
 sklearn/datasets/tests/test_mldata.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/datasets/tests/test_mldata.py b/sklearn/datasets/tests/test_mldata.py
index fb27ea1c0e700..be0d994e9b181 100644
--- a/sklearn/datasets/tests/test_mldata.py
+++ b/sklearn/datasets/tests/test_mldata.py
@@ -3,7 +3,6 @@
 import os
 import scipy as sp
 import shutil
-import warnings
 
 from sklearn import datasets
 from sklearn.datasets import mldata_filename, fetch_mldata

From 1b40d08758b1b09f5cf8e81617a1e389f5d0b838 Mon Sep 17 00:00:00 2001
From: Joel Nothman
Date: Sat, 18 Aug 2018 19:40:46 +1000
Subject: [PATCH 4/6] Remove broken doctest

---
 sklearn/datasets/mldata.py | 34 ----------------------------------
 1 file changed, 34 deletions(-)

diff --git a/sklearn/datasets/mldata.py b/sklearn/datasets/mldata.py
index 8c2684988a182..5948d04a8be80 100644
--- a/sklearn/datasets/mldata.py
+++ b/sklearn/datasets/mldata.py
@@ -112,40 +112,6 @@ def fetch_mldata(dataname, target_name='label', data_name='data',
         'data', the data to learn, 'target', the classification labels,
         'DESCR', the full description of the dataset, and
         'COL_NAMES', the original names of the dataset columns.
-
-    Examples
-    --------
-    Load the 'iris' dataset from mldata.org:
-
-    >>> from sklearn.datasets.mldata import fetch_mldata
-    >>> import tempfile
-    >>> test_data_home = tempfile.mkdtemp()
-
-    >>> iris = fetch_mldata('iris', data_home=test_data_home)
-    >>> iris.target.shape
-    (150,)
-    >>> iris.data.shape
-    (150, 4)
-
-    Load the 'leukemia' dataset from mldata.org, which needs to be transposed
-    to respects the scikit-learn axes convention:
-
-    >>> leuk = fetch_mldata('leukemia', transpose_data=True,
-    ...                     data_home=test_data_home)
-    >>> leuk.data.shape
-    (72, 7129)
-
-    Load an alternative 'iris' dataset, which has different names for the
-    columns:
-
-    >>> iris2 = fetch_mldata('datasets-UCI iris', target_name=1,
-    ...                      data_name=0, data_home=test_data_home)
-    >>> iris3 = fetch_mldata('datasets-UCI iris',
-    ...                      target_name='class', data_name='double0',
-    ...                      data_home=test_data_home)
-
-    >>> import shutil
-    >>> shutil.rmtree(test_data_home)
     """
 
     # normalize dataset name

From ee3784f07cc98c2115ccb092be76d3e0f12b7eb5 Mon Sep 17 00:00:00 2001
From: Joel Nothman
Date: Sat, 18 Aug 2018 19:44:42 +1000
Subject: [PATCH 5/6] Refer user to openml URL

---
 examples/linear_model/plot_sgd_early_stopping.py               | 1 +
 examples/linear_model/plot_sparse_logistic_regression_mnist.py | 1 +
 examples/multioutput/plot_classifier_chain_yeast.py            | 2 +-
 examples/neural_networks/plot_mnist_filters.py                 | 1 +
 4 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/examples/linear_model/plot_sgd_early_stopping.py b/examples/linear_model/plot_sgd_early_stopping.py
index 3786a7aaef9b7..4076fa5f6b28b 100644
--- a/examples/linear_model/plot_sgd_early_stopping.py
+++ b/examples/linear_model/plot_sgd_early_stopping.py
@@ -58,6 +58,7 @@ def load_mnist(n_samples=None, class_0='0', class_1='8'):
     """Load MNIST, select two classes, shuffle and return only n_samples."""
+    # Load data from http://openml.org/d/554
     mnist = fetch_openml('mnist_784', version=1)
 
     # take only two classes for binary classification
diff --git a/examples/linear_model/plot_sparse_logistic_regression_mnist.py b/examples/linear_model/plot_sparse_logistic_regression_mnist.py
index 05fd976f5752f..7f5a328c08f0d 100644
--- a/examples/linear_model/plot_sparse_logistic_regression_mnist.py
+++ b/examples/linear_model/plot_sparse_logistic_regression_mnist.py
@@ -35,6 +35,7 @@
 t0 = time.time()
 train_samples = 5000
 
+# Load data from https://www.openml.org/d/554
 mnist = fetch_openml('mnist_784', version=1)
 X = mnist.data
 y = mnist.target
diff --git a/examples/multioutput/plot_classifier_chain_yeast.py b/examples/multioutput/plot_classifier_chain_yeast.py
index 414652500f2d7..ea62eda756de3 100644
--- a/examples/multioutput/plot_classifier_chain_yeast.py
+++ b/examples/multioutput/plot_classifier_chain_yeast.py
@@ -46,7 +46,7 @@
 
 print(__doc__)
 
-# Load a multi-label dataset
+# Load a multi-label dataset from https://www.openml.org/d/40597
 yeast = fetch_openml('yeast', version=4)
 X = yeast.data
 Y = yeast.target == 'TRUE'
diff --git a/examples/neural_networks/plot_mnist_filters.py b/examples/neural_networks/plot_mnist_filters.py
index 84af6eb26219f..bd8a9e96027a3 100644
--- a/examples/neural_networks/plot_mnist_filters.py
+++ b/examples/neural_networks/plot_mnist_filters.py
@@ -26,6 +26,7 @@
 
 print(__doc__)
 
+# Load data from https://www.openml.org/d/554
 mnist = fetch_openml('mnist_784', version=1)
 X = mnist.data
 y = mnist.target

From e9a59c7ff0d9b48b78a216e32334e08a94f16a97 Mon Sep 17 00:00:00 2001
From: Joel Nothman
Date: Sat, 18 Aug 2018 19:44:57 +1000
Subject: [PATCH 6/6] DOC whatsnew tweak

---
 doc/whats_new/v0.20.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst
index d854d3cbc28fa..faf5fb0c8bead 100644
--- a/doc/whats_new/v0.20.rst
+++ b/doc/whats_new/v0.20.rst
@@ -210,7 +210,7 @@ Support for Python 3.3 has been officially dropped.
   <christianbraune79>`.
 
 - |API| Deprecated :func:`sklearn.datasets.fetch_mldata` to be removed in
-  version 0.22. MLData.org is no longer operational. Until removal it will
+  version 0.22. mldata.org is no longer operational. Until removal it will
   remain possible to load cached datasets. :issue:`11466` by `Joel Nothman`_.
 
 :mod:`sklearn.decomposition`
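Editor's note (not part of the patch series): the @deprecated decorators
added in patch 1 mean fetch_mldata keeps working against a local cache but
warns on every call. A sketch of what callers observe, assuming
scikit-learn 0.20 and no cached copy; because mldata.org is down, the fetch
itself is expected to fail::

    import warnings

    from sklearn.datasets import fetch_mldata

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        try:
            # The DeprecationWarning fires before any network access.
            fetch_mldata('MNIST original')
        except Exception:
            # Without ~/scikit_learn_data/mldata/mnist-original.mat cached,
            # the download from the defunct mldata.org fails.
            pass
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)

This is the behaviour the updated tests pin down with assert_warns and with
pytest.mark.filterwarnings('ignore::DeprecationWarning').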