diff --git a/doc/datasets/index.rst b/doc/datasets/index.rst
index 1316d596f50f1..1dcdec8550f9a 100644
--- a/doc/datasets/index.rst
+++ b/doc/datasets/index.rst
@@ -318,6 +318,7 @@ writing data in that format.
    olivetti_faces
    twenty_newsgroups
    mldata
+   openml
    labeled_faces
    covtype
    rcv1
@@ -328,6 +329,8 @@ writing data in that format.
 
 .. include:: twenty_newsgroups.rst
 
+.. include:: openml.rst
+
 .. include:: mldata.rst
 
 .. include:: labeled_faces.rst
diff --git a/doc/datasets/mldata.rst b/doc/datasets/mldata.rst
index b94dfd7620a24..60546bfcfd363 100644
--- a/doc/datasets/mldata.rst
+++ b/doc/datasets/mldata.rst
@@ -16,9 +16,10 @@ Downloading datasets from the mldata.org repository
 
 `mldata.org <http://mldata.org>`_ is a public repository for machine learning
 data, supported by the `PASCAL network <http://www.pascal-network.org>`_ .
+It is no longer actively maintained, and it is recommended to use :ref:`openml` instead.
 
-The ``sklearn.datasets`` package is able to directly download data
-sets from the repository using the function
+The ``sklearn.datasets`` package is able to directly download datasets
+from the repository using the function
 :func:`sklearn.datasets.fetch_mldata`.
 
 For example, to download the MNIST digit recognition database::
diff --git a/doc/datasets/openml.rst b/doc/datasets/openml.rst
new file mode 100644
index 0000000000000..60fc090cdecfc
--- /dev/null
+++ b/doc/datasets/openml.rst
@@ -0,0 +1,146 @@
+..
+    For doctests:
+
+    >>> import numpy as np
+    >>> import os
+    >>> import tempfile
+    >>> # Create a temporary folder for the data fetcher
+    >>> custom_data_home = tempfile.mkdtemp()
+    >>> os.makedirs(os.path.join(custom_data_home, 'openml'))
+
+
+.. _openml:
+
+Downloading datasets from the openml.org repository
+====================================================
+
+`openml.org <https://openml.org>`_ is a public repository for machine
+learning data and experiments, which allows everybody to upload open
+datasets.
+
+The ``sklearn.datasets`` package is able to directly download datasets
+from the repository using the function
+:func:`sklearn.datasets.fetch_openml`.
+
+For example, to download a dataset of gene expressions in mice brains::
+
+  >>> from sklearn.datasets import fetch_openml
+  >>> mice = fetch_openml('miceprotein', version=4, data_home=custom_data_home)
+
+A dataset is fully specified by a name and a version, though the version
+is optional (see :ref:`openml_versions` below).
+The dataset contains a total of 1080 examples belonging to 8 different
+classes::
+
+  >>> mice.data.shape
+  (1080, 81)
+  >>> mice.target.shape
+  (1080,)
+  >>> np.unique(mice.target) # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS +SKIP
+  array([b"'c-CS-m'", b"'c-CS-s'", b"'c-SC-m'", b"'c-SC-s'", b"'t-CS-m'",
+         b"'t-CS-s'", b"'t-SC-m'", b"'t-SC-s'"], dtype='|S8')
+
+You can get more information on the dataset by looking at the ``DESCR``
+and ``details`` attributes::
+
+  >>> print(mice.DESCR) # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS +SKIP
+  **Author**: Clara Higuera, Katheleen J. Gardiner, Krzysztof J. Cios
+  **Source**: [UCI](https://archive.ics.uci.edu/ml/datasets/Mice+Protein+Expression) - 2015
+  **Please cite**: Higuera C, Gardiner KJ, Cios KJ (2015) Self-Organizing
+  Feature Maps Identify Proteins Critical to Learning in a Mouse Model of Down
+  Syndrome. PLoS ONE 10(6): e0129126...
+
+  >>> mice.details # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS +SKIP
+  {'id': '4550', 'name': 'MiceProtein', 'version': '1', 'format': 'ARFF',
+  'creator': ...,
+  'upload_date': '2016-02-17T14:32:49', 'licence': 'Public', 'url':
+  'https://www.openml.org/data/v1/download/1804243/MiceProtein.ARFF', 'file_id':
+  '1804243', 'default_target_attribute': 'class', 'citation': 'Higuera C,
+  Gardiner KJ, Cios KJ (2015) Self-Organizing Feature Maps Identify Proteins
+  Critical to Learning in a Mouse Model of Down Syndrome. PLoS ONE 10(6):
+  e0129126. [Web Link] journal.pone.0129126', 'tag': ['OpenML100', 'study_14',
+  'study_34'], 'visibility': 'public', 'status': 'active', 'md5_checksum':
+  '3c479a6885bfa0438971388283a1ce32'}
+
+
+The ``DESCR`` contains a free-text description of the data, while ``details``
+contains a dictionary of meta-data stored by OpenML, like the dataset id.
+The id of the mice protein dataset is 40966, and you can use this (or the
+name) to get more information on the dataset on the openml website::
+
+  >>> print(mice.url)
+  https://www.openml.org/d/40966
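+
+The ``data`` and ``target`` attributes are plain numpy arrays, so they can
+be passed directly to a scikit-learn estimator. The following is only a
+minimal sketch (this dataset contains missing values, which we impute
+first; the resulting model is merely illustrative)::
+
+  >>> from sklearn.pipeline import make_pipeline
+  >>> from sklearn.preprocessing import Imputer
+  >>> from sklearn.tree import DecisionTreeClassifier
+  >>> model = make_pipeline(Imputer(), DecisionTreeClassifier())
+  >>> model.fit(mice.data, mice.target) # doctest: +SKIP
+  Pipeline(...)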
+
+The id is also the best way to specify how to fetch a dataset from OpenML::
+
+  >>> mice = fetch_openml(40966, data_home=custom_data_home)
+  >>> mice.details # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS +SKIP
+  {'id': '4550', 'name': 'MiceProtein', 'version': '1', 'format': 'ARFF',
+  'creator': ...,
+  'upload_date': '2016-02-17T14:32:49', 'licence': 'Public', 'url':
+  'https://www.openml.org/data/v1/download/1804243/MiceProtein.ARFF', 'file_id':
+  '1804243', 'default_target_attribute': 'class', 'citation': 'Higuera C,
+  Gardiner KJ, Cios KJ (2015) Self-Organizing Feature Maps Identify Proteins
+  Critical to Learning in a Mouse Model of Down Syndrome. PLoS ONE 10(6):
+  e0129126. [Web Link] journal.pone.0129126', 'tag': ['OpenML100', 'study_14',
+  'study_34'], 'visibility': 'public', 'status': 'active', 'md5_checksum':
+  '3c479a6885bfa0438971388283a1ce32'}
+
+.. _openml_versions:
+
+Dataset Versions
+----------------
+
+A dataset is uniquely specified by its id, but not necessarily by its name.
+Several "versions" of a dataset with the same name can exist, and they can
+contain entirely different data.
+If a particular version of a dataset has been found to contain significant
+issues, it might be deactivated. Using a name to specify a dataset will yield
+the earliest version of the dataset that is still active. That means that
+``fetch_openml("miceprotein")`` can yield different results at different
+times if earlier versions become inactive.
+You can see that the dataset with id 40966 that we fetched above is version 1
+of the "miceprotein" dataset::
+
+  >>> mice.details['version'] #doctest: +SKIP
+  '1'
+
+In fact, this dataset only has one version. The iris dataset, on the other
+hand, has multiple versions::
+
+  >>> iris = fetch_openml("iris", data_home=custom_data_home)
+  >>> iris.details['version'] #doctest: +SKIP
+  '1'
+  >>> iris.details['id'] #doctest: +SKIP
+  '61'
+
+  >>> iris_61 = fetch_openml(61, data_home=custom_data_home)
+  >>> iris_61.details['version'] #doctest: +SKIP
+  '1'
+  >>> iris_61.details['id'] #doctest: +SKIP
+  '61'
+
+  >>> iris_969 = fetch_openml(969, data_home=custom_data_home)
+  >>> iris_969.details['version'] #doctest: +SKIP
+  '3'
+  >>> iris_969.details['id'] #doctest: +SKIP
+  '969'
+
+Specifying the dataset by the name "iris" yields the lowest version, version 1,
+with the id 61. To make sure you always get this exact dataset, it is safest
+to specify it by the dataset id.
+The other dataset, with id 969, is version 3 (version 2 has become inactive),
+and contains a binarized version of the data::
+
+  >>> np.unique(iris_969.target) #doctest: +SKIP
+  array([b'N', b'P'],
+        dtype='|S1')
+
+You can also specify both the name and the version, which also uniquely
+identifies the dataset::
+
+  >>> iris_version_3 = fetch_openml("iris", version=3, data_home=custom_data_home)
+  >>> iris_version_3.details['version']
+  '3'
+  >>> iris_version_3.details['id']
+  '969'
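+
+Independently of the version, the ``target_column`` parameter controls which
+column is returned as ``target``. By default the column marked as the default
+target on the server is used; passing ``target_column=None`` instead returns
+all columns (including the class column) as ``data``. A minimal sketch of
+that behaviour (the shape below assumes the iris dataset fetched above)::
+
+  >>> iris_all = fetch_openml(61, target_column=None, data_home=custom_data_home) # doctest: +SKIP
+  >>> iris_all.data.shape # doctest: +SKIP
+  (150, 5)
+  >>> iris_all.target is None # doctest: +SKIP
+  True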
+
+
+..
+    >>> import shutil
+    >>> shutil.rmtree(custom_data_home)
diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst
index 2792ba8484664..2aad173950838 100644
--- a/doc/modules/classes.rst
+++ b/doc/modules/classes.rst
@@ -228,6 +228,7 @@ Loaders
    datasets.fetch_lfw_people
    datasets.fetch_mldata
    datasets.fetch_olivetti_faces
+   datasets.fetch_openml
    datasets.fetch_rcv1
    datasets.fetch_species_distributions
    datasets.get_data_home
diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst
index b43f0ccdbbbdd..89b380622bc48 100644
--- a/doc/whats_new/v0.20.rst
+++ b/doc/whats_new/v0.20.rst
@@ -49,6 +49,9 @@ Classifiers and regressors
   Naive Bayes classifier described in Rennie et al. (2003).
   :issue:`8190` by :user:`Michael A. Alcorn `.
 
+- Added :class:`multioutput.RegressorChain` for multi-target
+  regression. :issue:`9257` by :user:`Kumar Ashutosh `.
+
 Preprocessing
 
 - Added :class:`preprocessing.CategoricalEncoder`, which allows to encode
@@ -74,8 +77,11 @@ Model evaluation
   ``'balanced_accuracy'`` scorer for binary classification. :issue:`8066`
   by :user:`xyguo` and :user:`Aman Dalmia `.
 
-- Added :class:`multioutput.RegressorChain` for multi-target
-  regression. :issue:`9257` by :user:`Kumar Ashutosh `.
+Datasets
+
+- Added :func:`datasets.fetch_openml` to fetch any dataset from
+  `OpenML <https://openml.org>`_. OpenML is a free, open data sharing platform
+  and will replace mldata, which is no longer maintained. :issue:`9908` by
+  `Andreas Müller`_.
 
 Enhancements
 ............
diff --git a/setup.cfg b/setup.cfg
index f96e9cf9f85ab..0ca865a1e4648 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -7,6 +7,7 @@ test = pytest
 addopts =
     --doctest-modules
     --disable-pytest-warnings
+doctest_optionflags = NORMALIZE_WHITESPACE ALLOW_UNICODE
 
 [wheelhouse_uploader]
 artifact_indexes=
diff --git a/sklearn/datasets/__init__.py b/sklearn/datasets/__init__.py
index c43c0c4758b10..c7d78e633493d 100644
--- a/sklearn/datasets/__init__.py
+++ b/sklearn/datasets/__init__.py
@@ -23,6 +23,7 @@
 from .twenty_newsgroups import fetch_20newsgroups
 from .twenty_newsgroups import fetch_20newsgroups_vectorized
 from .mldata import fetch_mldata, mldata_filename
+from .openml import fetch_openml
 from .samples_generator import make_classification
 from .samples_generator import make_multilabel_classification
 from .samples_generator import make_hastie_10_2
@@ -65,6 +66,7 @@
     'fetch_covtype',
     'fetch_rcv1',
     'fetch_kddcup99',
+    'fetch_openml',
     'get_data_home',
     'load_boston',
     'load_diabetes',
diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
new file mode 100644
index 0000000000000..002935518c378
--- /dev/null
+++ b/sklearn/datasets/openml.py
@@ -0,0 +1,232 @@
+import json
+import numbers
+import sys
+import os
+from os.path import join, exists
+from warnings import warn
+
+try:
+    # Python 2
+    from urllib2 import urlopen
+except ImportError:
+    # Python 3+
+    from urllib.request import urlopen
+
+
+from scipy.io.arff import loadarff
+import numpy as np
+
+from .base import get_data_home
+from ..externals.joblib import Memory
+from ..externals.six import StringIO, string_types
+from ..externals.six.moves.urllib.error import HTTPError
+from ..utils import Bunch
+
+_SEARCH_NAME = "https://openml.org/api/v1/json/data/list/data_name/{}/limit/1"
+_DATA_INFO = "https://openml.org/api/v1/json/data/{}"
+_DATA_FEATURES = "https://openml.org/api/v1/json/data/features/{}"
+
+
+def _get_data_info_by_name(name, version):
+    data_found = True
+    try:
+        if version == "active":
+            json_string = urlopen(_SEARCH_NAME.format(name +
+                                                      "/status/active/"))
+        else:
+            json_string = urlopen(_SEARCH_NAME.format(name) +
+                                  "/data_version/{}".format(version))
+    except HTTPError as error:
+        if error.code == 412:
+            data_found = False
+        else:
+            raise error
+
+    if not data_found and version != "active":
+        # might have been deactivated; will warn later
+        data_found = True
+        try:
+            json_string = urlopen(_SEARCH_NAME.format(name) +
+                                  "/data_version/{}/status/deactivated".format(
+                                      version))
+        except HTTPError as error:
+            if error.code == 412:
+                data_found = False
+            else:
+                raise error
+
+    if not data_found:
+        # not in except for nicer traceback
+        if version == "active":
+            raise ValueError("No active dataset {} found.".format(name))
+        raise ValueError("Dataset {} with version {}"
+                         " not found.".format(name, version))
+
+    json_data = json.loads(json_string.read().decode("utf-8"))
+    return json_data['data']['dataset'][0]
+
+
+def _get_data_description_by_id(data_id):
+    data_found = True
+    try:
+        json_string = urlopen(_DATA_INFO.format(data_id))
+    except HTTPError as error:
+        if error.code == 412:
+            data_found = False
+        else:
+            raise error
+    if not data_found:
+        # not in except for nicer traceback
+        raise ValueError("Dataset with id {} "
+                         "not found.".format(data_id))
+    json_data = json.loads(json_string.read().decode("utf-8"))
+    return json_data['data_set_description']
+
+
+def _get_data_features(data_id):
+    data_found = True
+    try:
+        json_string = urlopen(_DATA_FEATURES.format(data_id))
+    except HTTPError as error:
+        if error.code == 412:
+            data_found = False
+        else:
+            raise error
+    if not data_found:
+        # not in except for nicer traceback
+        raise ValueError("Dataset with id {} "
+                         "not found.".format(data_id))
+    json_data = json.loads(json_string.read().decode("utf-8"))
+    return json_data['data_features']['feature']
+
+
+def _download_data(url):
+    response = urlopen(url)
+    if sys.version_info[0] == 2:
+        # Python 2.7 numpy can't handle unicode
+        arff = loadarff(StringIO(response.read()))
+    else:
+        arff = loadarff(StringIO(response.read().decode('utf-8')))
+
+    response.close()
+    return arff
+
+
+def _download_data_csv(file_id):
+    response = urlopen("https://openml.org/data/v1/get_csv/{}".format(file_id))
+    data = np.genfromtxt(response, names=True, dtype=None, delimiter=',',
+                         missing_values='?')
+    response.close()
+    return data
+
+
+def fetch_openml(name_or_id=None, version='active', data_home=None,
+                 target_column='default-target', memory=True):
+    """Fetch dataset from openml by name or dataset id.
+
+    Datasets are uniquely identified by either an integer ID or by a
+    combination of name and version (i.e. there might be multiple
+    versions of the 'iris' dataset).
+
+    Parameters
+    ----------
+    name_or_id : string or integer
+        Identifier of the dataset. If integer, assumed to be the id of the
+        dataset on OpenML, if string, assumed to be the name of the dataset.
+
+    version : integer or 'active', default='active'
+        Version of the dataset. Only used if ``name_or_id`` is a string.
+        If 'active' the oldest version that's still active is used.
+
+    data_home : string or None, default None
+        Specify another download and cache folder for the data sets. By
+        default all scikit-learn data is stored in '~/scikit_learn_data'
+        subfolders.
+
+    target_column : string or None, default 'default-target'
+        Specify the column name in the data to use as target. If
+        'default-target', the standard target column as stored on the server
+        is used. If ``None``, all columns are returned as data and the
+        target is ``None``.
+
+    memory : boolean, default=True
+        Whether to cache downloaded datasets using joblib.
+
+    Returns
+    -------
+
+    data : Bunch
+        Dictionary-like object, the interesting attributes are:
+        'data', the data to learn, 'target', the regression target or
+        classification labels, 'DESCR', the full description of the dataset,
+        'feature_names', the original names of the dataset columns, and
+        'details' which provide more information on the openml meta-data.
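+
+    Examples
+    --------
+    A minimal usage sketch; it requires network access and is therefore
+    skipped in doctests, and the exact output may vary with server state.
+
+    >>> from sklearn.datasets import fetch_openml
+    >>> iris = fetch_openml('iris', version=1)  # doctest: +SKIP
+    >>> iris.data.shape  # doctest: +SKIP
+    (150, 4)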
+    """
+    data_home = get_data_home(data_home=data_home)
+    data_home = join(data_home, 'openml')
+    if memory:
+        mem = Memory(join(data_home, 'cache'), verbose=0).cache
+    else:
+        def mem(func):
+            return func
+    _get_data_info_by_name_ = mem(_get_data_info_by_name)
+    _get_data_description_by_id_ = mem(_get_data_description_by_id)
+    _get_data_features_ = mem(_get_data_features)
+    _download_data_csv_ = mem(_download_data_csv)
+
+    if not exists(data_home):
+        os.makedirs(data_home)
+
+    # check if dataset id is known
+    if isinstance(name_or_id, numbers.Integral):
+        if version != "active":
+            raise ValueError(
+                "Dataset id={} and version={} passed, but you can only "
+                "specify a numeric id or a version, not both.".format(
+                    name_or_id, version))
+        data_id = name_or_id
+    elif isinstance(name_or_id, string_types):
+        data_info = _get_data_info_by_name_(name_or_id, version)
+        data_id = data_info['did']
+    else:
+        raise TypeError(
+            "Invalid name_or_id {}, should be string or integer.".format(
+                name_or_id))
+
+    data_description = _get_data_description_by_id_(data_id)
+    if data_description['status'] != "active":
+        warn("Version {} of dataset {} is inactive, meaning that issues have"
+             " been found in the dataset. Try using a newer version.".format(
+                 data_description['version'], data_description['name']))
+    if target_column == "default-target":
+        target_column = data_description.get('default_target_attribute', None)
+
+    # download actual data
+    features = _get_data_features_(data_id)
+    # TODO: stacking the content of the structured array
+    # this results in a copy. If the data was homogeneous
+    # and target at start or end, we could use a view instead.
+    data_columns = []
+    for feature in features:
+        if (feature['name'] != target_column and feature['is_ignore'] ==
+                'false' and feature['is_row_identifier'] == 'false'):
+            data_columns.append(feature['name'])
+
+    data = _download_data_csv_(data_description['file_id'])
+    if target_column is not None:
+        y = data[target_column]
+    else:
+        y = None
+
+    if all([feature['data_type'] == "numeric" for feature in features
+            if feature['name'] in data_columns]):
+        dtype = None
+    else:
+        dtype = object
+    X = np.array([data[c] for c in data_columns], dtype=dtype).T
+
+    description = u"{}\n\nDownloaded from openml.org.".format(
+        data_description.pop('description'))
+
+    bunch = Bunch(
+        data=X, target=y, feature_names=data_columns,
+        DESCR=description, details=data_description, features=features,
+        url="https://www.openml.org/d/{}".format(data_id))
+
+    return bunch
diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py
new file mode 100644
index 0000000000000..671c53a93349c
--- /dev/null
+++ b/sklearn/datasets/tests/test_openml.py
@@ -0,0 +1,36 @@
+"""Test the openml loader.
+
+These tests require network access and are meant to be skipped on travis.
+"""
+""" + +from sklearn.datasets import fetch_openml +from sklearn.utils.testing import (assert_warns_message, + assert_raise_message) + + +def test_fetch_openml(): + # check_skip_travis() + # fetch with version + iris_1 = fetch_openml("iris", version=1) + assert iris_1.details['id'] == '61' + # fetch without version + iris_1 = fetch_openml("iris") + assert iris_1.details['id'] == '61' + # fetch with dataset id + iris_by_id = fetch_openml(61) + assert iris_by_id.details['name'] == "iris" + assert iris_by_id.data.shape == (150, 4) + assert iris_by_id.target.shape == (150,) + # fetch inactive dataset by id + glas2 = assert_warns_message( + UserWarning, "Version 1 of dataset glass2 is inactive,", fetch_openml, + 40675) + # fetch inactive dataset by name and version + assert glas2.data.shape == (163, 9) + glas2_by_version = assert_warns_message( + UserWarning, "Version 1 of dataset glass2 is inactive,", fetch_openml, + "glass2", 1) + # there is no active version of glass2 + assert glas2_by_version.details['id'] == '40675' + assert_raise_message(ValueError, "No active dataset glass2 found", + fetch_openml, 'glass2')