From 3ae41ff0368c5a9e804fc308c83781ba7203513c Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Tue, 6 Jun 2017 21:44:52 +0200 Subject: [PATCH 01/66] add test script to have docstrings consistent with function signatures --- .../utils/tests/test_docstring_parameters.py | 186 ++++++++++++++++++ 1 file changed, 186 insertions(+) create mode 100644 sklearn/utils/tests/test_docstring_parameters.py diff --git a/sklearn/utils/tests/test_docstring_parameters.py b/sklearn/utils/tests/test_docstring_parameters.py new file mode 100644 index 0000000000000..6b7129f0463e5 --- /dev/null +++ b/sklearn/utils/tests/test_docstring_parameters.py @@ -0,0 +1,186 @@ +from __future__ import print_function + +from nose.tools import assert_true +import inspect +import warnings +import importlib + +from pkgutil import walk_packages +from inspect import getsource + +import sklearn +from sklearn.utils.testing import SkipTest + +_doc_special_members = ('__contains__', '__getitem__', '__iter__', '__len__', + '__call__', '__add__', '__sub__', '__mul__', '__div__', + '__neg__', '__hash__') + +public_modules = [ + # the list of modules users need to access for all functionality + # 'sklearn', + # 'sklearn.cluster', + # 'sklearn.covariance', + # 'sklearn.cross_decomposition', + 'sklearn.datasets', + # 'sklearn.decomposition', + # 'sklearn.ensemble', + # 'sklearn.feature_extraction', + # 'sklearn.feature_selection', + # 'sklearn.gaussian_process', + # 'sklearn.linear_model', + # 'sklearn.manifold', + # 'sklearn.metrics', + # 'sklearn.mixture', + # 'sklearn.model_selection', + # 'sklearn.neighbors', + # 'sklearn.neural_network', + # 'sklearn.preprocessing', + # 'sklearn.semi_supervised', + # 'sklearn.tree', + # 'sklearn.utils', +] + + +# helpers to get function arguments +if hasattr(inspect, 'signature'): # py35 + def _get_args(function, varargs=False): + params = inspect.signature(function).parameters + args = [key for key, param in params.items() + if param.kind not in (param.VAR_POSITIONAL, param.VAR_KEYWORD)] + if varargs: + varargs = [param.name for param in params.values() + if param.kind == param.VAR_POSITIONAL] + if len(varargs) == 0: + varargs = None + return args, varargs + else: + return args +else: + def _get_args(function, varargs=False): + out = inspect.getargspec(function) # args, varargs, keywords, defaults + if varargs: + return out[:2] + else: + return out[0] + + +def get_name(func): + parts = [] + module = inspect.getmodule(func) + if module: + parts.append(module.__name__) + if hasattr(func, 'im_class'): + parts.append(func.im_class.__name__) + parts.append(func.__name__) + return '.'.join(parts) + + +# functions to ignore args / docstring of +_docstring_ignores = [ + 'sklearn.base.BaseEstimator', # just an example +] + +_tab_ignores = [ +] + + +def check_parameters_match(func, doc=None): + """Helper to check docstring, returns list of incorrect results""" + from numpydoc import docscrape + incorrect = [] + name_ = get_name(func) + if (not name_.startswith('sklearn.') or + name_.startswith('sklearn.externals')): + return incorrect + if inspect.isdatadescriptor(func): + return incorrect + args = _get_args(func) + # drop self + if len(args) > 0 and args[0] == 'self': + args = args[1:] + + if doc is None: + with warnings.catch_warnings(record=True) as w: + try: + doc = docscrape.FunctionDoc(func) + except Exception as exp: + incorrect += [name_ + ' parsing error: ' + str(exp)] + return incorrect + if len(w): + raise RuntimeError('Error for %s:\n%s' % (name_, w[0])) + # check set + param_names = [name 
for name, _, _ in doc['Parameters']]
+    # clean up some docscrape output:
+    param_names = [name.split(':')[0].strip('` ') for name in param_names]
+    param_names = [name for name in param_names if '*' not in name]
+    if len(param_names) != len(args):
+        bad = str(sorted(list(set(param_names) - set(args)) +
+                         list(set(args) - set(param_names))))
+        if not any(d in name_ for d in _docstring_ignores) and \
+                'deprecation_wrapped' not in func.__code__.co_name:
+            incorrect += [name_ + ' arg mismatch: ' + bad]
+    else:
+        for n1, n2 in zip(param_names, args):
+            if n1 != n2:
+                incorrect += [name_ + ' ' + n1 + ' != ' + n2]
+    return incorrect
+
+
+def test_docstring_parameters():
+    """Test module docstring formatting."""
+    try:
+        import numpydoc  # noqa
+    except ImportError:
+        raise SkipTest(
+            "numpydoc is required to test the docstrings")
+
+    from numpydoc import docscrape
+
+    incorrect = []
+    for name in public_modules:
+        with warnings.catch_warnings(record=True):  # traits warnings
+            module = __import__(name, globals())
+        for submod in name.split('.')[1:]:
+            module = getattr(module, submod)
+        classes = inspect.getmembers(module, inspect.isclass)
+        for cname, cls in classes:
+            if cname.startswith('_') and cname not in _doc_special_members:
+                continue
+            with warnings.catch_warnings(record=True) as w:
+                cdoc = docscrape.ClassDoc(cls)
+            if len(w):
+                raise RuntimeError('Error for __init__ of %s in %s:\n%s'
+                                   % (cls, name, w[0]))
+            if hasattr(cls, '__init__'):
+                incorrect += check_parameters_match(cls.__init__, cdoc)
+            for method_name in cdoc.methods:
+                method = getattr(cls, method_name)
+                incorrect += check_parameters_match(method)
+            if hasattr(cls, '__call__'):
+                incorrect += check_parameters_match(cls.__call__)
+        functions = inspect.getmembers(module, inspect.isfunction)
+        for fname, func in functions:
+            if fname.startswith('_'):
+                continue
+            incorrect += check_parameters_match(func)
+    msg = '\n' + '\n'.join(sorted(list(set(incorrect))))
+    if len(incorrect) > 0:
+        raise AssertionError(msg)
+
+
+def test_tabs():
+    """Test that there are no tabs in our source files"""
+    ignore = _tab_ignores[:]
+
+    for importer, modname, ispkg in walk_packages(sklearn.__path__,
+                                                  prefix='sklearn.'):
+        # because we don't import e.g. sklearn.tests w/sklearn
+        if not ispkg and modname not in ignore:
+            mod = importlib.import_module(modname)
+            try:
+                source = getsource(mod)
+            except IOError:  # user probably should have run "make clean"
+                continue
+            assert_true('\t' not in source,
+                        '"%s" has tabs, please remove them or add it to the '
+                        'ignore list' % modname)
From 3a1b92573671743bdc6f52c23780bdaa5284204e Mon Sep 17 00:00:00 2001
From: Alexandre Gramfort
Date: Tue, 6 Jun 2017 21:55:07 +0200
Subject: [PATCH 02/66] fix docstrings in datasets

---
 sklearn/datasets/base.py                     | 23 +++++++++----
 sklearn/datasets/kddcup99.py                 |  6 ++--
 sklearn/datasets/mldata.py                   | 13 +++++++-
 sklearn/datasets/olivetti_faces.py           |  8 ++---
 sklearn/datasets/samples_generator.py        | 32 +++++++++++++------
 sklearn/datasets/svmlight_format.py          | 16 +++++-----
 sklearn/datasets/twenty_newsgroups.py        | 26 +++++++--------
 .../utils/tests/test_docstring_parameters.py |  2 +-
 8 files changed, 79 insertions(+), 47 deletions(-)

diff --git a/sklearn/datasets/base.py b/sklearn/datasets/base.py
index 5fa5668dafca8..b168ddc2f25a1 100644
--- a/sklearn/datasets/base.py
+++ b/sklearn/datasets/base.py
@@ -41,6 +41,11 @@ def get_data_home(data_home=None):
     '~' symbol is expanded to the user home folder.
 
     If the folder does not already exist, it is automatically created.
+
+    Parameters
+    ----------
+    data_home : str | None
+        The path to scikit-learn data dir.
     """
     if data_home is None:
         data_home = environ.get('SCIKIT_LEARN_DATA',
@@ -52,7 +57,13 @@ def get_data_home(data_home=None):
 
 
 def clear_data_home(data_home=None):
-    """Delete all the content of the data home cache."""
+    """Delete all the content of the data home cache.
+
+    Parameters
+    ----------
+    data_home : str | None
+        The path to scikit-learn data dir.
+    """
     data_home = get_data_home(data_home)
     shutil.rmtree(data_home)
 
@@ -118,6 +129,11 @@ def load_files(container_path, description=None, categories=None,
         in the data structure returned. If not, a filenames attribute
         gives the path to the files.
 
+    shuffle : bool, optional (default=True)
+        Whether or not to shuffle the data: might be important for models that
+        make the assumption that the samples are independent and identically
+        distributed (i.i.d.), such as stochastic gradient descent.
+
     encoding : string or None (default is None)
         If None, do not try to decode the content of the files (e.g. for
         images or other non-text content).
@@ -129,11 +145,6 @@ def load_files(container_path, description=None, categories=None,
         contains characters not of the given `encoding`. Passed as keyword
         argument 'errors' to bytes.decode.
 
-    shuffle : bool, optional (default=True)
-        Whether or not to shuffle the data: might be important for models that
-        make the assumption that the samples are independent and identically
-        distributed (i.i.d.), such as stochastic gradient descent.
-
     random_state : int, RandomState instance or None, optional (default=0)
         If int, random_state is the seed used by the random number generator;
         If RandomState instance, random_state is the random number generator;
diff --git a/sklearn/datasets/kddcup99.py b/sklearn/datasets/kddcup99.py
index 762ca58a63554..27fcaf9227dc2 100644
--- a/sklearn/datasets/kddcup99.py
+++ b/sklearn/datasets/kddcup99.py
@@ -124,6 +124,9 @@ def fetch_kddcup99(subset=None, shuffle=False, random_state=None,
         To return the corresponding classical subsets of kddcup 99.
         If None, return the entire kddcup 99 dataset.
 
+    shuffle : bool, default=False
+        Whether to shuffle the dataset.
+
     random_state : int, RandomState instance or None, optional (default=None)
         Random state for shuffling the dataset.
         If int, random_state is the seed used by the random number generator;
@@ -131,9 +134,6 @@ def fetch_kddcup99(subset=None, shuffle=False, random_state=None,
         If None, the random number generator is the RandomState instance used
         by `np.random`.
 
-    shuffle : bool, default=False
-        Whether to shuffle dataset.
-
     percent10 : bool, default=True
         Whether to load only 10 percent of the data.
 
diff --git a/sklearn/datasets/mldata.py b/sklearn/datasets/mldata.py
index 9b4a8e3a1daa3..9b20431e49d1c 100644
--- a/sklearn/datasets/mldata.py
+++ b/sklearn/datasets/mldata.py
@@ -30,7 +30,18 @@
 
 
 def mldata_filename(dataname):
-    """Convert a raw name for a data set in a mldata.org filename."""
+    """Convert a raw name for a data set into a mldata.org filename.
+
+    Parameters
+    ----------
+    dataname : str
+        Name of the dataset.
+
+    Returns
+    -------
+    fname : str
+        The converted dataname.
+    """
     dataname = dataname.lower().replace(' ', '-')
     return re.sub(r'[().]', '', dataname)
diff --git a/sklearn/datasets/olivetti_faces.py b/sklearn/datasets/olivetti_faces.py
index ac80d49e937d2..7ff3af6921230 100644
--- a/sklearn/datasets/olivetti_faces.py
+++ b/sklearn/datasets/olivetti_faces.py
@@ -67,16 +67,16 @@ def fetch_olivetti_faces(data_home=None, shuffle=False, random_state=0,
         If True the order of the dataset is shuffled to avoid having
         images of the same person grouped.
 
-    download_if_missing : optional, True by default
-        If False, raise a IOError if the data is not locally available
-        instead of trying to download the data from the source site.
-
     random_state : int, RandomState instance or None, optional (default=0)
         If int, random_state is the seed used by the random number generator;
         If RandomState instance, random_state is the random number generator;
         If None, the random number generator is the RandomState instance used
         by `np.random`.
 
+    download_if_missing : optional, True by default
+        If False, raise an IOError if the data is not locally available
+        instead of trying to download the data from the source site.
+
     Returns
     -------
     An object with the following attributes:
diff --git a/sklearn/datasets/samples_generator.py b/sklearn/datasets/samples_generator.py
index 82ae355a7f4f2..c92dfcc9254ef 100644
--- a/sklearn/datasets/samples_generator.py
+++ b/sklearn/datasets/samples_generator.py
@@ -589,6 +589,12 @@ def make_circles(n_samples=100, shuffle=True, noise=None, random_state=None,
     noise : double or None (default=None)
         Standard deviation of Gaussian noise added to the data.
 
+    random_state : int, RandomState instance or None, optional (default=None)
+        If int, random_state is the seed used by the random number generator;
+        If RandomState instance, random_state is the random number generator;
+        If None, the random number generator is the RandomState instance used
+        by `np.random`.
+
     factor : double < 1 (default=.8)
         Scale factor between inner and outer circle.
 
@@ -643,6 +649,12 @@ def make_moons(n_samples=100, shuffle=True, noise=None, random_state=None):
     noise : double or None (default=None)
         Standard deviation of Gaussian noise added to the data.
 
+    random_state : int, RandomState instance or None, optional (default=None)
+        If int, random_state is the seed used by the random number generator;
+        If RandomState instance, random_state is the random number generator;
+        If None, the random number generator is the RandomState instance used
+        by `np.random`.
+
     Returns
     -------
     X : array of shape [n_samples, 2]
@@ -1199,22 +1211,22 @@ def make_sparse_spd_matrix(dim=1, alpha=0.95, norm_diag=False,
         The probability that a coefficient is zero (see notes). Larger values
         enforce more sparsity.
 
+    norm_diag : boolean, optional (default=False)
+        Whether to normalize the output matrix to make the leading diagonal
+        elements all 1.
+
+    smallest_coef : float between 0 and 1, optional (default=0.1)
+        The value of the smallest coefficient.
+
+    largest_coef : float between 0 and 1, optional (default=0.9)
+        The value of the largest coefficient.
+
     random_state : int, RandomState instance or None, optional (default=None)
         If int, random_state is the seed used by the random number generator;
         If RandomState instance, random_state is the random number generator;
         If None, the random number generator is the RandomState instance used
         by `np.random`.
 
-    largest_coef : float between 0 and 1, optional (default=0.9)
-        The value of the largest coefficient.
- - smallest_coef : float between 0 and 1, optional (default=0.1) - The value of the smallest coefficient. - - norm_diag : boolean, optional (default=False) - Whether to normalize the output matrix to make the leading diagonal - elements all 1 - Returns ------- prec : sparse matrix of shape (dim, dim) diff --git a/sklearn/datasets/svmlight_format.py b/sklearn/datasets/svmlight_format.py index a567e2091e1ab..d064c2eadc557 100644 --- a/sklearn/datasets/svmlight_format.py +++ b/sklearn/datasets/svmlight_format.py @@ -77,6 +77,10 @@ def load_svmlight_file(f, n_features=None, dtype=np.float64, every feature, hence the inferred shape might vary from one slice to another. + dtype : numpy data type, default np.float64 + Data type of dataset to be loaded. This will be the data type of the + output numpy arrays ``X`` and ``y``. + multilabel : boolean, optional, default False Samples may have several labels each (see http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html) @@ -93,10 +97,6 @@ def load_svmlight_file(f, n_features=None, dtype=np.float64, query_id : boolean, default False If True, will return the query_id array for each file. - dtype : numpy data type, default np.float64 - Data type of dataset to be loaded. This will be the data type of the - output numpy arrays ``X`` and ``y``. - Returns ------- X : scipy.sparse matrix of shape (n_samples, n_features) @@ -205,6 +205,10 @@ def load_svmlight_files(files, n_features=None, dtype=np.float64, in any of the input files, but setting it to a lower value will cause an exception to be raised. + dtype : numpy data type, default np.float64 + Data type of dataset to be loaded. This will be the data type of the + output numpy arrays ``X`` and ``y``. + multilabel : boolean, optional Samples may have several labels each (see http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html) @@ -221,10 +225,6 @@ def load_svmlight_files(files, n_features=None, dtype=np.float64, query_id : boolean, defaults to False If True, will return the query_id array for each file. - dtype : numpy data type, default np.float64 - Data type of dataset to be loaded. This will be the data type of the - output numpy arrays ``X`` and ``y``. - Returns ------- [X1, y1, ..., Xn, yn] diff --git a/sklearn/datasets/twenty_newsgroups.py b/sklearn/datasets/twenty_newsgroups.py index 47b543d8d2e16..fe838b1be5fd0 100644 --- a/sklearn/datasets/twenty_newsgroups.py +++ b/sklearn/datasets/twenty_newsgroups.py @@ -160,14 +160,14 @@ def fetch_20newsgroups(data_home=None, subset='train', categories=None, Parameters ---------- - subset : 'train' or 'test', 'all', optional - Select the dataset to load: 'train' for the training set, 'test' - for the test set, 'all' for both, with shuffled ordering. - data_home : optional, default: None Specify a download and cache folder for the datasets. If None, all scikit-learn data is stored in '~/scikit_learn_data' subfolders. + subset : 'train' or 'test', 'all', optional + Select the dataset to load: 'train' for the training set, 'test' + for the test set, 'all' for both, with shuffled ordering. + categories : None or collection of string or unicode If None (default), load all the categories. If not None, list of category names to load (other categories @@ -181,10 +181,6 @@ def fetch_20newsgroups(data_home=None, subset='train', categories=None, random_state : numpy random number generator or seed integer Used to shuffle the dataset. 
-    download_if_missing : optional, True by default
-        If False, raise an IOError if the data is not locally available
-        instead of trying to download the data from the source site.
-
     remove : tuple
         May contain any subset of ('headers', 'footers', 'quotes'). Each of
         these are kinds of text that will be detected and removed from the
         newsgroup posts, preventing classifiers from overfitting on
         metadata.
 
         'headers' removes newsgroup headers, 'footers' removes blocks at the
         ends of posts that look like signatures, and 'quotes' removes lines
         that appear to be quoting another post.
@@ -197,6 +193,10 @@ def fetch_20newsgroups(data_home=None, subset='train', categories=None,
         'headers' follows an exact standard; the other filters are
         not always correct.
 
+
+    download_if_missing : optional, True by default
+        If False, raise an IOError if the data is not locally available
+        instead of trying to download the data from the source site.
     """
 
     data_home = get_data_home(data_home=data_home)
@@ -295,15 +295,10 @@ def fetch_20newsgroups_vectorized(subset="train", remove=(), data_home=None):
 
     Parameters
     ----------
-
     subset : 'train' or 'test', 'all', optional
         Select the dataset to load: 'train' for the training set, 'test'
         for the test set, 'all' for both, with shuffled ordering.
 
-    data_home : optional, default: None
-        Specify an download and cache folder for the datasets. If None,
-        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.
-
     remove : tuple
         May contain any subset of ('headers', 'footers', 'quotes'). Each of
         these are kinds of text that will be detected and removed from the
@@ -314,9 +309,12 @@ def fetch_20newsgroups_vectorized(subset="train", remove=(), data_home=None):
         ends of posts that look like signatures, and 'quotes' removes lines
         that appear to be quoting another post.
 
+    data_home : optional, default: None
+        Specify a download and cache folder for the datasets. If None,
+        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.
+
     Returns
     -------
-
     bunch : Bunch object
         bunch.data: sparse matrix, shape [n_samples, n_features]
         bunch.target: array, shape [n_samples]
diff --git a/sklearn/utils/tests/test_docstring_parameters.py b/sklearn/utils/tests/test_docstring_parameters.py
index 6b7129f0463e5..9fb73f9eed1ee 100644
--- a/sklearn/utils/tests/test_docstring_parameters.py
+++ b/sklearn/utils/tests/test_docstring_parameters.py
@@ -77,7 +77,7 @@ def get_name(func):
 
 # functions to ignore args / docstring of
 _docstring_ignores = [
-    'sklearn.base.BaseEstimator',  # just an example
+    'sklearn.utils.deprecation.load_mlcomp',
 ]
 
 _tab_ignores = [
From 04da883055f1a06550b708b662ed15d14e1b3736 Mon Sep 17 00:00:00 2001
From: Alexandre Gramfort
Date: Wed, 7 Jun 2017 17:43:49 +0200
Subject: [PATCH 03/66] add tests

---
 .../utils/tests/test_docstring_parameters.py | 114 ++++++++++++++++++
 1 file changed, 114 insertions(+)

diff --git a/sklearn/utils/tests/test_docstring_parameters.py b/sklearn/utils/tests/test_docstring_parameters.py
index 9fb73f9eed1ee..0d9e275c18df2 100644
--- a/sklearn/utils/tests/test_docstring_parameters.py
+++ b/sklearn/utils/tests/test_docstring_parameters.py
@@ -10,6 +10,7 @@
 
 import sklearn
 from sklearn.utils.testing import SkipTest
+from sklearn.utils.testing import assert_raise_message
 
 _doc_special_members = ('__contains__', '__getitem__', '__iter__', '__len__',
                         '__call__', '__add__', '__sub__', '__mul__', '__div__',
                         '__neg__', '__hash__')
@@ -126,6 +127,119 @@ def check_parameters_match(func, doc=None):
     return incorrect
 
+
+def f_ok(a, b):
+    """Function f
+
+    Parameters
+    ----------
+    a : int
+        Parameter a
+    b : float
+        Parameter b
+
+    Returns
+    -------
+    c : list
+        Parameter c
+    """
+    c = a + b
+    return c
+
+
+def f_bad_sections(a, b):
+    """Function f
+
+    Parameters
+    ----------
+    a : int
+        Parameter a
+    b : float
+        Parameter b
+
+ Results + ------- + c : list + Parameter c + """ + c = a + b + return c + + +def f_bad_order(b, a): + """Function f + + Parameters + ---------- + a : int + Parameter a + b : float + Parameter b + + Returns + ------- + c : list + Parameter c + """ + c = a + b + return c + + +def f_missing(a, b): + """Function f + + Parameters + ---------- + a : int + Parameter a + + Returns + ------- + c : list + Parameter c + """ + c = a + b + return c + + +class Klass(object): + def f_missing(self, X, y): + pass + + def f_bad_sections(self, X, y): + """Function f + + Parameter + ---------- + a : int + Parameter a + b : float + Parameter b + + Results + ------- + c : list + Parameter c + """ + pass + + +def test_check_parameters_match(): + check_parameters_match(f_ok) + assert_raise_message(RuntimeError, 'Unknown section Results', + check_parameters_match, f_bad_sections) + assert_raise_message(RuntimeError, 'Unknown section Parameter', + check_parameters_match, Klass.f_bad_sections) + + messages = ['a != b'] + messages += ["arg mismatch: ['b']"] + messages += ["arg mismatch: ['X', 'y']"] + for mess, f in zip(messages, [f_bad_order, f_missing, Klass.f_missing]): + incorrect = check_parameters_match(f) + assert_true(len(incorrect) >= 1) + assert_true(mess in incorrect[0], + '"%s" not in "%s"' % (mess, incorrect[0])) + + def test_docstring_parameters(): """Test module docstring formatting.""" try: From 00ebcfc780dd3758a8780c7408260dd3da84d0d6 Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Wed, 7 Jun 2017 17:51:01 +0200 Subject: [PATCH 04/66] update travis --- .travis.yml | 7 ++++--- build_tools/travis/install.sh | 4 ++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 472b79b34d0b2..4b82207ad0baa 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,7 +22,7 @@ matrix: # This environment tests that scikit-learn can be built against # versions of numpy, scipy with ATLAS that comes with Ubuntu Trusty 14.04 - env: DISTRIB="ubuntu" PYTHON_VERSION="2.7" CYTHON_VERSION="0.23.4" - COVERAGE=true + COVERAGE=true TEST_DOCSTRINGS="false" addons: apt: packages: @@ -33,12 +33,12 @@ matrix: # This environment tests the oldest supported anaconda env - env: DISTRIB="conda" PYTHON_VERSION="2.7" INSTALL_MKL="false" NUMPY_VERSION="1.8.2" SCIPY_VERSION="0.13.3" CYTHON_VERSION="0.23.5" - COVERAGE=true + COVERAGE=true TEST_DOCSTRINGS="true" # This environment tests the newest supported Anaconda release (4.4.0) # It also runs tests requiring Pandas. - env: DISTRIB="conda" PYTHON_VERSION="3.6.1" INSTALL_MKL="true" NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0" PANDAS_VERSION="0.20.1" - CYTHON_VERSION="0.25.2" COVERAGE=true + CYTHON_VERSION="0.25.2" COVERAGE=true TEST_DOCSTRINGS="false" # This environment use pytest to run the tests. It uses the newest # supported Anaconda release (4.4.0). It also runs tests requiring Pandas. # - env: USE_PYTEST="true" DISTRIB="conda" PYTHON_VERSION="3.6.1" @@ -48,6 +48,7 @@ matrix: - env: RUN_FLAKE8="true" SKIP_TESTS="true" DISTRIB="conda" PYTHON_VERSION="3.5" INSTALL_MKL="true" NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0" CYTHON_VERSION="0.23.5" + TEST_DOCSTRINGS="true" # This environment tests scikit-learn against numpy and scipy master # installed from their CI wheels in a virtualenv with the Python # interpreter provided by travis. 
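[Note on the mechanism behind these CI entries: the TEST_DOCSTRINGS switch exists so that numpydoc is only installed on the builds that run the new test (see the install.sh hunk below). Everything the test does rests on numpydoc's docscrape parser. A minimal sketch of that parsing, assuming only that numpydoc is installed; the toy function `add` is hypothetical:

    from numpydoc import docscrape

    def add(a, b):
        """Add two numbers.

        Parameters
        ----------
        a : int
            First operand.
        b : int
            Second operand.
        """
        return a + b

    # FunctionDoc exposes the 'Parameters' section as (name, type,
    # description) triples, which is exactly what check_parameters_match
    # compares against the names returned by _get_args.
    doc = docscrape.FunctionDoc(add)
    print([name for name, _, _ in doc['Parameters']])  # -> ['a', 'b']

]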
diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh
index fe0d46821e29d..8972cd783c451 100755
--- a/build_tools/travis/install.sh
+++ b/build_tools/travis/install.sh
@@ -84,6 +84,10 @@ if [[ "$COVERAGE" == "true" ]]; then
     pip install coverage codecov
 fi
+if [[ "$TEST_DOCSTRINGS" == "true" ]]; then
+    pip install numpydoc
+fi
+
 if [[ "$SKIP_TESTS" == "true" ]]; then
     echo "No need to build scikit-learn when not running the tests"
 else
From 0b2c0ebf1d303574a3152c8e5e25a75b1f7ca8dc Mon Sep 17 00:00:00 2001
From: Alexandre Gramfort
Date: Wed, 7 Jun 2017 18:45:17 +0200
Subject: [PATCH 05/66] refactor

---
 sklearn/utils/testing.py                     | 100 +++++++++
 .../utils/tests/test_docstring_parameters.py | 197 +-----------------
 sklearn/utils/tests/test_testing.py          | 120 ++++++++++-
 3 files changed, 225 insertions(+), 192 deletions(-)

diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py
index 9a62b3c6a96fc..0ac0e6400575c 100644
--- a/sklearn/utils/testing.py
+++ b/sklearn/utils/testing.py
@@ -739,3 +739,103 @@ def __init__(self, check, arg_text):
 
     def __call__(self, *args, **kwargs):
         return self.check(*args, **kwargs)
+
+# Utils to test docstrings
+
+# helpers to get function arguments
+if hasattr(inspect, 'signature'):  # py35
+    def _get_args(function, varargs=False):
+        params = inspect.signature(function).parameters
+        args = [key for key, param in params.items()
+                if param.kind not in (param.VAR_POSITIONAL, param.VAR_KEYWORD)]
+        if varargs:
+            varargs = [param.name for param in params.values()
+                       if param.kind == param.VAR_POSITIONAL]
+            if len(varargs) == 0:
+                varargs = None
+            return args, varargs
+        else:
+            return args
+else:
+    def _get_args(function, varargs=False):
+        out = inspect.getargspec(function)  # args, varargs, keywords, defaults
+        if varargs:
+            return out[:2]
+        else:
+            return out[0]
+
+
+def get_func_name(func):
+    """Get function full name
+
+    Parameters
+    ----------
+    func : callable
+        The function object.
+
+    Returns
+    -------
+    name : str
+        The function name.
+    """
+    parts = []
+    module = inspect.getmodule(func)
+    if module:
+        parts.append(module.__name__)
+    if hasattr(func, 'im_class'):
+        parts.append(func.im_class.__name__)
+    parts.append(func.__name__)
+    return '.'.join(parts)
+
+
+def check_parameters_match(func, doc=None):
+    """Helper to check docstring
+
+    Parameters
+    ----------
+    func : callable
+        The function object to test.
+    doc : str
+        Optionally pass the docstring to test manually.
+
+    Returns
+    -------
+    incorrect : list
+        A list of strings describing the incorrect results.
+ """ + from numpydoc import docscrape + incorrect = [] + name_ = get_func_name(func) + if (not name_.startswith('sklearn.') or + name_.startswith('sklearn.externals')): + return incorrect + if inspect.isdatadescriptor(func): + return incorrect + args = _get_args(func) + # drop self + if len(args) > 0 and args[0] == 'self': + args = args[1:] + + if doc is None: + with warnings.catch_warnings(record=True) as w: + try: + doc = docscrape.FunctionDoc(func) + except Exception as exp: + incorrect += [name_ + ' parsing error: ' + str(exp)] + return incorrect + if len(w): + raise RuntimeError('Error for %s:\n%s' % (name_, w[0])) + # check set + param_names = [name for name, _, _ in doc['Parameters']] + # clean up some docscrape output: + param_names = [name.split(':')[0].strip('` ') for name in param_names] + param_names = [name for name in param_names if '*' not in name] + if len(param_names) != len(args): + bad = str(sorted(list(set(param_names) - set(args)) + + list(set(args) - set(param_names)))) + incorrect += [name_ + ' arg mismatch: ' + bad] + else: + for n1, n2 in zip(param_names, args): + if n1 != n2: + incorrect += [name_ + ' ' + n1 + ' != ' + n2] + return incorrect diff --git a/sklearn/utils/tests/test_docstring_parameters.py b/sklearn/utils/tests/test_docstring_parameters.py index 0d9e275c18df2..1cf1bb2b94c33 100644 --- a/sklearn/utils/tests/test_docstring_parameters.py +++ b/sklearn/utils/tests/test_docstring_parameters.py @@ -10,7 +10,7 @@ import sklearn from sklearn.utils.testing import SkipTest -from sklearn.utils.testing import assert_raise_message +from sklearn.utils.testing import check_parameters_match, get_func_name _doc_special_members = ('__contains__', '__getitem__', '__iter__', '__len__', '__call__', '__add__', '__sub__', '__mul__', '__div__', @@ -42,40 +42,6 @@ ] -# helpers to get function arguments -if hasattr(inspect, 'signature'): # py35 - def _get_args(function, varargs=False): - params = inspect.signature(function).parameters - args = [key for key, param in params.items() - if param.kind not in (param.VAR_POSITIONAL, param.VAR_KEYWORD)] - if varargs: - varargs = [param.name for param in params.values() - if param.kind == param.VAR_POSITIONAL] - if len(varargs) == 0: - varargs = None - return args, varargs - else: - return args -else: - def _get_args(function, varargs=False): - out = inspect.getargspec(function) # args, varargs, keywords, defaults - if varargs: - return out[:2] - else: - return out[0] - - -def get_name(func): - parts = [] - module = inspect.getmodule(func) - if module: - parts.append(module.__name__) - if hasattr(func, 'im_class'): - parts.append(func.im_class.__name__) - parts.append(func.__name__) - return '.'.join(parts) - - # functions to ignore args / docstring of _docstring_ignores = [ 'sklearn.utils.deprecation.load_mlcomp', @@ -85,161 +51,6 @@ def get_name(func): ] -def check_parameters_match(func, doc=None): - """Helper to check docstring, returns list of incorrect results""" - from numpydoc import docscrape - incorrect = [] - name_ = get_name(func) - if (not name_.startswith('sklearn.') or - name_.startswith('sklearn.externals')): - return incorrect - if inspect.isdatadescriptor(func): - return incorrect - args = _get_args(func) - # drop self - if len(args) > 0 and args[0] == 'self': - args = args[1:] - - if doc is None: - with warnings.catch_warnings(record=True) as w: - try: - doc = docscrape.FunctionDoc(func) - except Exception as exp: - incorrect += [name_ + ' parsing error: ' + str(exp)] - return incorrect - if len(w): - raise 
RuntimeError('Error for %s:\n%s' % (name_, w[0])) - # check set - param_names = [name for name, _, _ in doc['Parameters']] - # clean up some docscrape output: - param_names = [name.split(':')[0].strip('` ') for name in param_names] - param_names = [name for name in param_names if '*' not in name] - if len(param_names) != len(args): - bad = str(sorted(list(set(param_names) - set(args)) + - list(set(args) - set(param_names)))) - if not any(d in name_ for d in _docstring_ignores) and \ - 'deprecation_wrapped' not in func.__code__.co_name: - incorrect += [name_ + ' arg mismatch: ' + bad] - else: - for n1, n2 in zip(param_names, args): - if n1 != n2: - incorrect += [name_ + ' ' + n1 + ' != ' + n2] - return incorrect - - -def f_ok(a, b): - """Function f - - Parameters - ---------- - a : int - Parameter a - b : float - Parameter b - - Returns - ------- - c : list - Parameter c - """ - c = a + b - return c - - -def f_bad_sections(a, b): - """Function f - - Parameters - ---------- - a : int - Parameter a - b : float - Parameter b - - Results - ------- - c : list - Parameter c - """ - c = a + b - return c - - -def f_bad_order(b, a): - """Function f - - Parameters - ---------- - a : int - Parameter a - b : float - Parameter b - - Returns - ------- - c : list - Parameter c - """ - c = a + b - return c - - -def f_missing(a, b): - """Function f - - Parameters - ---------- - a : int - Parameter a - - Returns - ------- - c : list - Parameter c - """ - c = a + b - return c - - -class Klass(object): - def f_missing(self, X, y): - pass - - def f_bad_sections(self, X, y): - """Function f - - Parameter - ---------- - a : int - Parameter a - b : float - Parameter b - - Results - ------- - c : list - Parameter c - """ - pass - - -def test_check_parameters_match(): - check_parameters_match(f_ok) - assert_raise_message(RuntimeError, 'Unknown section Results', - check_parameters_match, f_bad_sections) - assert_raise_message(RuntimeError, 'Unknown section Parameter', - check_parameters_match, Klass.f_bad_sections) - - messages = ['a != b'] - messages += ["arg mismatch: ['b']"] - messages += ["arg mismatch: ['X', 'y']"] - for mess, f in zip(messages, [f_bad_order, f_missing, Klass.f_missing]): - incorrect = check_parameters_match(f) - assert_true(len(incorrect) >= 1) - assert_true(mess in incorrect[0], - '"%s" not in "%s"' % (mess, incorrect[0])) - - def test_docstring_parameters(): """Test module docstring formatting.""" try: @@ -275,8 +86,12 @@ def test_docstring_parameters(): functions = inspect.getmembers(module, inspect.isfunction) for fname, func in functions: if fname.startswith('_'): + # Don't test private methods / functions continue - incorrect += check_parameters_match(func) + name_ = get_func_name(func) + if not any(d in name_ for d in _docstring_ignores) and \ + 'deprecation_wrapped' not in func.__code__.co_name: + incorrect += check_parameters_match(func) msg = '\n' + '\n'.join(sorted(list(set(incorrect)))) if len(incorrect) > 0: raise AssertionError(msg) diff --git a/sklearn/utils/tests/test_testing.py b/sklearn/utils/tests/test_testing.py index 10657682e5cf1..47153a8348abb 100644 --- a/sklearn/utils/tests/test_testing.py +++ b/sklearn/utils/tests/test_testing.py @@ -3,6 +3,7 @@ import sys from sklearn.utils.testing import ( + assert_true, assert_raises, assert_less, assert_greater, @@ -13,7 +14,9 @@ assert_equal, set_random_state, assert_raise_message, - ignore_warnings) + ignore_warnings, + check_parameters_match + ) from sklearn.tree import DecisionTreeClassifier from sklearn.discriminant_analysis 
import LinearDiscriminantAnalysis @@ -212,3 +215,118 @@ def f(): if failed: raise AssertionError("wrong warning caught by assert_warn") + + +# Tests for docstrings: + +def f_ok(a, b): + """Function f + + Parameters + ---------- + a : int + Parameter a + b : float + Parameter b + + Returns + ------- + c : list + Parameter c + """ + c = a + b + return c + + +def f_bad_sections(a, b): + """Function f + + Parameters + ---------- + a : int + Parameter a + b : float + Parameter b + + Results + ------- + c : list + Parameter c + """ + c = a + b + return c + + +def f_bad_order(b, a): + """Function f + + Parameters + ---------- + a : int + Parameter a + b : float + Parameter b + + Returns + ------- + c : list + Parameter c + """ + c = a + b + return c + + +def f_missing(a, b): + """Function f + + Parameters + ---------- + a : int + Parameter a + + Returns + ------- + c : list + Parameter c + """ + c = a + b + return c + + +class Klass(object): + def f_missing(self, X, y): + pass + + def f_bad_sections(self, X, y): + """Function f + + Parameter + ---------- + a : int + Parameter a + b : float + Parameter b + + Results + ------- + c : list + Parameter c + """ + pass + + +def test_check_parameters_match(): + check_parameters_match(f_ok) + assert_raise_message(RuntimeError, 'Unknown section Results', + check_parameters_match, f_bad_sections) + assert_raise_message(RuntimeError, 'Unknown section Parameter', + check_parameters_match, Klass.f_bad_sections) + + messages = ['a != b'] + messages += ["arg mismatch: ['b']"] + messages += ["arg mismatch: ['X', 'y']"] + for mess, f in zip(messages, [f_bad_order, f_missing, Klass.f_missing]): + incorrect = check_parameters_match(f) + assert_true(len(incorrect) >= 1) + assert_true(mess in incorrect[0], + '"%s" not in "%s"' % (mess, incorrect[0])) From 5495e0e2f2530bee3bb888aa071cdb64d82bcdf9 Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Thu, 8 Jun 2017 16:30:40 +0200 Subject: [PATCH 06/66] make travis happy? 
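(Presumably the builds failing here were the ones without a working numpydoc: numpydoc itself needs sphinx, and the docstring checks must be skipped rather than errored wherever the dependency is absent. The guard pattern the patch below settles on, sketched in condensed form with the actual checks elided:

    from sklearn.utils.testing import SkipTest

    def test_check_parameters_match():
        try:
            import numpydoc  # noqa
        except ImportError:
            raise SkipTest(
                "numpydoc is required to test the docstrings")
        # ... the docstring checks only run when numpydoc is importable

)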
--- build_tools/travis/install.sh | 2 +- sklearn/utils/tests/test_testing.py | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh index 8972cd783c451..0ed27b376b1b5 100755 --- a/build_tools/travis/install.sh +++ b/build_tools/travis/install.sh @@ -85,7 +85,7 @@ if [[ "$COVERAGE" == "true" ]]; then fi if [[ "$TEST_DOCSTRINGS" == "true" ]]; then - pip install numpydoc + pip install sphinx numpydoc # numpydoc requires sphinx fi if [[ "$SKIP_TESTS" == "true" ]]; then diff --git a/sklearn/utils/tests/test_testing.py b/sklearn/utils/tests/test_testing.py index 47153a8348abb..522c81336e12d 100644 --- a/sklearn/utils/tests/test_testing.py +++ b/sklearn/utils/tests/test_testing.py @@ -15,9 +15,9 @@ set_random_state, assert_raise_message, ignore_warnings, - check_parameters_match - ) + check_parameters_match) +from sklearn.utils.testing import SkipTest from sklearn.tree import DecisionTreeClassifier from sklearn.discriminant_analysis import LinearDiscriminantAnalysis @@ -316,6 +316,12 @@ def f_bad_sections(self, X, y): def test_check_parameters_match(): + try: + import numpydoc # noqa + except ImportError: + raise SkipTest( + "numpydoc is required to test the docstrings") + check_parameters_match(f_ok) assert_raise_message(RuntimeError, 'Unknown section Results', check_parameters_match, f_bad_sections) From f3cefca95f197ca0797a89fe2976839bb2e29585 Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Thu, 8 Jun 2017 17:06:16 +0200 Subject: [PATCH 07/66] do not crash when y=None for API reason --- sklearn/base.py | 52 +++++++++++++++++-- sklearn/utils/testing.py | 8 ++- .../utils/tests/test_docstring_parameters.py | 13 ++++- sklearn/utils/tests/test_testing.py | 5 +- 4 files changed, 72 insertions(+), 6 deletions(-) diff --git a/sklearn/base.py b/sklearn/base.py index 119696f5b3722..aa4f9f9ce17c1 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -428,6 +428,11 @@ def get_indices(self, i): Only works if ``rows_`` and ``columns_`` attributes exist. + Parameters + ---------- + i : int + The index of the cluster. + Returns ------- row_ind : np.array, dtype=np.intp @@ -443,6 +448,11 @@ def get_indices(self, i): def get_shape(self, i): """Shape of the i'th bicluster. + Parameters + ---------- + i : int + The index of the cluster. + Returns ------- shape : (int, int) @@ -454,9 +464,22 @@ def get_shape(self, i): def get_submatrix(self, i, data): """Returns the submatrix corresponding to bicluster `i`. + Parameters + ---------- + i : int + The index of the cluster. + data : array + The data. + + Returns + ------- + submatrix : array + The submatrix corresponding to bicluster i. + + Notes + ----- Works with sparse matrices. Only works if ``rows_`` and ``columns_`` attributes exist. - """ from .utils.validation import check_array data = check_array(data, accept_sparse='csr') @@ -525,10 +548,33 @@ class MetaEstimatorMixin(object): ############################################################################### def is_classifier(estimator): - """Returns True if the given estimator is (probably) a classifier.""" + """Returns True if the given estimator is (probably) a classifier. + + Parameters + ---------- + estimator : object + Estimator object to test. + + Returns + ------- + out : bool + True if estimator is a classifier and False otherwise. 
+    """
     return getattr(estimator, "_estimator_type", None) == "classifier"
 
 
 def is_regressor(estimator):
-    """Returns True if the given estimator is (probably) a regressor."""
+    """Returns True if the given estimator is (probably) a regressor.
+
+    Parameters
+    ----------
+    estimator : object
+        Estimator object to test.
+
+    Returns
+    -------
+    out : bool
+        True if estimator is a regressor and False otherwise.
+    """
     return getattr(estimator, "_estimator_type", None) == "regressor"
diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py
index 0ac0e6400575c..bb1344b34fdaa 100644
--- a/sklearn/utils/testing.py
+++ b/sklearn/utils/testing.py
@@ -788,7 +788,7 @@ def get_func_name(func):
     return '.'.join(parts)
 
 
-def check_parameters_match(func, doc=None):
+def check_parameters_match(func, doc=None, ignore=None):
     """Helper to check docstring
 
     Parameters
@@ -797,6 +797,8 @@ def check_parameters_match(func, doc=None):
         The function object to test.
     doc : str
         Optionally pass the docstring to test manually.
+    ignore : None | list
+        Parameters to ignore.
 
     Returns
     -------
@@ -816,6 +818,10 @@ def check_parameters_match(func, doc=None):
     if len(args) > 0 and args[0] == 'self':
         args = args[1:]
 
+    if ignore is not None:
+        for p in ignore:
+            del args[args.index(p)]
+
     if doc is None:
         with warnings.catch_warnings(record=True) as w:
             try:
diff --git a/sklearn/utils/tests/test_docstring_parameters.py b/sklearn/utils/tests/test_docstring_parameters.py
index 1cf1bb2b94c33..3174885ff8a88 100644
--- a/sklearn/utils/tests/test_docstring_parameters.py
+++ b/sklearn/utils/tests/test_docstring_parameters.py
@@ -9,6 +9,7 @@
 from inspect import getsource
 
 import sklearn
+from sklearn.base import signature
 from sklearn.utils.testing import SkipTest
 from sklearn.utils.testing import check_parameters_match, get_func_name
 
@@ -19,6 +20,7 @@
 public_modules = [
     # the list of modules users need to access for all functionality
     # 'sklearn',
+    'sklearn.base',
     # 'sklearn.cluster',
     # 'sklearn.covariance',
     # 'sklearn.cross_decomposition',
@@ -80,7 +82,16 @@ def test_docstring_parameters():
                 incorrect += check_parameters_match(cls.__init__, cdoc)
             for method_name in cdoc.methods:
                 method = getattr(cls, method_name)
-                incorrect += check_parameters_match(method)
+                param_ignore = None
+                # Now skip docstring test for y when y is None
+                # by default for API reason
+                if method_name in ['fit', 'score', 'fit_predict']:
+                    sig = signature(method)
+                    if ('y' in sig.parameters and
+                            sig.parameters['y'].default is None):
+                        param_ignore = ['y']  # ignore y for fit and score
+                incorrect += check_parameters_match(method,
+                                                    ignore=param_ignore)
             if hasattr(cls, '__call__'):
                 incorrect += check_parameters_match(cls.__call__)
         functions = inspect.getmembers(module, inspect.isfunction)
diff --git a/sklearn/utils/tests/test_testing.py b/sklearn/utils/tests/test_testing.py
index 522c81336e12d..87837faf04796 100644
--- a/sklearn/utils/tests/test_testing.py
+++ b/sklearn/utils/tests/test_testing.py
@@ -322,7 +322,10 @@ def test_check_parameters_match():
         raise SkipTest(
             "numpydoc is required to test the docstrings")
 
-    check_parameters_match(f_ok)
+    incorrect = check_parameters_match(f_ok)
+    assert_equal(incorrect, [])
+    incorrect = check_parameters_match(f_missing, ignore=['b'])
+    assert_equal(incorrect, [])
     assert_raise_message(RuntimeError, 'Unknown section Results',
                          check_parameters_match, f_bad_sections)
     assert_raise_message(RuntimeError, 'Unknown section Parameter',
                          check_parameters_match, Klass.f_bad_sections)
From a755044459fe0c390560e0df694693ba8a96600d Mon Sep 17 00:00:00 2001
From: Alexandre Gramfort
Date:
Thu, 8 Jun 2017 17:23:44 +0200 Subject: [PATCH 08/66] more fixes --- sklearn/svm/classes.py | 14 +++++++------- sklearn/utils/__init__.py | 7 ++++++- sklearn/utils/deprecation.py | 18 +++++++++++------- sklearn/utils/testing.py | 9 ++++++++- .../utils/tests/test_docstring_parameters.py | 11 ++++++++++- sklearn/utils/tests/test_testing.py | 2 ++ sklearn/utils/validation.py | 11 +++++++---- 7 files changed, 51 insertions(+), 21 deletions(-) diff --git a/sklearn/svm/classes.py b/sklearn/svm/classes.py index 3b919aeda0a93..37ad30eec5362 100644 --- a/sklearn/svm/classes.py +++ b/sklearn/svm/classes.py @@ -26,19 +26,16 @@ class LinearSVC(BaseEstimator, LinearClassifierMixin, Parameters ---------- - C : float, optional (default=1.0) - Penalty parameter C of the error term. + penalty : string, 'l1' or 'l2' (default='l2') + Specifies the norm used in the penalization. The 'l2' + penalty is the standard used in SVC. The 'l1' leads to ``coef_`` + vectors that are sparse. loss : string, 'hinge' or 'squared_hinge' (default='squared_hinge') Specifies the loss function. 'hinge' is the standard SVM loss (used e.g. by the SVC class) while 'squared_hinge' is the square of the hinge loss. - penalty : string, 'l1' or 'l2' (default='l2') - Specifies the norm used in the penalization. The 'l2' - penalty is the standard used in SVC. The 'l1' leads to ``coef_`` - vectors that are sparse. - dual : bool, (default=True) Select the algorithm to either solve the dual or primal optimization problem. Prefer dual=False when n_samples > n_features. @@ -46,6 +43,9 @@ class LinearSVC(BaseEstimator, LinearClassifierMixin, tol : float, optional (default=1e-4) Tolerance for stopping criteria. + C : float, optional (default=1.0) + Penalty parameter C of the error term. + multi_class : string, 'ovr' or 'crammer_singer' (default='ovr') Determines the multi-class strategy if `y` contains more than two classes. diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 69a1be10f089b..fc71c387903a3 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -463,7 +463,12 @@ def _get_n_jobs(n_jobs): def tosequence(x): - """Cast iterable x to a Sequence, avoiding a copy if possible.""" + """Cast iterable x to a Sequence, avoiding a copy if possible. + + Parameters + ---------- + x : iterable + """ if isinstance(x, np.ndarray): return np.asarray(x) elif isinstance(x, Sequence): diff --git a/sklearn/utils/deprecation.py b/sklearn/utils/deprecation.py index aa0caea2ce2b8..14ae308b29d0e 100644 --- a/sklearn/utils/deprecation.py +++ b/sklearn/utils/deprecation.py @@ -19,22 +19,26 @@ class deprecated(object): >>> @deprecated() ... def some_function(): pass + + Parameters + ---------- + extra : string + to be added to the deprecation messages """ # Adapted from http://wiki.python.org/moin/PythonDecoratorLibrary, # but with many changes. 
def __init__(self, extra=''): - """ - Parameters - ---------- - extra : string - to be added to the deprecation messages - - """ self.extra = extra def __call__(self, obj): + """Call method + + Parameters + ---------- + obj : object + """ if isinstance(obj, type): return self._decorate_class(obj) else: diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py index bb1344b34fdaa..adfccd5f23b9b 100644 --- a/sklearn/utils/testing.py +++ b/sklearn/utils/testing.py @@ -820,7 +820,7 @@ def check_parameters_match(func, doc=None, ignore=None): if ignore is not None: for p in ignore: - del args[args.index(p)] + args.remove(p) if doc is None: with warnings.catch_warnings(record=True) as w: @@ -833,9 +833,16 @@ def check_parameters_match(func, doc=None, ignore=None): raise RuntimeError('Error for %s:\n%s' % (name_, w[0])) # check set param_names = [name for name, _, _ in doc['Parameters']] + # clean up some docscrape output: param_names = [name.split(':')[0].strip('` ') for name in param_names] param_names = [name for name in param_names if '*' not in name] + + if ignore is not None: + for p in ignore: + if p in param_names: + param_names.remove(p) + if len(param_names) != len(args): bad = str(sorted(list(set(param_names) - set(args)) + list(set(args) - set(param_names)))) diff --git a/sklearn/utils/tests/test_docstring_parameters.py b/sklearn/utils/tests/test_docstring_parameters.py index 3174885ff8a88..b7cea9f4c0796 100644 --- a/sklearn/utils/tests/test_docstring_parameters.py +++ b/sklearn/utils/tests/test_docstring_parameters.py @@ -21,6 +21,7 @@ # the list of modules users need to access for all functionality # 'sklearn', 'sklearn.base', + 'sklearn.calibration', # 'sklearn.cluster', # 'sklearn.covariance', # 'sklearn.cross_decomposition', @@ -30,14 +31,18 @@ # 'sklearn.feature_extraction', # 'sklearn.feature_selection', # 'sklearn.gaussian_process', + 'sklearn.isotonic', # 'sklearn.linear_model', # 'sklearn.manifold', + 'sklearn.multiclass', # 'sklearn.metrics', + 'sklearn.naive_bayes', # 'sklearn.mixture', # 'sklearn.model_selection', # 'sklearn.neighbors', # 'sklearn.neural_network', # 'sklearn.preprocessing', + 'sklearn.pipeline', # 'sklearn.semi_supervised', # 'sklearn.tree', # 'sklearn.utils', @@ -47,6 +52,9 @@ # functions to ignore args / docstring of _docstring_ignores = [ 'sklearn.utils.deprecation.load_mlcomp', + 'sklearn.pipeline.make_pipeline', + 'sklearn.pipeline.make_union', + 'sklearn.utils.extmath.safe_sparse_dot', ] _tab_ignores = [ @@ -85,7 +93,8 @@ def test_docstring_parameters(): param_ignore = None # Now skip docstring test for y when y is None # by default for API reason - if method_name in ['fit', 'score', 'fit_predict']: + if method_name in \ + ['fit', 'score', 'fit_predict', 'fit_transform']: sig = signature(method) if ('y' in sig.parameters and sig.parameters['y'].default is None): diff --git a/sklearn/utils/tests/test_testing.py b/sklearn/utils/tests/test_testing.py index 87837faf04796..e05f2650ae350 100644 --- a/sklearn/utils/tests/test_testing.py +++ b/sklearn/utils/tests/test_testing.py @@ -324,6 +324,8 @@ def test_check_parameters_match(): incorrect = check_parameters_match(f_ok) assert_equal(incorrect, []) + incorrect = check_parameters_match(f_ok, ignore=['b']) + assert_equal(incorrect, []) incorrect = check_parameters_match(f_missing, ignore=['b']) assert_equal(incorrect, []) assert_raise_message(RuntimeError, 'Unknown section Results', diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 304d7610b0135..2a36c2a706c28 100644 --- 
a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -585,10 +585,13 @@ def column_or_1d(y, warn=False): def check_random_state(seed): """Turn seed into a np.random.RandomState instance - If seed is None, return the RandomState singleton used by np.random. - If seed is an int, return a new RandomState instance seeded with seed. - If seed is already a RandomState instance, return it. - Otherwise raise ValueError. + Parameters + ---------- + seed : None | int | instance of RandomState + If seed is None, return the RandomState singleton used by np.random. + If seed is an int, return a new RandomState instance seeded with seed. + If seed is already a RandomState instance, return it. + Otherwise raise ValueError. """ if seed is None or seed is np.random: return np.random.mtrand._rand From 9fa05d84f758a4ff9068b0cc88dbf204439b5838 Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Thu, 8 Jun 2017 17:33:16 +0200 Subject: [PATCH 09/66] more on metrics module --- sklearn/metrics/cluster/supervised.py | 3 ++ sklearn/metrics/pairwise.py | 50 ++++++++++--------- sklearn/metrics/scorer.py | 12 +++++ .../utils/tests/test_docstring_parameters.py | 2 +- 4 files changed, 42 insertions(+), 25 deletions(-) diff --git a/sklearn/metrics/cluster/supervised.py b/sklearn/metrics/cluster/supervised.py index 9115b93abefba..5c277cdbded08 100644 --- a/sklearn/metrics/cluster/supervised.py +++ b/sklearn/metrics/cluster/supervised.py @@ -813,6 +813,9 @@ def fowlkes_mallows_score(labels_true, labels_pred, sparse=False): labels_pred : array, shape = (``n_samples``, ) A clustering of the data into disjoint subsets. + sparse : bool + Compute contingency matrix internally with sparse matrix. + Returns ------- score : float diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index 9af3afd0c989c..01aeb7b51f591 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -273,16 +273,14 @@ def pairwise_distances_argmin_min(X, Y, axis=1, metric="euclidean", Parameters ---------- - X, Y : {array-like, sparse matrix} - Arrays containing points. Respective shapes (n_samples1, n_features) - and (n_samples2, n_features) + X : {array-like, sparse matrix}, shape (n_samples1, n_features) + Array containing points. - batch_size : integer - To reduce memory consumption over the naive solution, data are - processed in batches, comprising batch_size rows of X and - batch_size rows of Y. The default value is quite conservative, but - can be changed for fine-tuning. The larger the number, the larger the - memory usage. + Y : {array-like, sparse matrix}, shape (n_samples2, n_features) + Arrays containing points. + + axis : int, optional, default 1 + Axis along which the argmin and distances are to be computed. metric : string or callable, default 'euclidean' metric to use for distance computation. Any metric from scikit-learn @@ -310,12 +308,16 @@ def pairwise_distances_argmin_min(X, Y, axis=1, metric="euclidean", See the documentation for scipy.spatial.distance for details on these metrics. + batch_size : integer + To reduce memory consumption over the naive solution, data are + processed in batches, comprising batch_size rows of X and + batch_size rows of Y. The default value is quite conservative, but + can be changed for fine-tuning. The larger the number, the larger the + memory usage. + metric_kwargs : dict, optional Keyword arguments to pass to specified metric function. - axis : int, optional, default 1 - Axis along which the argmin and distances are to be computed. 
- Returns ------- argmin : numpy.ndarray @@ -408,12 +410,8 @@ def pairwise_distances_argmin(X, Y, axis=1, metric="euclidean", Arrays containing points. Respective shapes (n_samples1, n_features) and (n_samples2, n_features) - batch_size : integer - To reduce memory consumption over the naive solution, data are - processed in batches, comprising batch_size rows of X and - batch_size rows of Y. The default value is quite conservative, but - can be changed for fine-tuning. The larger the number, the larger the - memory usage. + axis : int, optional, default 1 + Axis along which the argmin and distances are to be computed. metric : string or callable metric to use for distance computation. Any metric from scikit-learn @@ -441,12 +439,16 @@ def pairwise_distances_argmin(X, Y, axis=1, metric="euclidean", See the documentation for scipy.spatial.distance for details on these metrics. + batch_size : integer + To reduce memory consumption over the naive solution, data are + processed in batches, comprising batch_size rows of X and + batch_size rows of Y. The default value is quite conservative, but + can be changed for fine-tuning. The larger the number, the larger the + memory usage. + metric_kwargs : dict keyword arguments to pass to specified metric function. - axis : int, optional, default 1 - Axis along which the argmin and distances are to be computed. - Returns ------- argmin : numpy.ndarray @@ -1349,6 +1351,9 @@ def pairwise_kernels(X, Y=None, metric="linear", filter_params=False, should take two arrays from X as input and return a value indicating the distance between them. + filter_params : boolean + Whether to filter invalid parameters or not. + n_jobs : int The number of jobs to use for the computation. This works by breaking down the pairwise matrix into n_jobs even slices and computing them in @@ -1359,9 +1364,6 @@ def pairwise_kernels(X, Y=None, metric="linear", filter_params=False, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used. - filter_params : boolean - Whether to filter invalid parameters or not. - **kwds : optional keyword parameters Any further parameters are passed directly to the kernel function. diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index 3a163d967c542..870534a79db44 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -209,6 +209,18 @@ def _factory_args(self): def get_scorer(scoring): + """Get a scorer from string + + Parameters + ---------- + scoring : str | callable + scoring method as string. If callable it is returned as is. + + Returns + ------- + scorer : callable + The scorer. 
+ """ if isinstance(scoring, six.string_types): try: scorer = SCORERS[scoring] diff --git a/sklearn/utils/tests/test_docstring_parameters.py b/sklearn/utils/tests/test_docstring_parameters.py index b7cea9f4c0796..621ee4a2af431 100644 --- a/sklearn/utils/tests/test_docstring_parameters.py +++ b/sklearn/utils/tests/test_docstring_parameters.py @@ -35,7 +35,7 @@ # 'sklearn.linear_model', # 'sklearn.manifold', 'sklearn.multiclass', - # 'sklearn.metrics', + 'sklearn.metrics', 'sklearn.naive_bayes', # 'sklearn.mixture', # 'sklearn.model_selection', From f3d9c1150b52eaa06cb9fb1080df27ff74a6c52a Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Thu, 8 Jun 2017 17:44:19 +0200 Subject: [PATCH 10/66] more --- sklearn/cluster/affinity_propagation_.py | 6 +- sklearn/cluster/k_means_.py | 154 +++++++++--------- sklearn/cluster/spectral.py | 48 +++--- .../utils/tests/test_docstring_parameters.py | 5 +- 4 files changed, 107 insertions(+), 106 deletions(-) diff --git a/sklearn/cluster/affinity_propagation_.py b/sklearn/cluster/affinity_propagation_.py index 1c9903dc2efe1..678cf7dcf6e76 100644 --- a/sklearn/cluster/affinity_propagation_.py +++ b/sklearn/cluster/affinity_propagation_.py @@ -198,13 +198,13 @@ class AffinityPropagation(BaseEstimator, ClusterMixin): damping : float, optional, default: 0.5 Damping factor between 0.5 and 1. + max_iter : int, optional, default: 200 + Maximum number of iterations. + convergence_iter : int, optional, default: 15 Number of iterations with no change in the number of estimated clusters that stops the convergence. - max_iter : int, optional, default: 200 - Maximum number of iterations. - copy : boolean, optional, default: True Make a copy of input data. diff --git a/sklearn/cluster/k_means_.py b/sklearn/cluster/k_means_.py index 5014279946637..142186c779e3e 100644 --- a/sklearn/cluster/k_means_.py +++ b/sklearn/cluster/k_means_.py @@ -181,14 +181,6 @@ def k_means(X, n_clusters, init='k-means++', precompute_distances='auto', The number of clusters to form as well as the number of centroids to generate. - max_iter : int, optional, default 300 - Maximum number of iterations of the k-means algorithm to run. - - n_init : int, optional, default: 10 - Number of time the k-means algorithm will be run with different - centroid seeds. The final results will be the best output of - n_init consecutive runs in terms of inertia. - init : {'k-means++', 'random', or ndarray, or a callable}, optional Method for initialization, default to 'k-means++': @@ -205,12 +197,6 @@ def k_means(X, n_clusters, init='k-means++', precompute_distances='auto', If a callable is passed, it should take arguments X, k and and a random state and return an initialization. - algorithm : "auto", "full" or "elkan", default="auto" - K-means algorithm to use. The classical EM-style algorithm is "full". - The "elkan" variation is more efficient by using the triangle - inequality, but currently doesn't support sparse data. "auto" chooses - "elkan" for dense data and "full" for sparse data. - precompute_distances : {'auto', True, False} Precompute distances (faster but takes more memory). @@ -222,12 +208,20 @@ def k_means(X, n_clusters, init='k-means++', precompute_distances='auto', False : never precompute distances - tol : float, optional - The relative increment in the results before declaring convergence. + n_init : int, optional, default: 10 + Number of time the k-means algorithm will be run with different + centroid seeds. 
The final results will be the best output of + n_init consecutive runs in terms of inertia. + + max_iter : int, optional, default 300 + Maximum number of iterations of the k-means algorithm to run. verbose : boolean, optional Verbosity mode. + tol : float, optional + The relative increment in the results before declaring convergence. + random_state : int, RandomState instance or None, optional, default: None If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; @@ -250,6 +244,12 @@ def k_means(X, n_clusters, init='k-means++', precompute_distances='auto', (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used. + algorithm : "auto", "full" or "elkan", default="auto" + K-means algorithm to use. The classical EM-style algorithm is "full". + The "elkan" variation is more efficient by using the triangle + inequality, but currently doesn't support sparse data. "auto" chooses + "elkan" for dense data and "full" for sparse data. + return_n_iter : bool, optional Whether or not to return the number of iterations. @@ -716,15 +716,6 @@ class KMeans(BaseEstimator, ClusterMixin, TransformerMixin): The number of clusters to form as well as the number of centroids to generate. - max_iter : int, default: 300 - Maximum number of iterations of the k-means algorithm for a - single run. - - n_init : int, default: 10 - Number of time the k-means algorithm will be run with different - centroid seeds. The final results will be the best output of - n_init consecutive runs in terms of inertia. - init : {'k-means++', 'random' or an ndarray} Method for initialization, defaults to 'k-means++': @@ -738,11 +729,17 @@ class KMeans(BaseEstimator, ClusterMixin, TransformerMixin): If an ndarray is passed, it should be of shape (n_clusters, n_features) and gives the initial centers. - algorithm : "auto", "full" or "elkan", default="auto" - K-means algorithm to use. The classical EM-style algorithm is "full". - The "elkan" variation is more efficient by using the triangle - inequality, but currently doesn't support sparse data. "auto" chooses - "elkan" for dense data and "full" for sparse data. + n_init : int, default: 10 + Number of time the k-means algorithm will be run with different + centroid seeds. The final results will be the best output of + n_init consecutive runs in terms of inertia. + + max_iter : int, default: 300 + Maximum number of iterations of the k-means algorithm for a + single run. + + tol : float, default: 1e-4 + Relative tolerance with regards to inertia to declare convergence precompute_distances : {'auto', True, False} Precompute distances (faster but takes more memory). @@ -755,17 +752,8 @@ class KMeans(BaseEstimator, ClusterMixin, TransformerMixin): False : never precompute distances - tol : float, default: 1e-4 - Relative tolerance with regards to inertia to declare convergence - - n_jobs : int - The number of jobs to use for the computation. This works by computing - each of the n_init runs in parallel. - - If -1 all CPUs are used. If 1 is given, no parallel computing code is - used at all, which is useful for debugging. For n_jobs below -1, - (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one - are used. + verbose : int, default 0 + Verbosity mode. 
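For readers cross-checking the reordered signature against real usage, a minimal KMeans sketch (not part of this patch; the data and settings are illustrative):

    import numpy as np
    from sklearn.cluster import KMeans

    X = np.random.RandomState(0).rand(100, 2)   # toy data
    km = KMeans(n_clusters=3, init='k-means++', n_init=10, max_iter=300,
                tol=1e-4, verbose=0, random_state=0).fit(X)
    print(km.labels_.shape)           # (100,)
    print(km.cluster_centers_.shape)  # (3, 2)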
random_state : int, RandomState instance or None, optional, default: None If int, random_state is the seed used by the random number generator; @@ -773,9 +761,6 @@ class KMeans(BaseEstimator, ClusterMixin, TransformerMixin): If None, the random number generator is the RandomState instance used by `np.random`. - verbose : int, default 0 - Verbosity mode. - copy_x : boolean, default True When pre-computing distances it is more numerically accurate to center the data first. If copy_x is True, then the original data is not @@ -783,6 +768,21 @@ class KMeans(BaseEstimator, ClusterMixin, TransformerMixin): the function returns, but small numerical differences may be introduced by subtracting and then adding the data mean. + n_jobs : int + The number of jobs to use for the computation. This works by computing + each of the n_init runs in parallel. + + If -1 all CPUs are used. If 1 is given, no parallel computing code is + used at all, which is useful for debugging. For n_jobs below -1, + (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one + are used. + + algorithm : "auto", "full" or "elkan", default="auto" + K-means algorithm to use. The classical EM-style algorithm is "full". + The "elkan" variation is more efficient by using the triangle + inequality, but currently doesn't support sparse data. "auto" chooses + "elkan" for dense data and "full" for sparse data. + Attributes ---------- cluster_centers_ : array, [n_clusters, n_features] @@ -1196,16 +1196,38 @@ class MiniBatchKMeans(KMeans): The number of clusters to form as well as the number of centroids to generate. + init : {'k-means++', 'random' or an ndarray}, default: 'k-means++' + Method for initialization, defaults to 'k-means++': + + 'k-means++' : selects initial cluster centers for k-mean + clustering in a smart way to speed up convergence. See section + Notes in k_init for more details. + + 'random': choose k observations (rows) at random from data for + the initial centroids. + + If an ndarray is passed, it should be of shape (n_clusters, n_features) + and gives the initial centers. + max_iter : int, optional Maximum number of iterations over the complete dataset before stopping independently of any early stopping criterion heuristics. - max_no_improvement : int, default: 10 - Control early stopping based on the consecutive number of mini - batches that does not yield an improvement on the smoothed inertia. + batch_size : int, optional, default: 100 + Size of the mini batches. - To disable convergence detection based on inertia, set - max_no_improvement to None. + verbose : boolean, optional + Verbosity mode. + + compute_labels : boolean, default=True + Compute label assignment and inertia for the complete dataset + once the minibatch optimization has converged in fit. + + random_state : int, RandomState instance or None, optional, default: None + If int, random_state is the seed used by the random number generator; + If RandomState instance, random_state is the random number generator; + If None, the random number generator is the RandomState instance used + by `np.random`. tol : float, default: 0.0 Control early stopping based on the relative center changes as @@ -1218,8 +1240,12 @@ class MiniBatchKMeans(KMeans): To disable convergence detection based on normalized center change, set tol to 0.0 (default). - batch_size : int, optional, default: 100 - Size of the mini batches. 
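Since `batch_size` and the early-stopping parameters being reordered here only take effect during mini-batch updates, a small incremental-fitting sketch (batch and cluster counts are illustrative):

    import numpy as np
    from sklearn.cluster import MiniBatchKMeans

    rng = np.random.RandomState(0)
    mbk = MiniBatchKMeans(n_clusters=3, batch_size=100, random_state=0)
    for _ in range(10):
        mbk.partial_fit(rng.rand(100, 2))  # one mini-batch per call
    print(mbk.cluster_centers_.shape)      # (3, 2)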
+ max_no_improvement : int, default: 10 + Control early stopping based on the consecutive number of mini + batches that does not yield an improvement on the smoothed inertia. + + To disable convergence detection based on inertia, set + max_no_improvement to None. init_size : int, optional, default: 3 * batch_size Number of samples to randomly sample for speeding up the @@ -1227,34 +1253,11 @@ class MiniBatchKMeans(KMeans): only algorithm is initialized by running a batch KMeans on a random subset of the data. This needs to be larger than n_clusters. - init : {'k-means++', 'random' or an ndarray}, default: 'k-means++' - Method for initialization, defaults to 'k-means++': - - 'k-means++' : selects initial cluster centers for k-mean - clustering in a smart way to speed up convergence. See section - Notes in k_init for more details. - - 'random': choose k observations (rows) at random from data for - the initial centroids. - - If an ndarray is passed, it should be of shape (n_clusters, n_features) - and gives the initial centers. - n_init : int, default=3 Number of random initializations that are tried. In contrast to KMeans, the algorithm is only run once, using the best of the ``n_init`` initializations as measured by inertia. - compute_labels : boolean, default=True - Compute label assignment and inertia for the complete dataset - once the minibatch optimization has converged in fit. - - random_state : int, RandomState instance or None, optional, default: None - If int, random_state is the seed used by the random number generator; - If RandomState instance, random_state is the random number generator; - If None, the random number generator is the RandomState instance used - by `np.random`. - reassignment_ratio : float, default: 0.01 Control the fraction of the maximum number of counts for a center to be reassigned. A higher value means that low count @@ -1262,9 +1265,6 @@ class MiniBatchKMeans(KMeans): model will take longer to converge, but should converge in a better clustering. - verbose : boolean, optional - Verbosity mode. - Attributes ---------- diff --git a/sklearn/cluster/spectral.py b/sklearn/cluster/spectral.py index d6caa0ae05667..5f5f0a4e9d452 100644 --- a/sklearn/cluster/spectral.py +++ b/sklearn/cluster/spectral.py @@ -300,30 +300,6 @@ class SpectralClustering(BaseEstimator, ClusterMixin): n_clusters : integer, optional The dimension of the projection subspace. - affinity : string, array-like or callable, default 'rbf' - If a string, this may be one of 'nearest_neighbors', 'precomputed', - 'rbf' or one of the kernels supported by - `sklearn.metrics.pairwise_kernels`. - - Only kernels that produce similarity scores (non-negative values that - increase with similarity) should be used. This property is not checked - by the clustering algorithm. - - gamma : float, default=1.0 - Kernel coefficient for rbf, poly, sigmoid, laplacian and chi2 kernels. - Ignored for ``affinity='nearest_neighbors'``. - - degree : float, default=3 - Degree of the polynomial kernel. Ignored by other kernels. - - coef0 : float, default=1 - Zero coefficient for polynomial and sigmoid kernels. - Ignored by other kernels. - - n_neighbors : integer - Number of neighbors to use when constructing the affinity matrix using - the nearest neighbors method. Ignored for ``affinity='rbf'``. - eigen_solver : {None, 'arpack', 'lobpcg', or 'amg'} The eigenvalue decomposition strategy to use. AMG requires pyamg to be installed. 
It can be faster on very large, sparse problems, @@ -342,6 +318,23 @@ class SpectralClustering(BaseEstimator, ClusterMixin): centroid seeds. The final results will be the best output of n_init consecutive runs in terms of inertia. + gamma : float, default=1.0 + Kernel coefficient for rbf, poly, sigmoid, laplacian and chi2 kernels. + Ignored for ``affinity='nearest_neighbors'``. + + affinity : string, array-like or callable, default 'rbf' + If a string, this may be one of 'nearest_neighbors', 'precomputed', + 'rbf' or one of the kernels supported by + `sklearn.metrics.pairwise_kernels`. + + Only kernels that produce similarity scores (non-negative values that + increase with similarity) should be used. This property is not checked + by the clustering algorithm. + + n_neighbors : integer + Number of neighbors to use when constructing the affinity matrix using + the nearest neighbors method. Ignored for ``affinity='rbf'``. + eigen_tol : float, optional, default: 0.0 Stopping criterion for eigendecomposition of the Laplacian matrix when using arpack eigen_solver. @@ -353,6 +346,13 @@ class SpectralClustering(BaseEstimator, ClusterMixin): also be sensitive to initialization. Discretization is another approach which is less sensitive to random initialization. + degree : float, default=3 + Degree of the polynomial kernel. Ignored by other kernels. + + coef0 : float, default=1 + Zero coefficient for polynomial and sigmoid kernels. + Ignored by other kernels. + kernel_params : dictionary of string to any, optional Parameters (keyword arguments) and values for kernel passed as callable object. Ignored by other kernels. diff --git a/sklearn/utils/tests/test_docstring_parameters.py b/sklearn/utils/tests/test_docstring_parameters.py index 621ee4a2af431..95131bdf69d1e 100644 --- a/sklearn/utils/tests/test_docstring_parameters.py +++ b/sklearn/utils/tests/test_docstring_parameters.py @@ -22,7 +22,7 @@ # 'sklearn', 'sklearn.base', 'sklearn.calibration', - # 'sklearn.cluster', + 'sklearn.cluster', # 'sklearn.covariance', # 'sklearn.cross_decomposition', 'sklearn.datasets', @@ -94,7 +94,8 @@ def test_docstring_parameters(): # Now skip docstring test for y when y is None # by default for API reason if method_name in \ - ['fit', 'score', 'fit_predict', 'fit_transform']: + ['fit', 'score', 'fit_predict', 'fit_transform', + 'partial_fit']: sig = signature(method) if ('y' in sig.parameters and sig.parameters['y'].default is None): From 42b2a2d20ea087752385337639b9383cae45aedf Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Thu, 8 Jun 2017 17:51:21 +0200 Subject: [PATCH 11/66] more --- sklearn/cluster/hierarchical.py | 19 +++++++++++-------- sklearn/cluster/k_means_.py | 20 ++++++++++++++++++++ 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/sklearn/cluster/hierarchical.py b/sklearn/cluster/hierarchical.py index 2195fe8ee3d85..9eca8176d12e8 100644 --- a/sklearn/cluster/hierarchical.py +++ b/sklearn/cluster/hierarchical.py @@ -312,6 +312,9 @@ def linkage_tree(X, connectivity=None, n_components=None, be symmetric and only the upper triangular half is used. Default is None, i.e, the Ward algorithm is unstructured. + n_components : int (optional) + The number of connected components in the graph. + n_clusters : int (optional) Stop early the construction of the tree at n_clusters. This is useful to decrease computation time if the number of clusters is @@ -596,14 +599,6 @@ class AgglomerativeClustering(BaseEstimator, ClusterMixin): n_clusters : int, default=2 The number of clusters to find. 
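A minimal sketch of the structured clustering enabled by the `connectivity` parameter discussed in this hunk, using a k-nearest-neighbors graph (the neighborhood size is an illustrative choice):

    import numpy as np
    from sklearn.cluster import AgglomerativeClustering
    from sklearn.neighbors import kneighbors_graph

    X = np.random.RandomState(0).rand(50, 2)
    conn = kneighbors_graph(X, n_neighbors=5, include_self=False)
    model = AgglomerativeClustering(n_clusters=3, connectivity=conn,
                                    linkage='ward').fit(X)
    print(model.labels_[:10])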
- connectivity : array-like or callable, optional - Connectivity matrix. Defines for each sample the neighboring - samples following a given structure of the data. - This can be a connectivity matrix itself or a callable that transforms - the data into a connectivity matrix, such as derived from - kneighbors_graph. Default is None, i.e, the - hierarchical clustering algorithm is unstructured. - affinity : string or callable, default: "euclidean" Metric used to compute the linkage. Can be "euclidean", "l1", "l2", "manhattan", "cosine", or 'precomputed'. @@ -615,6 +610,14 @@ class AgglomerativeClustering(BaseEstimator, ClusterMixin): By default, no caching is done. If a string is given, it is the path to the caching directory. + connectivity : array-like or callable, optional + Connectivity matrix. Defines for each sample the neighboring + samples following a given structure of the data. + This can be a connectivity matrix itself or a callable that transforms + the data into a connectivity matrix, such as derived from + kneighbors_graph. Default is None, i.e, the + hierarchical clustering algorithm is unstructured. + compute_full_tree : bool or 'auto' (optional) Stop early the construction of the tree at n_clusters. This is useful to decrease computation time if the number of clusters is diff --git a/sklearn/cluster/k_means_.py b/sklearn/cluster/k_means_.py index 142186c779e3e..af2fc67e083db 100644 --- a/sklearn/cluster/k_means_.py +++ b/sklearn/cluster/k_means_.py @@ -898,6 +898,16 @@ def fit_predict(self, X, y=None): Convenience method; equivalent to calling fit(X) followed by predict(X). + + Parameters + ---------- + X : {array-like, sparse matrix}, shape = [n_samples, n_features] + New data to transform. + + Returns + ------- + labels : array, shape [n_samples,] + Index of the cluster each sample belongs to. """ return self.fit(X).labels_ @@ -905,6 +915,16 @@ def fit_transform(self, X, y=None): """Compute clustering and transform X to cluster-distance space. Equivalent to fit(X).transform(X), but more efficiently implemented. + + Parameters + ---------- + X : {array-like, sparse matrix}, shape = [n_samples, n_features] + New data to transform. + + Returns + ------- + X_new : array, shape [n_samples, k] + X transformed in the new space. """ # Currently, this just skips a copy of the data if it is not in # np.array or CSR format already. From 98686e408a71d1c6c4b2a9497eab97d6175c875a Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Thu, 8 Jun 2017 17:56:18 +0200 Subject: [PATCH 12/66] more --- sklearn/cluster/hierarchical.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sklearn/cluster/hierarchical.py b/sklearn/cluster/hierarchical.py index 9eca8176d12e8..29d725bd8ce54 100644 --- a/sklearn/cluster/hierarchical.py +++ b/sklearn/cluster/hierarchical.py @@ -769,14 +769,6 @@ class FeatureAgglomeration(AgglomerativeClustering, AgglomerationTransform): n_clusters : int, default 2 The number of clusters to find. - connectivity : array-like or callable, optional - Connectivity matrix. Defines for each feature the neighboring - features following a given structure of the data. - This can be a connectivity matrix itself or a callable that transforms - the data into a connectivity matrix, such as derived from - kneighbors_graph. Default is None, i.e, the - hierarchical clustering algorithm is unstructured. - affinity : string or callable, default "euclidean" Metric used to compute the linkage. 
Can be "euclidean", "l1", "l2", "manhattan", "cosine", or 'precomputed'. @@ -788,6 +780,14 @@ class FeatureAgglomeration(AgglomerativeClustering, AgglomerationTransform): By default, no caching is done. If a string is given, it is the path to the caching directory. + connectivity : array-like or callable, optional + Connectivity matrix. Defines for each feature the neighboring + features following a given structure of the data. + This can be a connectivity matrix itself or a callable that transforms + the data into a connectivity matrix, such as derived from + kneighbors_graph. Default is None, i.e, the + hierarchical clustering algorithm is unstructured. + compute_full_tree : bool or 'auto', optional, default "auto" Stop early the construction of the tree at n_clusters. This is useful to decrease computation time if the number of clusters is From 46abcd89fbb81f5a7b917e4a96ea9192db26adb8 Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Thu, 8 Jun 2017 18:02:07 +0200 Subject: [PATCH 13/66] more --- sklearn/covariance/outlier_detection.py | 6 ++++++ sklearn/covariance/robust_covariance.py | 8 ++++---- sklearn/covariance/shrunk_covariance_.py | 8 ++++---- sklearn/utils/tests/test_docstring_parameters.py | 2 +- 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/sklearn/covariance/outlier_detection.py b/sklearn/covariance/outlier_detection.py index 9fe219ba5d0b6..ca3fae69696a5 100644 --- a/sklearn/covariance/outlier_detection.py +++ b/sklearn/covariance/outlier_detection.py @@ -157,6 +157,12 @@ class EllipticEnvelope(OutlierDetectionMixin, MinCovDet): The amount of contamination of the data set, i.e. the proportion of outliers in the data set. + random_state : int, RandomState instance or None, optional (default=None) + If int, random_state is the seed used by the random number generator; + If RandomState instance, random_state is the random number generator; + If None, the random number generator is the RandomState instance used + by `np.random`. + Attributes ---------- location_ : array-like, shape (n_features,) diff --git a/sklearn/covariance/robust_covariance.py b/sklearn/covariance/robust_covariance.py index 2b7ebabc83db4..985dda92f990c 100644 --- a/sklearn/covariance/robust_covariance.py +++ b/sklearn/covariance/robust_covariance.py @@ -317,16 +317,16 @@ def fast_mcd(X, support_fraction=None, value of support_fraction will be used within the algorithm: `[n_sample + n_features + 1] / 2`. + cov_computation_method : callable, default empirical_covariance + The function which will be used to compute the covariance. + Must return shape (n_features, n_features) + random_state : int, RandomState instance or None, optional (default=None) If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by `np.random`. - cov_computation_method : callable, default empirical_covariance - The function which will be used to compute the covariance. 
- Must return shape (n_features, n_features) - Notes ----- The FastMCD algorithm has been introduced by Rousseuw and Van Driessen diff --git a/sklearn/covariance/shrunk_covariance_.py b/sklearn/covariance/shrunk_covariance_.py index 700052df49c6d..a99b0f4111323 100644 --- a/sklearn/covariance/shrunk_covariance_.py +++ b/sklearn/covariance/shrunk_covariance_.py @@ -73,16 +73,16 @@ class ShrunkCovariance(EmpiricalCovariance): store_precision : boolean, default True Specify if the estimated precision is stored - shrinkage : float, 0 <= shrinkage <= 1, default 0.1 - Coefficient in the convex combination used for the computation - of the shrunk estimate. - assume_centered : boolean, default False If True, data are not centered before computation. Useful when working with data whose mean is almost, but not exactly zero. If False, data are centered before computation. + shrinkage : float, 0 <= shrinkage <= 1, default 0.1 + Coefficient in the convex combination used for the computation + of the shrunk estimate. + Attributes ---------- covariance_ : array-like, shape (n_features, n_features) diff --git a/sklearn/utils/tests/test_docstring_parameters.py b/sklearn/utils/tests/test_docstring_parameters.py index 95131bdf69d1e..e990e1b760327 100644 --- a/sklearn/utils/tests/test_docstring_parameters.py +++ b/sklearn/utils/tests/test_docstring_parameters.py @@ -23,7 +23,7 @@ 'sklearn.base', 'sklearn.calibration', 'sklearn.cluster', - # 'sklearn.covariance', + 'sklearn.covariance', # 'sklearn.cross_decomposition', 'sklearn.datasets', # 'sklearn.decomposition', From 27936f4dfa15c0bc33be82d53088261fbf8023da Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Fri, 9 Jun 2017 10:18:00 +0200 Subject: [PATCH 14/66] more --- sklearn/decomposition/dict_learning.py | 23 ++++++++++++++----- sklearn/decomposition/incremental_pca.py | 22 ++++++++++-------- sklearn/decomposition/kernel_pca.py | 18 +++++++-------- sklearn/metrics/ranking.py | 4 ++-- .../utils/tests/test_docstring_parameters.py | 2 +- 5 files changed, 41 insertions(+), 28 deletions(-) diff --git a/sklearn/decomposition/dict_learning.py b/sklearn/decomposition/dict_learning.py index 14ed2cf467309..c601f38b5f9ad 100644 --- a/sklearn/decomposition/dict_learning.py +++ b/sklearn/decomposition/dict_learning.py @@ -924,6 +924,17 @@ def fit(self, X, y=None): This method is just there to implement the usual API and hence work in pipelines. + + Parameters + ---------- + X : array-like, shape (n_samples, n_features) + Training vector, where n_samples in the number of samples + and n_features is the number of features. + + Returns + ------- + self : object + Returns the object itself """ return self @@ -1129,6 +1140,12 @@ class MiniBatchDictionaryLearning(BaseEstimator, SparseCodingMixin): Lasso solution (linear_model.Lasso). Lars will be faster if the estimated components are sparse. + batch_size : int, + number of samples in each mini-batch + + n_jobs : int, + number of parallel jobs to run + transform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', \ 'threshold'} Algorithm used to transform the data. @@ -1160,18 +1177,12 @@ class MiniBatchDictionaryLearning(BaseEstimator, SparseCodingMixin): its negative part and its positive part. This can improve the performance of downstream classifiers. 
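A minimal end-to-end sketch of the estimator documented here (shapes and `n_components` are illustrative assumptions, not values from this patch):

    import numpy as np
    from sklearn.decomposition import MiniBatchDictionaryLearning

    X = np.random.RandomState(0).randn(100, 8)
    dico = MiniBatchDictionaryLearning(n_components=6, batch_size=10,
                                       random_state=0).fit(X)
    print(dico.components_.shape)   # learned dictionary, (6, 8)
    print(dico.transform(X).shape)  # sparse codes, (100, 6)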
- n_jobs : int, - number of parallel jobs to run - dict_init : array of shape (n_components, n_features), initial value of the dictionary for warm restart scenarios verbose : degree of verbosity of the printed output - batch_size : int, - number of samples in each mini-batch - shuffle : bool, whether to shuffle the samples before forming batches diff --git a/sklearn/decomposition/incremental_pca.py b/sklearn/decomposition/incremental_pca.py index e1806d1ef7616..1f1ed56137551 100644 --- a/sklearn/decomposition/incremental_pca.py +++ b/sklearn/decomposition/incremental_pca.py @@ -40,16 +40,6 @@ class IncrementalPCA(_BasePCA): Number of components to keep. If ``n_components `` is ``None``, then ``n_components`` is set to ``min(n_samples, n_features)``. - batch_size : int or None, (default=None) - The number of samples to use for each batch. Only used when calling - ``fit``. If ``batch_size`` is ``None``, then ``batch_size`` - is inferred from the data and set to ``5 * n_features``, to provide a - balance between approximation accuracy and memory consumption. - - copy : bool, (default=True) - If False, X will be overwritten. ``copy=False`` can be used to - save memory but is unsafe for general use. - whiten : bool, optional When True (False by default) the ``components_`` vectors are divided by ``n_samples`` times ``components_`` to ensure uncorrelated outputs @@ -60,6 +50,16 @@ class IncrementalPCA(_BasePCA): improve the predictive accuracy of the downstream estimators by making data respect some hard-wired assumptions. + copy : bool, (default=True) + If False, X will be overwritten. ``copy=False`` can be used to + save memory but is unsafe for general use. + + batch_size : int or None, (default=None) + The number of samples to use for each batch. Only used when calling + ``fit``. If ``batch_size`` is ``None``, then ``batch_size`` + is inferred from the data and set to ``5 * n_features``, to provide a + balance between approximation accuracy and memory consumption. + Attributes ---------- components_ : array, shape (n_components, n_features) @@ -195,6 +195,8 @@ def partial_fit(self, X, y=None, check_input=True): X : array-like, shape (n_samples, n_features) Training data, where n_samples is the number of samples and n_features is the number of features. + check_input : bool + Run check_array on X. Returns ------- diff --git a/sklearn/decomposition/kernel_pca.py b/sklearn/decomposition/kernel_pca.py index 385c0dd18996b..a9a728c9dcb97 100644 --- a/sklearn/decomposition/kernel_pca.py +++ b/sklearn/decomposition/kernel_pca.py @@ -31,13 +31,13 @@ class KernelPCA(BaseEstimator, TransformerMixin): kernel : "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed" Kernel. Default="linear". - degree : int, default=3 - Degree for poly kernels. Ignored by other kernels. - gamma : float, default=1/n_features Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels. + degree : int, default=3 + Degree for poly kernels. Ignored by other kernels. + coef0 : float, default=1 Independent term in poly and sigmoid kernels. Ignored by other kernels. @@ -82,12 +82,6 @@ class KernelPCA(BaseEstimator, TransformerMixin): .. versionadded:: 0.18 - n_jobs : int, default=1 - The number of parallel jobs to run. - If `-1`, then the number of jobs is set to the number of CPU cores. - - .. versionadded:: 0.18 - copy_X : boolean, default=True If True, input X is copied and stored by the model in the `X_fit_` attribute. 
If no further changes will be done to X, setting @@ -95,6 +89,12 @@ class KernelPCA(BaseEstimator, TransformerMixin): .. versionadded:: 0.18 + n_jobs : int, default=1 + The number of parallel jobs to run. + If `-1`, then the number of jobs is set to the number of CPU cores. + + .. versionadded:: 0.18 + Attributes ---------- lambdas_ : array, (n_components,) diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py index ed93e7f66cc84..c2bd9893d1fac 100644 --- a/sklearn/metrics/ranking.py +++ b/sklearn/metrics/ranking.py @@ -816,8 +816,8 @@ def ndcg_score(y_true, y_score, k=5): ------- score : float - Example - ------- + Examples + -------- >>> y_true = [1, 0, 2] >>> y_score = [[0.15, 0.55, 0.2], [0.7, 0.2, 0.1], [0.06, 0.04, 0.9]] >>> ndcg_score(y_true, y_score, k=2) diff --git a/sklearn/utils/tests/test_docstring_parameters.py b/sklearn/utils/tests/test_docstring_parameters.py index e990e1b760327..068466c127c5d 100644 --- a/sklearn/utils/tests/test_docstring_parameters.py +++ b/sklearn/utils/tests/test_docstring_parameters.py @@ -26,7 +26,7 @@ 'sklearn.covariance', # 'sklearn.cross_decomposition', 'sklearn.datasets', - # 'sklearn.decomposition', + 'sklearn.decomposition', # 'sklearn.ensemble', # 'sklearn.feature_extraction', # 'sklearn.feature_selection', From 88c71f3ec2b98948350a0df47adee339860fff87 Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Fri, 9 Jun 2017 11:01:11 +0200 Subject: [PATCH 15/66] more --- sklearn/decomposition/dict_learning.py | 40 +++++++++---------- sklearn/decomposition/nmf.py | 3 ++ sklearn/decomposition/online_lda.py | 9 +++-- .../utils/tests/test_docstring_parameters.py | 6 ++- 4 files changed, 34 insertions(+), 24 deletions(-) diff --git a/sklearn/decomposition/dict_learning.py b/sklearn/decomposition/dict_learning.py index c601f38b5f9ad..a0eb49e4693d7 100644 --- a/sklearn/decomposition/dict_learning.py +++ b/sklearn/decomposition/dict_learning.py @@ -220,6 +220,10 @@ def sparse_encode(X, dictionary, gram=None, cov=None, algorithm='lasso_lars', the reconstruction error targeted. In this case, it overrides `n_nonzero_coefs`. + copy_cov : boolean, optional + Whether to copy the precomputed covariance matrix; if False, it may be + overwritten. + init : array of shape (n_samples, n_components) Initialization value of the sparse codes. Only used if `algorithm='lasso_cd'`. @@ -227,10 +231,6 @@ def sparse_encode(X, dictionary, gram=None, cov=None, algorithm='lasso_lars', max_iter : int, 1000 by default Maximum number of iterations to perform if `algorithm='lasso_cd'`. - copy_cov : boolean, optional - Whether to copy the precomputed covariance matrix; if False, it may be - overwritten. - n_jobs : int, optional Number of parallel jobs to run. @@ -1006,11 +1006,6 @@ class DictionaryLearning(BaseEstimator, SparseCodingMixin): the reconstruction error targeted. In this case, it overrides `n_nonzero_coefs`. - split_sign : bool, False by default - Whether to split the sparse feature vector into the concatenation of - its negative part and its positive part. This can improve the - performance of downstream classifiers. - n_jobs : int, number of parallel jobs to run @@ -1023,6 +1018,11 @@ class DictionaryLearning(BaseEstimator, SparseCodingMixin): verbose : degree of verbosity of the printed output + split_sign : bool, False by default + Whether to split the sparse feature vector into the concatenation of + its negative part and its positive part. This can improve the + performance of downstream classifiers. 
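The `transform_algorithm` options above are dispatched to `sparse_encode`; a minimal sketch of calling it directly, where a random dictionary stands in for a fitted one (purely illustrative):

    import numpy as np
    from sklearn.decomposition import sparse_encode

    rng = np.random.RandomState(0)
    D = rng.randn(6, 8)   # 6 atoms, 8 features
    X = rng.randn(5, 8)
    code = sparse_encode(X, D, algorithm='omp', n_nonzero_coefs=2)
    print(code.shape)     # (5, 6), at most 2 nonzero coefficients per row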
+ random_state : int, RandomState instance or None, optional (default=None) If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; @@ -1140,11 +1140,17 @@ class MiniBatchDictionaryLearning(BaseEstimator, SparseCodingMixin): Lasso solution (linear_model.Lasso). Lars will be faster if the estimated components are sparse. + n_jobs : int, + number of parallel jobs to run + batch_size : int, number of samples in each mini-batch - n_jobs : int, - number of parallel jobs to run + shuffle : bool, + whether to shuffle the samples before forming batches + + dict_init : array of shape (n_components, n_features), + initial value of the dictionary for warm restart scenarios transform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', \ 'threshold'} @@ -1172,20 +1178,14 @@ class MiniBatchDictionaryLearning(BaseEstimator, SparseCodingMixin): the reconstruction error targeted. In this case, it overrides `n_nonzero_coefs`. + verbose : + degree of verbosity of the printed output + split_sign : bool, False by default Whether to split the sparse feature vector into the concatenation of its negative part and its positive part. This can improve the performance of downstream classifiers. - dict_init : array of shape (n_components, n_features), - initial value of the dictionary for warm restart scenarios - - verbose : - degree of verbosity of the printed output - - shuffle : bool, - whether to shuffle the samples before forming batches - random_state : int, RandomState instance or None, optional (default=None) If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; diff --git a/sklearn/decomposition/nmf.py b/sklearn/decomposition/nmf.py index 72a52f802accb..9eef6b8b36b4a 100644 --- a/sklearn/decomposition/nmf.py +++ b/sklearn/decomposition/nmf.py @@ -1129,6 +1129,9 @@ class NMF(BaseEstimator, TransformerMixin): Regularization parameter *l1_ratio* used in the Coordinate Descent solver. + verbose : bool, default=False + Whether to be verbose. + shuffle : boolean, default: False If true, randomize the order of coordinates in the CD solver. diff --git a/sklearn/decomposition/online_lda.py b/sklearn/decomposition/online_lda.py index 68900a3ea0764..01c72682892d6 100644 --- a/sklearn/decomposition/online_lda.py +++ b/sklearn/decomposition/online_lda.py @@ -187,9 +187,6 @@ class LatentDirichletAllocation(BaseEstimator, TransformerMixin): max_iter : integer, optional (default=10) The maximum number of iterations. - total_samples : int, optional (default=1e6) - Total number of documents. Only used in the `partial_fit` method. - batch_size : int, optional (default=128) Number of documents to use in each EM iteration. Only used in online learning. @@ -202,6 +199,9 @@ class LatentDirichletAllocation(BaseEstimator, TransformerMixin): Evaluating perplexity in every iteration might increase training time up to two-fold. + total_samples : int, optional (default=1e6) + Total number of documents. Only used in the `partial_fit` method. + perp_tol : float, optional (default=1e-1) Perplexity tolerance in batch learning. Only used when ``evaluate_every`` is greater than 0. @@ -795,6 +795,9 @@ def perplexity(self, X, doc_topic_distr='deprecated', sub_sampling=False): .. deprecated:: 0.19 + sub_sampling : bool + Do sub-sampling or not. 
+ Returns ------- score : float diff --git a/sklearn/utils/tests/test_docstring_parameters.py b/sklearn/utils/tests/test_docstring_parameters.py index 068466c127c5d..4764e98e5d789 100644 --- a/sklearn/utils/tests/test_docstring_parameters.py +++ b/sklearn/utils/tests/test_docstring_parameters.py @@ -55,6 +55,7 @@ 'sklearn.pipeline.make_pipeline', 'sklearn.pipeline.make_union', 'sklearn.utils.extmath.safe_sparse_dot', + 'RandomizedPCA', ] _tab_ignores = [ @@ -79,6 +80,8 @@ def test_docstring_parameters(): module = getattr(module, submod) classes = inspect.getmembers(module, inspect.isclass) for cname, cls in classes: + if cname in _docstring_ignores: + continue if cname.startswith('_') and cname not in _doc_special_members: continue with warnings.catch_warnings(record=True) as w: @@ -88,6 +91,7 @@ def test_docstring_parameters(): % (cls, name, w[0])) if hasattr(cls, '__init__'): incorrect += check_parameters_match(cls.__init__, cdoc) + for method_name in cdoc.methods: method = getattr(cls, method_name) param_ignore = None @@ -95,7 +99,7 @@ def test_docstring_parameters(): # by default for API reason if method_name in \ ['fit', 'score', 'fit_predict', 'fit_transform', - 'partial_fit']: + 'partial_fit', 'transform', 'inverse_transform']: # XXX remove transform sig = signature(method) if ('y' in sig.parameters and sig.parameters['y'].default is None): From c0a0630d53587f32ec06d308d28584aa5ac03951 Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Fri, 9 Jun 2017 12:02:23 +0200 Subject: [PATCH 16/66] more --- sklearn/ensemble/forest.py | 14 ++ sklearn/ensemble/iforest.py | 3 + sklearn/feature_extraction/hashing.py | 12 +- .../neural_network/multilayer_perceptron.py | 52 +++--- sklearn/semi_supervised/label_propagation.py | 10 +- sklearn/tree/tree.py | 153 +++++++++--------- .../utils/tests/test_docstring_parameters.py | 25 ++- 7 files changed, 153 insertions(+), 116 deletions(-) diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index 60732bf83a446..51792383eb0cb 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -1840,6 +1840,13 @@ def fit(self, X, y=None, sample_weight=None): efficiency. Sparse matrices are also supported, use sparse ``csc_matrix`` for maximum efficiency. + sample_weight : array-like, shape = [n_samples] or None + Sample weights. If None, then samples are equally weighted. Splits + that would create child nodes with net zero or negative weight are + ignored while searching for a split in each node. In the case of + classification, splits are also ignored if they would result in any + single class carrying a negative weight in either child node. + Returns ------- self : object @@ -1858,6 +1865,13 @@ def fit_transform(self, X, y=None, sample_weight=None): Input data used to build forests. Use ``dtype=np.float32`` for maximum efficiency. + sample_weight : array-like, shape = [n_samples] or None + Sample weights. If None, then samples are equally weighted. Splits + that would create child nodes with net zero or negative weight are + ignored while searching for a split in each node. In the case of + classification, splits are also ignored if they would result in any + single class carrying a negative weight in either child node. 
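A quick sketch of the `sample_weight` argument documented above (these hunks appear to belong to RandomTreesEmbedding; the weights are illustrative):

    import numpy as np
    from sklearn.ensemble import RandomTreesEmbedding

    rng = np.random.RandomState(0)
    X = rng.rand(20, 4)
    w = rng.rand(20)  # per-sample weights
    emb = RandomTreesEmbedding(n_estimators=5, random_state=0)
    X_sparse = emb.fit_transform(X, sample_weight=w)
    print(X_sparse.shape[0])  # 20, one row per input sample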
+ Returns ------- X_transformed : sparse matrix, shape=(n_samples, n_out) diff --git a/sklearn/ensemble/iforest.py b/sklearn/ensemble/iforest.py index ce379243e21d0..27c674871fb14 100644 --- a/sklearn/ensemble/iforest.py +++ b/sklearn/ensemble/iforest.py @@ -150,6 +150,9 @@ def fit(self, X, y=None, sample_weight=None): efficiency. Sparse matrices are also supported, use sparse ``csc_matrix`` for maximum efficiency. + sample_weight : array-like, shape = [n_samples] or None + Sample weights. If None, then samples are equally weighted. + Returns ------- self : object diff --git a/sklearn/feature_extraction/hashing.py b/sklearn/feature_extraction/hashing.py index 6cbf1dde0afc6..7f6c8ec27bb0b 100644 --- a/sklearn/feature_extraction/hashing.py +++ b/sklearn/feature_extraction/hashing.py @@ -41,10 +41,6 @@ class FeatureHasher(BaseEstimator, TransformerMixin): The number of features (columns) in the output matrices. Small numbers of features are likely to cause hash collisions, but large numbers will cause larger coefficient dimensions in linear learners. - dtype : numpy type, optional, default np.float64 - The type of feature values. Passed to scipy.sparse matrix constructors - as the dtype argument. Do not set this to bool, np.boolean or any - unsigned integer type. input_type : string, optional, default "dict" Either "dict" (the default) to accept dictionaries over (feature_name, value); "pair" to accept pairs of (feature_name, value); @@ -54,6 +50,10 @@ class FeatureHasher(BaseEstimator, TransformerMixin): The feature_name is hashed to find the appropriate column for the feature. The value's sign might be flipped in the output (but see non_negative, below). + dtype : numpy type, optional, default np.float64 + The type of feature values. Passed to scipy.sparse matrix constructors + as the dtype argument. Do not set this to bool, np.boolean or any + unsigned integer type. alternate_sign : boolean, optional, default True When True, an alternating sign is added to the features as to approximately conserve the inner product in the hashed space even for @@ -117,6 +117,10 @@ def fit(self, X=None, y=None): This method doesn't do anything. It exists purely for compatibility with the scikit-learn transformer API. + Parameters + ---------- + X : array-like + Returns ------- self : FeatureHasher diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py index 601acf885685a..a58ea93507748 100644 --- a/sklearn/neural_network/multilayer_perceptron.py +++ b/sklearn/neural_network/multilayer_perceptron.py @@ -751,35 +751,35 @@ class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin): Only used when ``solver='sgd'``. + learning_rate_init : double, optional, default 0.001 + The initial learning rate used. It controls the step-size + in updating the weights. Only used when solver='sgd' or 'adam'. + + power_t : double, optional, default 0.5 + The exponent for inverse scaling learning rate. + It is used in updating effective learning rate when the learning_rate + is set to 'invscaling'. Only used when solver='sgd'. + max_iter : int, optional, default 200 Maximum number of iterations. The solver iterates until convergence (determined by 'tol') or this number of iterations. + shuffle : bool, optional, default True + Whether to shuffle samples in each iteration. Only used when + solver='sgd' or 'adam'. 
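A minimal sketch combining the solver-specific parameters above (the layer size and iteration budget are illustrative):

    import numpy as np
    from sklearn.neural_network import MLPClassifier

    rng = np.random.RandomState(0)
    X, y = rng.rand(100, 4), rng.randint(0, 2, 100)
    clf = MLPClassifier(hidden_layer_sizes=(10,), solver='adam',
                        learning_rate_init=0.001, shuffle=True,
                        max_iter=200, random_state=0).fit(X, y)
    print(clf.score(X, y))

On random data like this the optimizer may stop at max_iter with a convergence warning; that is expected and harmless for the sketch.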
+ random_state : int, RandomState instance or None, optional, default None If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by `np.random`. - shuffle : bool, optional, default True - Whether to shuffle samples in each iteration. Only used when - solver='sgd' or 'adam'. - tol : float, optional, default 1e-4 Tolerance for the optimization. When the loss or score is not improving by at least tol for two consecutive iterations, unless `learning_rate` is set to 'adaptive', convergence is considered to be reached and training stops. - learning_rate_init : double, optional, default 0.001 - The initial learning rate used. It controls the step-size - in updating the weights. Only used when solver='sgd' or 'adam'. - - power_t : double, optional, default 0.5 - The exponent for inverse scaling learning rate. - It is used in updating effective learning rate when the learning_rate - is set to 'invscaling'. Only used when solver='sgd'. - verbose : bool, optional, default False Whether to print progress messages to stdout. @@ -1125,35 +1125,35 @@ class MLPRegressor(BaseMultilayerPerceptron, RegressorMixin): Only used when solver='sgd'. + learning_rate_init : double, optional, default 0.001 + The initial learning rate used. It controls the step-size + in updating the weights. Only used when solver='sgd' or 'adam'. + + power_t : double, optional, default 0.5 + The exponent for inverse scaling learning rate. + It is used in updating effective learning rate when the learning_rate + is set to 'invscaling'. Only used when solver='sgd'. + max_iter : int, optional, default 200 Maximum number of iterations. The solver iterates until convergence (determined by 'tol') or this number of iterations. + shuffle : bool, optional, default True + Whether to shuffle samples in each iteration. Only used when + solver='sgd' or 'adam'. + random_state : int, RandomState instance or None, optional, default None If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by `np.random`. - shuffle : bool, optional, default True - Whether to shuffle samples in each iteration. Only used when - solver='sgd' or 'adam'. - tol : float, optional, default 1e-4 Tolerance for the optimization. When the loss or score is not improving by at least tol for two consecutive iterations, unless `learning_rate` is set to 'adaptive', convergence is considered to be reached and training stops. - learning_rate_init : double, optional, default 0.001 - The initial learning rate used. It controls the step-size - in updating the weights. Only used when solver='sgd' or 'adam'. - - power_t : double, optional, default 0.5 - The exponent for inverse scaling learning rate. - It is used in updating effective learning rate when the learning_rate - is set to 'invscaling'. Only used when solver='sgd'. - verbose : bool, optional, default False Whether to print progress messages to stdout. 
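The next hunks reorder the label propagation docstrings; for reference, a minimal usage sketch in which -1 marks unlabeled samples (the kernel settings are illustrative):

    import numpy as np
    from sklearn.semi_supervised import LabelPropagation

    rng = np.random.RandomState(0)
    X = rng.rand(30, 2)
    y = rng.randint(0, 2, 30)
    y[10:] = -1                  # unlabeled points to be inferred
    lp = LabelPropagation(kernel='knn', n_neighbors=7).fit(X, y)
    print(lp.transduction_[:5])  # labels inferred for every sample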
diff --git a/sklearn/semi_supervised/label_propagation.py b/sklearn/semi_supervised/label_propagation.py index 1d1b1f9b03496..e04b1f08fbd0c 100644 --- a/sklearn/semi_supervised/label_propagation.py +++ b/sklearn/semi_supervised/label_propagation.py @@ -90,6 +90,9 @@ class BaseLabelPropagation(six.with_metaclass(ABCMeta, BaseEstimator, gamma : float Parameter for rbf kernel + n_neighbors : integer > 0 + Parameter for knn kernel + alpha : float Clamping factor @@ -100,9 +103,6 @@ class BaseLabelPropagation(six.with_metaclass(ABCMeta, BaseEstimator, Convergence tolerance: threshold to consider the system at steady state - n_neighbors : integer > 0 - Parameter for knn kernel - n_jobs : int, optional (default = 1) The number of parallel jobs to run. If ``-1``, then the number of jobs is set to the number of CPU cores. @@ -309,6 +309,10 @@ class LabelPropagation(BaseLabelPropagation): Convergence tolerance: threshold to consider the system at steady state + n_jobs : int, optional (default = 1) + The number of parallel jobs to run. + If ``-1``, then the number of jobs is set to the number of CPU cores. + Attributes ---------- X_ : array, shape = [n_samples, n_features] diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index bab9c00dc94b3..807b1f2a44e28 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -528,22 +528,6 @@ class DecisionTreeClassifier(BaseDecisionTree, ClassifierMixin): strategies are "best" to choose the best split and "random" to choose the best random split. - max_features : int, float, string or None, optional (default=None) - The number of features to consider when looking for the best split: - - - If int, then consider `max_features` features at each split. - - If float, then `max_features` is a percentage and - `int(max_features * n_features)` features are considered at each - split. - - If "auto", then `max_features=sqrt(n_features)`. - - If "sqrt", then `max_features=sqrt(n_features)`. - - If "log2", then `max_features=log2(n_features)`. - - If None, then `max_features=n_features`. - - Note: the search for a split does not stop until at least one - valid partition of the node samples is found, even if it requires to - effectively inspect more than ``max_features`` features. - max_depth : int or None, optional (default=None) The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than @@ -576,31 +560,21 @@ class DecisionTreeClassifier(BaseDecisionTree, ClassifierMixin): the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided. - max_leaf_nodes : int or None, optional (default=None) - Grow a tree with ``max_leaf_nodes`` in best-first fashion. - Best nodes are defined as relative reduction in impurity. - If None then unlimited number of leaf nodes. - - class_weight : dict, list of dicts, "balanced" or None, optional (default=None) - Weights associated with classes in the form ``{class_label: weight}``. - If not given, all classes are supposed to have weight one. For - multi-output problems, a list of dicts can be provided in the same - order as the columns of y. - - Note that for multioutput (including multilabel) weights should be - defined for each class of every column in its own dict. For example, - for four-class multilabel classification weights should be - [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of - [{1:1}, {2:5}, {3:1}, {4:1}]. 
- - The "balanced" mode uses the values of y to automatically adjust - weights inversely proportional to class frequencies in the input data - as ``n_samples / (n_classes * np.bincount(y))`` + max_features : int, float, string or None, optional (default=None) + The number of features to consider when looking for the best split: - For multi-output, the weights of each column of y will be multiplied. + - If int, then consider `max_features` features at each split. + - If float, then `max_features` is a percentage and + `int(max_features * n_features)` features are considered at each + split. + - If "auto", then `max_features=sqrt(n_features)`. + - If "sqrt", then `max_features=sqrt(n_features)`. + - If "log2", then `max_features=log2(n_features)`. + - If None, then `max_features=n_features`. - Note that these weights will be multiplied with sample_weight (passed - through the fit method) if sample_weight is specified. + Note: the search for a split does not stop until at least one + valid partition of the node samples is found, even if it requires to + effectively inspect more than ``max_features`` features. random_state : int, RandomState instance or None, optional (default=None) If int, random_state is the seed used by the random number generator; @@ -608,14 +582,10 @@ class DecisionTreeClassifier(BaseDecisionTree, ClassifierMixin): If None, the random number generator is the RandomState instance used by `np.random`. - min_impurity_split : float, - Threshold for early stopping in tree growth. A node will split - if its impurity is above the threshold, otherwise it is a leaf. - - .. deprecated:: 0.19 - ``min_impurity_split`` has been deprecated in favor of - ``min_impurity_decrease`` in 0.19 and will be removed in 0.21. - Use ``min_impurity_decrease`` instead. + max_leaf_nodes : int or None, optional (default=None) + Grow a tree with ``max_leaf_nodes`` in best-first fashion. + Best nodes are defined as relative reduction in impurity. + If None then unlimited number of leaf nodes. min_impurity_decrease : float, optional (default=0.) A node will be split if this split induces a decrease of the impurity @@ -635,6 +605,36 @@ class DecisionTreeClassifier(BaseDecisionTree, ClassifierMixin): .. versionadded:: 0.19 + min_impurity_split : float, + Threshold for early stopping in tree growth. A node will split + if its impurity is above the threshold, otherwise it is a leaf. + + .. deprecated:: 0.19 + ``min_impurity_split`` has been deprecated in favor of + ``min_impurity_decrease`` in 0.19 and will be removed in 0.21. + Use ``min_impurity_decrease`` instead. + + class_weight : dict, list of dicts, "balanced" or None, optional (default=None) + Weights associated with classes in the form ``{class_label: weight}``. + If not given, all classes are supposed to have weight one. For + multi-output problems, a list of dicts can be provided in the same + order as the columns of y. + + Note that for multioutput (including multilabel) weights should be + defined for each class of every column in its own dict. For example, + for four-class multilabel classification weights should be + [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of + [{1:1}, {2:5}, {3:1}, {4:1}]. + + The "balanced" mode uses the values of y to automatically adjust + weights inversely proportional to class frequencies in the input data + as ``n_samples / (n_classes * np.bincount(y))`` + + For multi-output, the weights of each column of y will be multiplied. 
+ + Note that these weights will be multiplied with sample_weight (passed + through the fit method) if sample_weight is specified. + presort : bool, optional (default=False) Whether to presort the data to speed up the finding of best splits in fitting. For the default settings of a decision tree on large @@ -807,6 +807,9 @@ class in a leaf. ``dtype=np.float32`` and if a sparse matrix is provided to a sparse ``csr_matrix``. + check_input : bool + Run check_array on X. + Returns ------- p : array of shape = [n_samples, n_classes], or a list of n_outputs @@ -888,22 +891,6 @@ class DecisionTreeRegressor(BaseDecisionTree, RegressorMixin): strategies are "best" to choose the best split and "random" to choose the best random split. - max_features : int, float, string or None, optional (default=None) - The number of features to consider when looking for the best split: - - - If int, then consider `max_features` features at each split. - - If float, then `max_features` is a percentage and - `int(max_features * n_features)` features are considered at each - split. - - If "auto", then `max_features=n_features`. - - If "sqrt", then `max_features=sqrt(n_features)`. - - If "log2", then `max_features=log2(n_features)`. - - If None, then `max_features=n_features`. - - Note: the search for a split does not stop until at least one - valid partition of the node samples is found, even if it requires to - effectively inspect more than ``max_features`` features. - max_depth : int or None, optional (default=None) The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than @@ -936,10 +923,21 @@ class DecisionTreeRegressor(BaseDecisionTree, RegressorMixin): the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided. - max_leaf_nodes : int or None, optional (default=None) - Grow a tree with ``max_leaf_nodes`` in best-first fashion. - Best nodes are defined as relative reduction in impurity. - If None then unlimited number of leaf nodes. + max_features : int, float, string or None, optional (default=None) + The number of features to consider when looking for the best split: + + - If int, then consider `max_features` features at each split. + - If float, then `max_features` is a percentage and + `int(max_features * n_features)` features are considered at each + split. + - If "auto", then `max_features=n_features`. + - If "sqrt", then `max_features=sqrt(n_features)`. + - If "log2", then `max_features=log2(n_features)`. + - If None, then `max_features=n_features`. + + Note: the search for a split does not stop until at least one + valid partition of the node samples is found, even if it requires to + effectively inspect more than ``max_features`` features. random_state : int, RandomState instance or None, optional (default=None) If int, random_state is the seed used by the random number generator; @@ -947,14 +945,10 @@ class DecisionTreeRegressor(BaseDecisionTree, RegressorMixin): If None, the random number generator is the RandomState instance used by `np.random`. - min_impurity_split : float, - Threshold for early stopping in tree growth. A node will split - if its impurity is above the threshold, otherwise it is a leaf. - - .. deprecated:: 0.19 - ``min_impurity_split`` has been deprecated in favor of - ``min_impurity_decrease`` in 0.19 and will be removed in 0.21. - Use ``min_impurity_decrease`` instead. 
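A minimal sketch of the replacement parameter, ``min_impurity_decrease``, in use (depth and threshold are illustrative choices):

    import numpy as np
    from sklearn.tree import DecisionTreeRegressor

    rng = np.random.RandomState(0)
    X, y = rng.rand(100, 3), rng.rand(100)
    reg = DecisionTreeRegressor(max_depth=4, min_impurity_decrease=0.01,
                                random_state=0).fit(X, y)
    print(reg.predict(X[:3]))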
+ max_leaf_nodes : int or None, optional (default=None) + Grow a tree with ``max_leaf_nodes`` in best-first fashion. + Best nodes are defined as relative reduction in impurity. + If None then unlimited number of leaf nodes. min_impurity_decrease : float, optional (default=0.) A node will be split if this split induces a decrease of the impurity @@ -974,6 +968,15 @@ class DecisionTreeRegressor(BaseDecisionTree, RegressorMixin): .. versionadded:: 0.19 + min_impurity_split : float, + Threshold for early stopping in tree growth. A node will split + if its impurity is above the threshold, otherwise it is a leaf. + + .. deprecated:: 0.19 + ``min_impurity_split`` has been deprecated in favor of + ``min_impurity_decrease`` in 0.19 and will be removed in 0.21. + Use ``min_impurity_decrease`` instead. + presort : bool, optional (default=False) Whether to presort the data to speed up the finding of best splits in fitting. For the default settings of a decision tree on large diff --git a/sklearn/utils/tests/test_docstring_parameters.py b/sklearn/utils/tests/test_docstring_parameters.py index 4764e98e5d789..f204d291507ac 100644 --- a/sklearn/utils/tests/test_docstring_parameters.py +++ b/sklearn/utils/tests/test_docstring_parameters.py @@ -28,7 +28,7 @@ 'sklearn.datasets', 'sklearn.decomposition', # 'sklearn.ensemble', - # 'sklearn.feature_extraction', + 'sklearn.feature_extraction', # 'sklearn.feature_selection', # 'sklearn.gaussian_process', 'sklearn.isotonic', @@ -40,11 +40,11 @@ # 'sklearn.mixture', # 'sklearn.model_selection', # 'sklearn.neighbors', - # 'sklearn.neural_network', + 'sklearn.neural_network', # 'sklearn.preprocessing', 'sklearn.pipeline', - # 'sklearn.semi_supervised', - # 'sklearn.tree', + 'sklearn.semi_supervised', + 'sklearn.tree', # 'sklearn.utils', ] @@ -56,6 +56,10 @@ 'sklearn.pipeline.make_union', 'sklearn.utils.extmath.safe_sparse_dot', 'RandomizedPCA', + 'BaseForest', + 'BaseDecisionTree', + 'ExtraTreeClassifier', + 'ExtraTreeRegressor', ] _tab_ignores = [ @@ -80,6 +84,7 @@ def test_docstring_parameters(): module = getattr(module, submod) classes = inspect.getmembers(module, inspect.isclass) for cname, cls in classes: + this_incorrect = [] if cname in _docstring_ignores: continue if cname.startswith('_') and cname not in _doc_special_members: @@ -90,7 +95,7 @@ def test_docstring_parameters(): raise RuntimeError('Error for __init__ of %s in %s:\n%s' % (cls, name, w[0])) if hasattr(cls, '__init__'): - incorrect += check_parameters_match(cls.__init__, cdoc) + this_incorrect += check_parameters_match(cls.__init__, cdoc) for method_name in cdoc.methods: method = getattr(cls, method_name) @@ -104,10 +109,14 @@ def test_docstring_parameters(): if ('y' in sig.parameters and sig.parameters['y'].default is None): param_ignore = ['y'] # ignore y for fit and score - incorrect += check_parameters_match(method, - ignore=param_ignore) + this_incorrect += check_parameters_match(method, + ignore=param_ignore) if hasattr(cls, '__call__'): - incorrect += check_parameters_match(cls.__call__) + this_incorrect += check_parameters_match(cls.__call__) + + # Append class name + incorrect += [c + ' (' + cname + ')' for c in this_incorrect] + functions = inspect.getmembers(module, inspect.isfunction) for fname, func in functions: if fname.startswith('_'): From d48d425e6473bc934a2ab73994bcea2e9c2336eb Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Fri, 9 Jun 2017 12:03:47 +0200 Subject: [PATCH 17/66] more --- sklearn/utils/tests/test_docstring_parameters.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/tests/test_docstring_parameters.py b/sklearn/utils/tests/test_docstring_parameters.py
index f204d291507ac..d6b6f02f7aabf 100644
--- a/sklearn/utils/tests/test_docstring_parameters.py
+++ b/sklearn/utils/tests/test_docstring_parameters.py
@@ -30,7 +30,7 @@
 # 'sklearn.ensemble',
 'sklearn.feature_extraction',
 # 'sklearn.feature_selection',
- # 'sklearn.gaussian_process',
+ 'sklearn.gaussian_process',
 'sklearn.isotonic',
 # 'sklearn.linear_model',
 # 'sklearn.manifold',

From cd36d2d01cb3e216570885cc84661b8659f21980 Mon Sep 17 00:00:00 2001
From: Alexandre Gramfort 
Date: Fri, 9 Jun 2017 12:10:11 +0200
Subject: [PATCH 18/66] more

---
 sklearn/neighbors/approximate.py | 12 +++++------
 sklearn/neighbors/classification.py | 20 +++++++++----------
 sklearn/neighbors/graph.py | 20 +++++++++----------
 sklearn/neighbors/lof.py | 12 +++++------
 sklearn/neighbors/regression.py | 20 +++++++++----------
 sklearn/neighbors/unsupervised.py | 12 +++++------
 .../utils/tests/test_docstring_parameters.py | 5 +++--
 7 files changed, 51 insertions(+), 50 deletions(-)

diff --git a/sklearn/neighbors/approximate.py b/sklearn/neighbors/approximate.py
index c19cb408d643d..70ba18d035144 100644
--- a/sklearn/neighbors/approximate.py
+++ b/sklearn/neighbors/approximate.py
@@ -130,9 +130,9 @@ class LSHForest(BaseEstimator, KNeighborsMixin, RadiusNeighborsMixin):
 n_estimators : int (default = 10)
 Number of trees in the LSH Forest.

- min_hash_match : int (default = 4)
- lowest hash length to be searched when candidate selection is
- performed for nearest neighbors.
+ radius : float, optional (default = 1.0)
+ Radius from the data point to its neighbors. This is the parameter
+ space to use by default for the :meth:`radius_neighbors` queries.

 n_candidates : int (default = 10)
 Minimum number of candidates evaluated per estimator, assuming enough

@@ -142,9 +142,9 @@
 Number of neighbors to be returned from query function when it is not
 provided to the :meth:`kneighbors` method.

- radius : float, optinal (default = 1.0)
- Radius from the data point to its neighbors. This is the parameter
- space to use by default for the :meth`radius_neighbors` queries.
+ min_hash_match : int (default = 4)
+ lowest hash length to be searched when candidate selection is
+ performed for nearest neighbors.

 radius_cutoff_ratio : float, optional (default = 0.9)
 A value ranges from 0 to 1. Radius neighbors will be searched until

diff --git a/sklearn/neighbors/classification.py b/sklearn/neighbors/classification.py
index ec14e4cab00ec..d029045da3ac5 100644
--- a/sklearn/neighbors/classification.py
+++ b/sklearn/neighbors/classification.py
@@ -61,17 +61,17 @@ class KNeighborsClassifier(NeighborsBase, KNeighborsMixin,
 required to store the tree. The optimal value depends on the
 nature of the problem.

+ p : integer, optional (default = 2)
+ Power parameter for the Minkowski metric. When p = 1, this is
+ equivalent to using manhattan_distance (l1), and euclidean_distance
+ (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
+
 metric : string or callable, default 'minkowski'
 the distance metric to use for the tree. The default metric is
 minkowski, and with p=2 is equivalent to the standard Euclidean
 metric. See the documentation of the DistanceMetric class for a
 list of available metrics.

- p : integer, optional (default = 2)
- Power parameter for the Minkowski metric.
When p = 1, this is
- equivalent to using manhattan_distance (l1), and euclidean_distance
- (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
-
 metric_params : dict, optional (default = None)
 Additional keyword arguments for the metric function.

@@ -268,17 +268,17 @@ class RadiusNeighborsClassifier(NeighborsBase, RadiusNeighborsMixin,
 required to store the tree. The optimal value depends on the
 nature of the problem.

+ p : integer, optional (default = 2)
+ Power parameter for the Minkowski metric. When p = 1, this is
+ equivalent to using manhattan_distance (l1), and euclidean_distance
+ (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
+
 metric : string or callable, default 'minkowski'
 the distance metric to use for the tree. The default metric is
 minkowski, and with p=2 is equivalent to the standard Euclidean
 metric. See the documentation of the DistanceMetric class for a
 list of available metrics.

- p : integer, optional (default = 2)
- Power parameter for the Minkowski metric. When p = 1, this is
- equivalent to using manhattan_distance (l1), and euclidean_distance
- (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
-
 outlier_label : int, optional (default = None)
 Label, which is given for outlier samples (samples with no
 neighbors on given radius).

diff --git a/sklearn/neighbors/graph.py b/sklearn/neighbors/graph.py
index f04596584f2bf..61a4561430cad 100644
--- a/sklearn/neighbors/graph.py
+++ b/sklearn/neighbors/graph.py
@@ -57,11 +57,6 @@ def kneighbors_graph(X, n_neighbors, mode='connectivity', metric='minkowski',
 The default distance is 'euclidean' ('minkowski' metric with the p
 param equal to 2.)

- include_self : bool, default=False.
- Whether or not to mark each sample as the first nearest neighbor to
- itself. If `None`, then True is used for mode='connectivity' and False
- for mode='distance' as this will preserve backwards compatibilty.
-
 p : int, default 2
 Power parameter for the Minkowski metric. When p = 1, this is
 equivalent to using manhattan_distance (l1), and euclidean_distance
 (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
@@ -70,6 +65,11 @@
 metric_params : dict, optional
 additional keyword arguments for the metric function.

+ include_self : bool, default=False.
+ Whether or not to mark each sample as the first nearest neighbor to
+ itself. If `None`, then True is used for mode='connectivity' and False
+ for mode='distance' as this will preserve backwards compatibility.
+
 n_jobs : int, optional (default = 1)
 The number of parallel jobs to run for neighbors search.
 If ``-1``, then the number of jobs is set to the number of CPU cores.

@@ -132,11 +132,6 @@ def radius_neighbors_graph(X, radius, mode='connectivity', metric='minkowski',
 gives a list of available metrics.
 The default distance is 'euclidean' ('minkowski' metric with the param
 equal to 2.)

- include_self : bool, default=False
- Whether or not to mark each sample as the first nearest neighbor to
- itself. If `None`, then True is used for mode='connectivity' and False
- for mode='distance' as this will preserve backwards compatibilty.
-
 p : int, default 2
 Power parameter for the Minkowski metric. When p = 1, this is
 equivalent to using manhattan_distance (l1), and euclidean_distance
 (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
@@ -145,6 +140,11 @@
 metric_params : dict, optional
 additional keyword arguments for the metric function.
+ include_self : bool, default=False
+ Whether or not to mark each sample as the first nearest neighbor to
+ itself. If `None`, then True is used for mode='connectivity' and False
+ for mode='distance' as this will preserve backwards compatibility.
+
 n_jobs : int, optional (default = 1)
 The number of parallel jobs to run for neighbors search.
 If ``-1``, then the number of jobs is set to the number of CPU cores.

diff --git a/sklearn/neighbors/lof.py b/sklearn/neighbors/lof.py
index c8595645e9b07..c1205eadc51fe 100644
--- a/sklearn/neighbors/lof.py
+++ b/sklearn/neighbors/lof.py
@@ -55,12 +55,6 @@ class LocalOutlierFactor(NeighborsBase, KNeighborsMixin, UnsupervisedMixin):
 required to store the tree. The optimal value depends on the
 nature of the problem.

- p : integer, optional (default=2)
- Parameter for the Minkowski metric from
- :ref:`sklearn.metrics.pairwise.pairwise_distances`. When p = 1, this is
- equivalent to using manhattan_distance (l1), and euclidean_distance
- (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
-
 metric : string or callable, default 'minkowski'
 metric used for the distance computation. Any metric from scikit-learn
 or scipy.spatial.distance can be used.
@@ -89,6 +83,12 @@
 metrics:
 http://docs.scipy.org/doc/scipy/reference/spatial.distance.html

+ p : integer, optional (default=2)
+ Parameter for the Minkowski metric from
+ :ref:`sklearn.metrics.pairwise.pairwise_distances`. When p = 1, this is
+ equivalent to using manhattan_distance (l1), and euclidean_distance
+ (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
+
 metric_params : dict, optional (default=None)
 Additional keyword arguments for the metric function.

diff --git a/sklearn/neighbors/regression.py b/sklearn/neighbors/regression.py
index c798806c24b02..2242e2fb504d9 100644
--- a/sklearn/neighbors/regression.py
+++ b/sklearn/neighbors/regression.py
@@ -63,17 +63,17 @@ class KNeighborsRegressor(NeighborsBase, KNeighborsMixin,
 required to store the tree. The optimal value depends on the
 nature of the problem.

+ p : integer, optional (default = 2)
+ Power parameter for the Minkowski metric. When p = 1, this is
+ equivalent to using manhattan_distance (l1), and euclidean_distance
+ (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
+
 metric : string or callable, default 'minkowski'
 the distance metric to use for the tree. The default metric is
 minkowski, and with p=2 is equivalent to the standard Euclidean
 metric. See the documentation of the DistanceMetric class for a
 list of available metrics.

- p : integer, optional (default = 2)
- Power parameter for the Minkowski metric. When p = 1, this is
- equivalent to using manhattan_distance (l1), and euclidean_distance
- (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
-
 metric_params : dict, optional (default = None)
 Additional keyword arguments for the metric function.

@@ -213,17 +213,17 @@ class RadiusNeighborsRegressor(NeighborsBase, RadiusNeighborsMixin,
 required to store the tree. The optimal value depends on the
 nature of the problem.

+ p : integer, optional (default = 2)
+ Power parameter for the Minkowski metric. When p = 1, this is
+ equivalent to using manhattan_distance (l1), and euclidean_distance
+ (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
+
 metric : string or callable, default 'minkowski'
 the distance metric to use for the tree.
The default metric is minkowski, and with p=2 is equivalent to the standard Euclidean metric. See the documentation of the DistanceMetric class for a list of available metrics. - p : integer, optional (default = 2) - Power parameter for the Minkowski metric. When p = 1, this is - equivalent to using manhattan_distance (l1), and euclidean_distance - (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used. - metric_params : dict, optional (default = None) Additional keyword arguments for the metric function. diff --git a/sklearn/neighbors/unsupervised.py b/sklearn/neighbors/unsupervised.py index 770f8f64c0270..95fc1fc1f6f3b 100644 --- a/sklearn/neighbors/unsupervised.py +++ b/sklearn/neighbors/unsupervised.py @@ -39,12 +39,6 @@ class NearestNeighbors(NeighborsBase, KNeighborsMixin, required to store the tree. The optimal value depends on the nature of the problem. - p : integer, optional (default = 2) - Parameter for the Minkowski metric from - sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is - equivalent to using manhattan_distance (l1), and euclidean_distance - (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used. - metric : string or callable, default 'minkowski' metric to use for distance computation. Any metric from scikit-learn or scipy.spatial.distance can be used. @@ -71,6 +65,12 @@ class NearestNeighbors(NeighborsBase, KNeighborsMixin, See the documentation for scipy.spatial.distance for details on these metrics. + p : integer, optional (default = 2) + Parameter for the Minkowski metric from + sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is + equivalent to using manhattan_distance (l1), and euclidean_distance + (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used. + metric_params : dict, optional (default = None) Additional keyword arguments for the metric function. diff --git a/sklearn/utils/tests/test_docstring_parameters.py b/sklearn/utils/tests/test_docstring_parameters.py index d6b6f02f7aabf..edb668be51c32 100644 --- a/sklearn/utils/tests/test_docstring_parameters.py +++ b/sklearn/utils/tests/test_docstring_parameters.py @@ -39,7 +39,7 @@ 'sklearn.naive_bayes', # 'sklearn.mixture', # 'sklearn.model_selection', - # 'sklearn.neighbors', + 'sklearn.neighbors', 'sklearn.neural_network', # 'sklearn.preprocessing', 'sklearn.pipeline', @@ -55,11 +55,12 @@ 'sklearn.pipeline.make_pipeline', 'sklearn.pipeline.make_union', 'sklearn.utils.extmath.safe_sparse_dot', - 'RandomizedPCA', + 'RandomizedPCA', # deprecated 'BaseForest', 'BaseDecisionTree', 'ExtraTreeClassifier', 'ExtraTreeRegressor', + 'GaussianProcess', # deprecated ] _tab_ignores = [ From 4183f6a058dc301314da1ecf88528587e2415b20 Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Fri, 9 Jun 2017 12:17:25 +0200 Subject: [PATCH 19/66] more --- sklearn/manifold/spectral_embedding_.py | 32 +++++++++---------- .../utils/tests/test_docstring_parameters.py | 9 ++++-- 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/sklearn/manifold/spectral_embedding_.py b/sklearn/manifold/spectral_embedding_.py index 5d10223fddd12..be2ac1ebcdede 100644 --- a/sklearn/manifold/spectral_embedding_.py +++ b/sklearn/manifold/spectral_embedding_.py @@ -179,15 +179,15 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None, Stopping criterion for eigendecomposition of the Laplacian matrix when using arpack eigen_solver. + norm_laplacian : bool, optional, default=True + If True, then compute normalized Laplacian. 
+ drop_first : bool, optional, default=True Whether to drop the first eigenvector. For spectral embedding, this should be True as the first eigenvector should be constant vector for connected graph, but for spectral clustering, this should be kept as False to retain the first eigenvector. - norm_laplacian : bool, optional, default=True - If True, then compute normalized Laplacian. - Returns ------- embedding : array, shape=(n_samples, n_components) @@ -344,19 +344,6 @@ class SpectralEmbedding(BaseEstimator): n_components : integer, default: 2 The dimension of the projected subspace. - eigen_solver : {None, 'arpack', 'lobpcg', or 'amg'} - The eigenvalue decomposition strategy to use. AMG requires pyamg - to be installed. It can be faster on very large, sparse problems, - but may also lead to instabilities. - - random_state : int, RandomState instance or None, optional, default: None - A pseudo random number generator used for the initialization of the - lobpcg eigenvectors. If int, random_state is the seed used by the - random number generator; If RandomState instance, random_state is the - random number generator; If None, the random number generator is the - RandomState instance used by `np.random`. Used when ``solver`` == - 'amg'. - affinity : string or callable, default : "nearest_neighbors" How to construct the affinity matrix. - 'nearest_neighbors' : construct affinity matrix by knn graph @@ -369,6 +356,19 @@ class SpectralEmbedding(BaseEstimator): gamma : float, optional, default : 1/n_features Kernel coefficient for rbf kernel. + random_state : int, RandomState instance or None, optional, default: None + A pseudo random number generator used for the initialization of the + lobpcg eigenvectors. If int, random_state is the seed used by the + random number generator; If RandomState instance, random_state is the + random number generator; If None, the random number generator is the + RandomState instance used by `np.random`. Used when ``solver`` == + 'amg'. + + eigen_solver : {None, 'arpack', 'lobpcg', or 'amg'} + The eigenvalue decomposition strategy to use. AMG requires pyamg + to be installed. It can be faster on very large, sparse problems, + but may also lead to instabilities. + n_neighbors : int, default : max(n_samples/10 , 1) Number of nearest neighbors for nearest_neighbors graph building. 
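
A minimal sketch (not part of the patch series) of the invariant the
reordering above restores. It assumes the 0.19-era constructor of
SpectralEmbedding; the printed order is the expected one, not a captured
output:

    import inspect
    from sklearn.manifold import SpectralEmbedding

    # inspect reports constructor parameters in declaration order; a
    # numpydoc "Parameters" section is expected to list them in the
    # same order, which is why the docstring entries above were moved.
    args = [name for name in
            inspect.signature(SpectralEmbedding.__init__).parameters
            if name != 'self']
    print(args)
    # expected: ['n_components', 'affinity', 'gamma', 'random_state',
    #            'eigen_solver', 'n_neighbors', 'n_jobs']
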
diff --git a/sklearn/utils/tests/test_docstring_parameters.py b/sklearn/utils/tests/test_docstring_parameters.py index edb668be51c32..0f5cebd64cd6b 100644 --- a/sklearn/utils/tests/test_docstring_parameters.py +++ b/sklearn/utils/tests/test_docstring_parameters.py @@ -33,11 +33,11 @@ 'sklearn.gaussian_process', 'sklearn.isotonic', # 'sklearn.linear_model', - # 'sklearn.manifold', + 'sklearn.manifold', 'sklearn.multiclass', 'sklearn.metrics', 'sklearn.naive_bayes', - # 'sklearn.mixture', + 'sklearn.mixture', # 'sklearn.model_selection', 'sklearn.neighbors', 'sklearn.neural_network', @@ -61,6 +61,11 @@ 'ExtraTreeClassifier', 'ExtraTreeRegressor', 'GaussianProcess', # deprecated + 'VBGMM', # deprecated + 'DPGMM', # deprecated + 'GMM', # deprecated + 'log_multivariate_normal_density', # deprecated + 'sample_gaussian', # deprecated ] _tab_ignores = [ From a15459d757e01c404973db57ba8c4b4f4447c732 Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Sat, 10 Jun 2017 18:04:21 +0200 Subject: [PATCH 20/66] more --- sklearn/linear_model/coordinate_descent.py | 210 +++++++++--------- sklearn/linear_model/least_angle.py | 159 ++++++------- sklearn/linear_model/logistic.py | 76 +++---- sklearn/linear_model/omp.py | 8 +- sklearn/linear_model/passive_aggressive.py | 44 ++-- sklearn/linear_model/perceptron.py | 18 +- sklearn/linear_model/randomized_l1.py | 22 +- sklearn/linear_model/ridge.py | 68 +++--- sklearn/linear_model/stochastic_gradient.py | 28 +-- .../utils/tests/test_docstring_parameters.py | 5 +- 10 files changed, 322 insertions(+), 316 deletions(-) diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py index 0b950b26a6240..8ce4e5e754af4 100644 --- a/sklearn/linear_model/coordinate_descent.py +++ b/sklearn/linear_model/coordinate_descent.py @@ -335,9 +335,6 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None, verbose : bool or integer Amount of verbosity. - params : kwargs - keyword arguments passed to the coordinate descent solver. - return_n_iter : bool whether to return the number of iterations or not. @@ -349,6 +346,9 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None, Skip input validation checks, including the Gram matrix when provided assuming there are handled by the caller when check_input=False. + params : kwargs + keyword arguments passed to the coordinate descent solver. + Returns ------- alphas : array, shape (n_alphas,) @@ -582,12 +582,6 @@ class ElasticNet(LinearModel, RegressorMixin): positive : bool, optional When set to ``True``, forces the coefficients to be positive. - selection : str, default 'cyclic' - If set to 'random', a random coefficient is updated every iteration - rather than looping over features sequentially by default. This - (setting to 'random') often leads to significantly faster convergence - especially when tol is higher than 1e-4. - random_state : int, RandomState instance or None, optional, default None The seed of the pseudo random number generator that selects a random feature to update. If int, random_state is the seed used by the random @@ -596,6 +590,12 @@ class ElasticNet(LinearModel, RegressorMixin): RandomState instance used by `np.random`. Used when ``selection`` == 'random'. + selection : str, default 'cyclic' + If set to 'random', a random coefficient is updated every iteration + rather than looping over features sequentially by default. This + (setting to 'random') often leads to significantly faster convergence + especially when tol is higher than 1e-4. 
+ Attributes ---------- coef_ : array, shape (n_features,) | (n_targets, n_features) @@ -807,15 +807,15 @@ class Lasso(ElasticNet): :class:`sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``. - copy_X : boolean, optional, default True - If ``True``, X will be copied; else, it may be overwritten. - precompute : True | False | array-like, default=False Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram matrix can also be passed as argument. For sparse input this option is always ``True`` to preserve sparsity. + copy_X : boolean, optional, default True + If ``True``, X will be copied; else, it may be overwritten. + max_iter : int, optional The maximum number of iterations @@ -832,12 +832,6 @@ class Lasso(ElasticNet): positive : bool, optional When set to ``True``, forces the coefficients to be positive. - selection : str, default 'cyclic' - If set to 'random', a random coefficient is updated every iteration - rather than looping over features sequentially by default. This - (setting to 'random') often leads to significantly faster convergence - especially when tol is higher than 1e-4. - random_state : int, RandomState instance or None, optional, default None The seed of the pseudo random number generator that selects a random feature to update. If int, random_state is the seed used by the random @@ -846,6 +840,12 @@ class Lasso(ElasticNet): RandomState instance used by `np.random`. Used when ``selection`` == 'random'. + selection : str, default 'cyclic' + If set to 'random', a random coefficient is updated every iteration + rather than looping over features sequentially by default. This + (setting to 'random') often leads to significantly faster convergence + especially when tol is higher than 1e-4. + Attributes ---------- coef_ : array, shape (n_features,) | (n_targets, n_features) @@ -1234,6 +1234,19 @@ class LassoCV(LinearModelCV, RegressorMixin): List of alphas where to compute the models. If ``None`` alphas are set automatically + fit_intercept : boolean, default True + whether to calculate the intercept for this model. If set + to false, no intercept will be used in calculations + (e.g. data is expected to be already centered). + + normalize : boolean, optional, default False + This parameter is ignored when ``fit_intercept`` is set to False. + If True, the regressors X will be normalized before regression by + subtracting the mean and dividing by the l2-norm. + If you wish to standardize, please use + :class:`sklearn.preprocessing.StandardScaler` before calling ``fit`` + on an estimator with ``normalize=False``. + precompute : True | False | 'auto' | array-like Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram @@ -1248,6 +1261,9 @@ class LassoCV(LinearModelCV, RegressorMixin): dual gap for optimality and continues until it is smaller than ``tol``. + copy_X : boolean, optional, default True + If ``True``, X will be copied; else, it may be overwritten. + cv : int, cross-validation generator or an iterable, optional Determines the cross-validation splitting strategy. Possible inputs for cv are: @@ -1272,12 +1288,6 @@ class LassoCV(LinearModelCV, RegressorMixin): positive : bool, optional If positive, restrict regression coefficients to be positive - selection : str, default 'cyclic' - If set to 'random', a random coefficient is updated every iteration - rather than looping over features sequentially by default. 
This - (setting to 'random') often leads to significantly faster convergence - especially when tol is higher than 1e-4. - random_state : int, RandomState instance or None, optional, default None The seed of the pseudo random number generator that selects a random feature to update. If int, random_state is the seed used by the random @@ -1286,21 +1296,11 @@ class LassoCV(LinearModelCV, RegressorMixin): RandomState instance used by `np.random`. Used when ``selection`` == 'random'. - fit_intercept : boolean, default True - whether to calculate the intercept for this model. If set - to false, no intercept will be used in calculations - (e.g. data is expected to be already centered). - - normalize : boolean, optional, default False - This parameter is ignored when ``fit_intercept`` is set to False. - If True, the regressors X will be normalized before regression by - subtracting the mean and dividing by the l2-norm. - If you wish to standardize, please use - :class:`sklearn.preprocessing.StandardScaler` before calling ``fit`` - on an estimator with ``normalize=False``. - - copy_X : boolean, optional, default True - If ``True``, X will be copied; else, it may be overwritten. + selection : str, default 'cyclic' + If set to 'random', a random coefficient is updated every iteration + rather than looping over features sequentially by default. This + (setting to 'random') often leads to significantly faster convergence + especially when tol is higher than 1e-4. Attributes ---------- @@ -1389,6 +1389,19 @@ class ElasticNetCV(LinearModelCV, RegressorMixin): List of alphas where to compute the models. If None alphas are set automatically + fit_intercept : boolean + whether to calculate the intercept for this model. If set + to false, no intercept will be used in calculations + (e.g. data is expected to be already centered). + + normalize : boolean, optional, default False + This parameter is ignored when ``fit_intercept`` is set to False. + If True, the regressors X will be normalized before regression by + subtracting the mean and dividing by the l2-norm. + If you wish to standardize, please use + :class:`sklearn.preprocessing.StandardScaler` before calling ``fit`` + on an estimator with ``normalize=False``. + precompute : True | False | 'auto' | array-like Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram @@ -1417,6 +1430,9 @@ class ElasticNetCV(LinearModelCV, RegressorMixin): Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. + copy_X : boolean, optional, default True + If ``True``, X will be copied; else, it may be overwritten. + verbose : bool or integer Amount of verbosity. @@ -1427,12 +1443,6 @@ class ElasticNetCV(LinearModelCV, RegressorMixin): positive : bool, optional When set to ``True``, forces the coefficients to be positive. - selection : str, default 'cyclic' - If set to 'random', a random coefficient is updated every iteration - rather than looping over features sequentially by default. This - (setting to 'random') often leads to significantly faster convergence - especially when tol is higher than 1e-4. - random_state : int, RandomState instance or None, optional, default None The seed of the pseudo random number generator that selects a random feature to update. If int, random_state is the seed used by the random @@ -1441,21 +1451,11 @@ class ElasticNetCV(LinearModelCV, RegressorMixin): RandomState instance used by `np.random`. Used when ``selection`` == 'random'. 
- fit_intercept : boolean - whether to calculate the intercept for this model. If set - to false, no intercept will be used in calculations - (e.g. data is expected to be already centered). - - normalize : boolean, optional, default False - This parameter is ignored when ``fit_intercept`` is set to False. - If True, the regressors X will be normalized before regression by - subtracting the mean and dividing by the l2-norm. - If you wish to standardize, please use - :class:`sklearn.preprocessing.StandardScaler` before calling ``fit`` - on an estimator with ``normalize=False``. - - copy_X : boolean, optional, default True - If ``True``, X will be copied; else, it may be overwritten. + selection : str, default 'cyclic' + If set to 'random', a random coefficient is updated every iteration + rather than looping over features sequentially by default. This + (setting to 'random') often leads to significantly faster convergence + especially when tol is higher than 1e-4. Attributes ---------- @@ -1600,12 +1600,6 @@ class MultiTaskElasticNet(Lasso): When set to ``True``, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. - selection : str, default 'cyclic' - If set to 'random', a random coefficient is updated every iteration - rather than looping over features sequentially by default. This - (setting to 'random') often leads to significantly faster convergence - especially when tol is higher than 1e-4. - random_state : int, RandomState instance or None, optional, default None The seed of the pseudo random number generator that selects a random feature to update. If int, random_state is the seed used by the random @@ -1614,6 +1608,12 @@ class MultiTaskElasticNet(Lasso): RandomState instance used by `np.random`. Used when ``selection`` == 'random'. + selection : str, default 'cyclic' + If set to 'random', a random coefficient is updated every iteration + rather than looping over features sequentially by default. This + (setting to 'random') often leads to significantly faster convergence + especially when tol is higher than 1e-4. + Attributes ---------- intercept_ : array, shape (n_tasks,) @@ -1787,12 +1787,6 @@ class MultiTaskLasso(MultiTaskElasticNet): When set to ``True``, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. - selection : str, default 'cyclic' - If set to 'random', a random coefficient is updated every iteration - rather than looping over features sequentially by default. This - (setting to 'random') often leads to significantly faster convergence - especially when tol is higher than 1e-4 - random_state : int, RandomState instance or None, optional, default None The seed of the pseudo random number generator that selects a random feature to update. If int, random_state is the seed used by the random @@ -1801,6 +1795,12 @@ class MultiTaskLasso(MultiTaskElasticNet): RandomState instance used by `np.random`. Used when ``selection`` == 'random'. + selection : str, default 'cyclic' + If set to 'random', a random coefficient is updated every iteration + rather than looping over features sequentially by default. This + (setting to 'random') often leads to significantly faster convergence + especially when tol is higher than 1e-4 + Attributes ---------- coef_ : array, shape (n_tasks, n_features) @@ -1873,17 +1873,6 @@ class MultiTaskElasticNetCV(LinearModelCV, RegressorMixin): Parameters ---------- - eps : float, optional - Length of the path. 
``eps=1e-3`` means that - ``alpha_min / alpha_max = 1e-3``. - - alphas : array-like, optional - List of alphas where to compute the models. - If not provided, set automatically. - - n_alphas : int, optional - Number of alphas along the regularization path - l1_ratio : float or array of floats The ElasticNet mixing parameter, with 0 < l1_ratio <= 1. For l1_ratio = 1 the penalty is an L1/L2 penalty. For l1_ratio = 0 it @@ -1896,6 +1885,17 @@ class MultiTaskElasticNetCV(LinearModelCV, RegressorMixin): (i.e. Lasso) and less close to 0 (i.e. Ridge), as in ``[.1, .5, .7, .9, .95, .99, 1]`` + eps : float, optional + Length of the path. ``eps=1e-3`` means that + ``alpha_min / alpha_max = 1e-3``. + + n_alphas : int, optional + Number of alphas along the regularization path + + alphas : array-like, optional + List of alphas where to compute the models. + If not provided, set automatically. + fit_intercept : boolean whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations @@ -1909,9 +1909,6 @@ class MultiTaskElasticNetCV(LinearModelCV, RegressorMixin): :class:`sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``. - copy_X : boolean, optional, default True - If ``True``, X will be copied; else, it may be overwritten. - max_iter : int, optional The maximum number of iterations @@ -1935,6 +1932,9 @@ class MultiTaskElasticNetCV(LinearModelCV, RegressorMixin): Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. + copy_X : boolean, optional, default True + If ``True``, X will be copied; else, it may be overwritten. + verbose : bool or integer Amount of verbosity. @@ -1943,12 +1943,6 @@ class MultiTaskElasticNetCV(LinearModelCV, RegressorMixin): all the CPUs. Note that this is used only if multiple values for l1_ratio are given. - selection : str, default 'cyclic' - If set to 'random', a random coefficient is updated every iteration - rather than looping over features sequentially by default. This - (setting to 'random') often leads to significantly faster convergence - especially when tol is higher than 1e-4. - random_state : int, RandomState instance or None, optional, default None The seed of the pseudo random number generator that selects a random feature to update. If int, random_state is the seed used by the random @@ -1957,6 +1951,12 @@ class MultiTaskElasticNetCV(LinearModelCV, RegressorMixin): RandomState instance used by `np.random`. Used when ``selection`` == 'random'. + selection : str, default 'cyclic' + If set to 'random', a random coefficient is updated every iteration + rather than looping over features sequentially by default. This + (setting to 'random') often leads to significantly faster convergence + especially when tol is higher than 1e-4. + Attributes ---------- intercept_ : array, shape (n_tasks,) @@ -2056,13 +2056,13 @@ class MultiTaskLassoCV(LinearModelCV, RegressorMixin): Length of the path. ``eps=1e-3`` means that ``alpha_min / alpha_max = 1e-3``. + n_alphas : int, optional + Number of alphas along the regularization path + alphas : array-like, optional List of alphas where to compute the models. If not provided, set automatically. - n_alphas : int, optional - Number of alphas along the regularization path - fit_intercept : boolean whether to calculate the intercept for this model. 
If set to false, no intercept will be used in calculations @@ -2076,9 +2076,6 @@ class MultiTaskLassoCV(LinearModelCV, RegressorMixin): :class:`sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``. - copy_X : boolean, optional, default True - If ``True``, X will be copied; else, it may be overwritten. - max_iter : int, optional The maximum number of iterations. @@ -2088,6 +2085,9 @@ class MultiTaskLassoCV(LinearModelCV, RegressorMixin): dual gap for optimality and continues until it is smaller than ``tol``. + copy_X : boolean, optional, default True + If ``True``, X will be copied; else, it may be overwritten. + cv : int, cross-validation generator or an iterable, optional Determines the cross-validation splitting strategy. Possible inputs for cv are: @@ -2110,19 +2110,19 @@ class MultiTaskLassoCV(LinearModelCV, RegressorMixin): all the CPUs. Note that this is used only if multiple values for l1_ratio are given. - selection : str, default 'cyclic' - If set to 'random', a random coefficient is updated every iteration - rather than looping over features sequentially by default. This - (setting to 'random') often leads to significantly faster convergence - especially when tol is higher than 1e-4. - random_state : int, RandomState instance or None, optional, default None The seed of the pseudo random number generator that selects a random feature to update. If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by `np.random`. Used when ``selection`` == - 'random'/ + 'random' + + selection : str, default 'cyclic' + If set to 'random', a random coefficient is updated every iteration + rather than looping over features sequentially by default. This + (setting to 'random') often leads to significantly faster convergence + especially when tol is higher than 1e-4. Attributes ---------- diff --git a/sklearn/linear_model/least_angle.py b/sklearn/linear_model/least_angle.py index da1c001798024..51f8408a5a8c0 100644 --- a/sklearn/linear_model/least_angle.py +++ b/sklearn/linear_model/least_angle.py @@ -54,24 +54,19 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500, y : array, shape: (n_samples) Input targets. - positive : boolean (default=False) - Restrict coefficients to be >= 0. - When using this option together with method 'lasso' the model - coefficients will not converge to the ordinary-least-squares solution - for small values of alpha (neither will they when using method 'lar' - ..). Only coefficients up to the smallest alpha value (``alphas_[alphas_ > - 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso - algorithm are typically in congruence with the solution of the - coordinate descent lasso_path function. - - max_iter : integer, optional (default=500) - Maximum number of iterations to perform, set to infinity for no limit. + Xy : array-like, shape (n_samples,) or (n_samples, n_targets), \ + optional + Xy = np.dot(X.T, y) that can be precomputed. It is useful + only when the Gram matrix is precomputed. Gram : None, 'auto', array, shape: (n_features, n_features), optional Precomputed Gram matrix (X' * X), if ``'auto'``, the Gram matrix is precomputed from the given X, if there are more samples than features. + max_iter : integer, optional (default=500) + Maximum number of iterations to perform, set to infinity for no limit. 
+ alpha_min : float, optional (default=0) Minimum correlation along the path. It corresponds to the regularization parameter alpha parameter in the Lasso. @@ -80,14 +75,14 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500, Specifies the returned model. Select ``'lar'`` for Least Angle Regression, ``'lasso'`` for the Lasso. + copy_X : bool, optional (default=True) + If ``False``, ``X`` is overwritten. + eps : float, optional (default=``np.finfo(np.float).eps``) The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned systems. - copy_X : bool, optional (default=True) - If ``False``, ``X`` is overwritten. - copy_Gram : bool, optional (default=True) If ``False``, ``Gram`` is overwritten. @@ -101,6 +96,16 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500, return_n_iter : bool, optional (default=False) Whether to return the number of iterations. + positive : boolean (default=False) + Restrict coefficients to be >= 0. + When using this option together with method 'lasso' the model + coefficients will not converge to the ordinary-least-squares solution + for small values of alpha (neither will they when using method 'lar' + ..). Only coefficients up to the smallest alpha value (``alphas_[alphas_ > + 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso + algorithm are typically in congruence with the solution of the + coordinate descent lasso_path function. + Returns -------- alphas : array, shape: [n_alphas + 1] @@ -494,18 +499,11 @@ class Lars(LinearModel, RegressorMixin): Parameters ---------- - n_nonzero_coefs : int, optional - Target number of non-zero coefficients. Use ``np.inf`` for no limit. - fit_intercept : boolean Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (e.g. data is expected to be already centered). - positive : boolean (default=False) - Restrict coefficients to be >= 0. Be aware that you might want to - remove fit_intercept which is set True by default. - verbose : boolean or integer, optional Sets the verbosity amount @@ -522,8 +520,8 @@ class Lars(LinearModel, RegressorMixin): calculations. If set to ``'auto'`` let us decide. The Gram matrix can also be passed as argument. - copy_X : boolean, optional, default True - If ``True``, X will be copied; else, it may be overwritten. + n_nonzero_coefs : int, optional + Target number of non-zero coefficients. Use ``np.inf`` for no limit. eps : float, optional The machine-precision regularization in the computation of the @@ -532,12 +530,19 @@ class Lars(LinearModel, RegressorMixin): optimization-based algorithms, this parameter does not control the tolerance of the optimization. + copy_X : boolean, optional, default True + If ``True``, X will be copied; else, it may be overwritten. + fit_path : boolean If True the full path is stored in the ``coef_path_`` attribute. If you compute the solution for a large problem or many targets, setting ``fit_path`` to ``False`` will lead to a speedup, especially with a small alpha. + positive : boolean (default=False) + Restrict coefficients to be >= 0. Be aware that you might want to + remove fit_intercept which is set True by default. + Attributes ---------- alphas_ : array, shape (n_alphas + 1,) | list of n_targets such arrays @@ -727,16 +732,6 @@ class LassoLars(Lars): to false, no intercept will be used in calculations (e.g. data is expected to be already centered). - positive : boolean (default=False) - Restrict coefficients to be >= 0. 
Be aware that you might want to - remove fit_intercept which is set True by default. - Under the positive restriction the model coefficients will not converge - to the ordinary-least-squares solution for small values of alpha. - Only coefficients up to the smallest alpha value (``alphas_[alphas_ > - 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso - algorithm are typically in congruence with the solution of the - coordinate descent Lasso estimator. - verbose : boolean or integer, optional Sets the verbosity amount @@ -748,9 +743,6 @@ class LassoLars(Lars): :class:`sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``. - copy_X : boolean, optional, default True - If True, X will be copied; else, it may be overwritten. - precompute : True | False | 'auto' | array-like Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram @@ -766,12 +758,25 @@ class LassoLars(Lars): optimization-based algorithms, this parameter does not control the tolerance of the optimization. + copy_X : boolean, optional, default True + If True, X will be copied; else, it may be overwritten. + fit_path : boolean If ``True`` the full path is stored in the ``coef_path_`` attribute. If you compute the solution for a large problem or many targets, setting ``fit_path`` to ``False`` will lead to a speedup, especially with a small alpha. + positive : boolean (default=False) + Restrict coefficients to be >= 0. Be aware that you might want to + remove fit_intercept which is set True by default. + Under the positive restriction the model coefficients will not converge + to the ordinary-least-squares solution for small values of alpha. + Only coefficients up to the smallest alpha value (``alphas_[alphas_ > + 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso + algorithm are typically in congruence with the solution of the + coordinate descent Lasso estimator. + Attributes ---------- alphas_ : array, shape (n_alphas + 1,) | list of n_targets such arrays @@ -972,13 +977,12 @@ class LarsCV(Lars): to false, no intercept will be used in calculations (e.g. data is expected to be already centered). - positive : boolean (default=False) - Restrict coefficients to be >= 0. Be aware that you might want to - remove fit_intercept which is set True by default. - verbose : boolean or integer, optional Sets the verbosity amount + max_iter : integer, optional + Maximum number of iterations to perform. + normalize : boolean, optional, default True This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by @@ -987,17 +991,11 @@ class LarsCV(Lars): :class:`sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``. - copy_X : boolean, optional, default True - If ``True``, X will be copied; else, it may be overwritten. - precompute : True | False | 'auto' | array-like Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram matrix can also be passed as argument. - max_iter : integer, optional - Maximum number of iterations to perform. - cv : int, cross-validation generator or an iterable, optional Determines the cross-validation splitting strategy. Possible inputs for cv are: @@ -1025,6 +1023,13 @@ class LarsCV(Lars): Cholesky diagonal factors. Increase this for very ill-conditioned systems. 
+ copy_X : boolean, optional, default True + If ``True``, X will be copied; else, it may be overwritten. + + positive : boolean (default=False) + Restrict coefficients to be >= 0. Be aware that you might want to + remove fit_intercept which is set True by default. + Attributes ---------- @@ -1176,21 +1181,12 @@ class LassoLarsCV(LarsCV): to false, no intercept will be used in calculations (e.g. data is expected to be already centered). - positive : boolean (default=False) - Restrict coefficients to be >= 0. Be aware that you might want to - remove fit_intercept which is set True by default. - Under the positive restriction the model coefficients do not converge - to the ordinary-least-squares solution for small values of alpha. - Only coefficients up to the smallest alpha value (``alphas_[alphas_ > - 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso - algorithm are typically in congruence with the solution of the - coordinate descent Lasso estimator. - As a consequence using LassoLarsCV only makes sense for problems where - a sparse solution is expected and/or reached. - verbose : boolean or integer, optional Sets the verbosity amount + max_iter : integer, optional + Maximum number of iterations to perform. + normalize : boolean, optional, default True This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by @@ -1204,9 +1200,6 @@ class LassoLarsCV(LarsCV): calculations. If set to ``'auto'`` let us decide. The Gram matrix can also be passed as argument. - max_iter : integer, optional - Maximum number of iterations to perform. - cv : int, cross-validation generator or an iterable, optional Determines the cross-validation splitting strategy. Possible inputs for cv are: @@ -1237,6 +1230,18 @@ class LassoLarsCV(LarsCV): copy_X : boolean, optional, default True If True, X will be copied; else, it may be overwritten. + positive : boolean (default=False) + Restrict coefficients to be >= 0. Be aware that you might want to + remove fit_intercept which is set True by default. + Under the positive restriction the model coefficients do not converge + to the ordinary-least-squares solution for small values of alpha. + Only coefficients up to the smallest alpha value (``alphas_[alphas_ > + 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso + algorithm are typically in congruence with the solution of the + coordinate descent Lasso estimator. + As a consequence using LassoLarsCV only makes sense for problems where + a sparse solution is expected and/or reached. + Attributes ---------- coef_ : array, shape (n_features,) @@ -1309,18 +1314,6 @@ class LassoLarsIC(LassoLars): to false, no intercept will be used in calculations (e.g. data is expected to be already centered). - positive : boolean (default=False) - Restrict coefficients to be >= 0. Be aware that you might want to - remove fit_intercept which is set True by default. - Under the positive restriction the model coefficients do not converge - to the ordinary-least-squares solution for small values of alpha. - Only coefficients up to the smallest alpha value (``alphas_[alphas_ > - 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso - algorithm are typically in congruence with the solution of the - coordinate descent Lasso estimator. - As a consequence using LassoLarsIC only makes sense for problems where - a sparse solution is expected and/or reached. 
- verbose : boolean or integer, optional Sets the verbosity amount @@ -1332,9 +1325,6 @@ class LassoLarsIC(LassoLars): :class:`sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``. - copy_X : boolean, optional, default True - If True, X will be copied; else, it may be overwritten. - precompute : True | False | 'auto' | array-like Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram @@ -1351,6 +1341,21 @@ class LassoLarsIC(LassoLars): optimization-based algorithms, this parameter does not control the tolerance of the optimization. + copy_X : boolean, optional, default True + If True, X will be copied; else, it may be overwritten. + + positive : boolean (default=False) + Restrict coefficients to be >= 0. Be aware that you might want to + remove fit_intercept which is set True by default. + Under the positive restriction the model coefficients do not converge + to the ordinary-least-squares solution for small values of alpha. + Only coefficients up to the smallest alpha value (``alphas_[alphas_ > + 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso + algorithm are typically in congruence with the solution of the + coordinate descent Lasso estimator. + As a consequence using LassoLarsIC only makes sense for problems where + a sparse solution is expected and/or reached. + Attributes ---------- diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index c12b4943807b1..d22fdad795e11 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -472,16 +472,16 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, y : array-like, shape (n_samples,) Input data, target values. + pos_class : int, None + The class with respect to which we perform a one-vs-all fit. + If None, then it is assumed that the given problem is binary. + Cs : int | array-like, shape (n_cs,) List of values for the regularization parameter or integer specifying the number of regularization parameters that should be used. In this case, the parameters will be chosen in a logarithmic scale between 1e-4 and 1e4. - pos_class : int, None - The class with respect to which we perform a one-vs-all fit. - If None, then it is assumed that the given problem is binary. - fit_intercept : bool Whether to fit an intercept for the model. In this case the shape of the returned array is (n_cs, n_features + 1). @@ -994,6 +994,9 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, l2 penalty with liblinear solver. Prefer dual=False when n_samples > n_features. + tol : float, default: 1e-4 + Tolerance for stopping criteria. + C : float, default: 1.0 Inverse of regularization strength; must be a positive float. Like in support vector machines, smaller values specify stronger @@ -1030,10 +1033,6 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, .. versionadded:: 0.17 *class_weight='balanced'* - max_iter : int, default: 100 - Useful only for the newton-cg, sag and lbfgs solvers. - Maximum number of iterations taken for the solvers to converge. - random_state : int, RandomState instance or None, optional, default: None The seed of the pseudo random number generator to use when shuffling the data. If int, random_state is the seed used by the random number @@ -1063,8 +1062,9 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, .. versionadded:: 0.19 SAGA solver. 
- tol : float, default: 1e-4 - Tolerance for stopping criteria. + max_iter : int, default: 100 + Useful only for the newton-cg, sag and lbfgs solvers. + Maximum number of iterations taken for the solvers to converge. multi_class : str, {'ovr', 'multinomial'}, default: 'ovr' Multiclass option can be either 'ovr' or 'multinomial'. If the option @@ -1386,35 +1386,21 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, Specifies if a constant (a.k.a. bias or intercept) should be added to the decision function. - class_weight : dict or 'balanced', optional - Weights associated with classes in the form ``{class_label: weight}``. - If not given, all classes are supposed to have weight one. - - The "balanced" mode uses the values of y to automatically adjust - weights inversely proportional to class frequencies in the input data - as ``n_samples / (n_classes * np.bincount(y))``. - - Note that these weights will be multiplied with sample_weight (passed - through the fit method) if sample_weight is specified. - - .. versionadded:: 0.17 - class_weight == 'balanced' - cv : integer or cross-validation generator The default cross-validation generator used is Stratified K-Folds. If an integer is provided, then it is the number of folds used. See the module :mod:`sklearn.model_selection` module for the list of possible cross-validation objects. - penalty : str, 'l1' or 'l2' - Used to specify the norm used in the penalization. The 'newton-cg', - 'sag' and 'lbfgs' solvers support only l2 penalties. - dual : bool Dual or primal formulation. Dual formulation is only implemented for l2 penalty with liblinear solver. Prefer dual=False when n_samples > n_features. + penalty : str, 'l1' or 'l2' + Used to specify the norm used in the penalization. The 'newton-cg', + 'sag' and 'lbfgs' solvers support only l2 penalties. + scoring : string, callable, or None A string (see model evaluation documentation) or a scorer callable object / function with signature @@ -1451,6 +1437,20 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, max_iter : int, optional Maximum number of iterations of the optimization algorithm. + class_weight : dict or 'balanced', optional + Weights associated with classes in the form ``{class_label: weight}``. + If not given, all classes are supposed to have weight one. + + The "balanced" mode uses the values of y to automatically adjust + weights inversely proportional to class frequencies in the input data + as ``n_samples / (n_classes * np.bincount(y))``. + + Note that these weights will be multiplied with sample_weight (passed + through the fit method) if sample_weight is specified. + + .. versionadded:: 0.17 + class_weight == 'balanced' + n_jobs : int, optional Number of CPU cores used during the cross-validation loop. If given a value of -1, all cores are used. @@ -1466,16 +1466,6 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, Otherwise the coefs, intercepts and C that correspond to the best scores across folds are averaged. - multi_class : str, {'ovr', 'multinomial'} - Multiclass option can be either 'ovr' or 'multinomial'. If the option - chosen is 'ovr', then a binary problem is fit for each label. Else - the loss minimised is the multinomial loss fit across - the entire probability distribution. Works only for the 'newton-cg', - 'sag' and 'lbfgs' solver. - - .. versionadded:: 0.18 - Stochastic Average Gradient descent solver for 'multinomial' case. - intercept_scaling : float, default 1. 
Useful only when the solver 'liblinear' is used and self.fit_intercept is set to True. In this case, x becomes @@ -1489,6 +1479,16 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, To lessen the effect of regularization on synthetic feature weight (and therefore on the intercept) intercept_scaling has to be increased. + multi_class : str, {'ovr', 'multinomial'} + Multiclass option can be either 'ovr' or 'multinomial'. If the option + chosen is 'ovr', then a binary problem is fit for each label. Else + the loss minimised is the multinomial loss fit across + the entire probability distribution. Works only for the 'newton-cg', + 'sag' and 'lbfgs' solver. + + .. versionadded:: 0.18 + Stochastic Average Gradient descent solver for 'multinomial' case. + random_state : int, RandomState instance or None, optional, default None If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; diff --git a/sklearn/linear_model/omp.py b/sklearn/linear_model/omp.py index 7b75c4717e9a1..684c2a50d7f94 100644 --- a/sklearn/linear_model/omp.py +++ b/sklearn/linear_model/omp.py @@ -537,6 +537,8 @@ def orthogonal_mp_gram(Gram, Xy, n_nonzero_coefs=None, tol=None, class OrthogonalMatchingPursuit(LinearModel, RegressorMixin): """Orthogonal Matching Pursuit model (OMP) + Read more in the :ref:`User Guide `. + Parameters ---------- n_nonzero_coefs : int, optional @@ -565,8 +567,6 @@ class OrthogonalMatchingPursuit(LinearModel, RegressorMixin): very large. Note that if you already have such matrices, you can pass them directly to the fit method. - Read more in the :ref:`User Guide `. - Attributes ---------- coef_ : array, shape (n_features,) or (n_targets, n_features) @@ -740,6 +740,8 @@ def _omp_path_residues(X_train, y_train, X_test, y_test, copy=True, class OrthogonalMatchingPursuitCV(LinearModel, RegressorMixin): """Cross-validated Orthogonal Matching Pursuit model (OMP) + Read more in the :ref:`User Guide `. + Parameters ---------- copy : bool, optional @@ -785,8 +787,6 @@ class OrthogonalMatchingPursuitCV(LinearModel, RegressorMixin): verbose : boolean or integer, optional Sets the verbosity amount - Read more in the :ref:`User Guide `. - Attributes ---------- intercept_ : float or array, shape (n_targets,) diff --git a/sklearn/linear_model/passive_aggressive.py b/sklearn/linear_model/passive_aggressive.py index 941f398bd6e13..74e3c6df4086a 100644 --- a/sklearn/linear_model/passive_aggressive.py +++ b/sklearn/linear_model/passive_aggressive.py @@ -28,25 +28,25 @@ class PassiveAggressiveClassifier(BaseSGDClassifier): shuffle : bool, default=True Whether or not the training data should be shuffled after each epoch. - random_state : int, RandomState instance or None, optional, default=None - The seed of the pseudo random number generator to use when shuffling - the data. If int, random_state is the seed used by the random number - generator; If RandomState instance, random_state is the random number - generator; If None, the random number generator is the RandomState - instance used by `np.random`. - verbose : integer, optional The verbosity level + loss : string, optional + The loss function to be used: + hinge: equivalent to PA-I in the reference paper. + squared_hinge: equivalent to PA-II in the reference paper. + n_jobs : integer, optional The number of CPUs to use to do the OVA (One Versus All, for multi-class problems) computation. -1 means 'all CPUs'. Defaults to 1. 
- loss : string, optional - The loss function to be used: - hinge: equivalent to PA-I in the reference paper. - squared_hinge: equivalent to PA-II in the reference paper. + random_state : int, RandomState instance or None, optional, default=None + The seed of the pseudo random number generator to use when shuffling + the data. If int, random_state is the seed used by the random number + generator; If RandomState instance, random_state is the random number + generator; If None, the random number generator is the RandomState + instance used by `np.random`. warm_start : bool, optional When set to True, reuse the solution of the previous call to fit as @@ -192,10 +192,6 @@ class PassiveAggressiveRegressor(BaseSGDRegressor): C : float Maximum step size (regularization). Defaults to 1.0. - epsilon : float - If the difference between the current prediction and the correct label - is below this threshold, the model is not updated. - fit_intercept : bool Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. Defaults to True. @@ -207,13 +203,6 @@ class PassiveAggressiveRegressor(BaseSGDRegressor): shuffle : bool, default=True Whether or not the training data should be shuffled after each epoch. - random_state : int, RandomState instance or None, optional, default=None - The seed of the pseudo random number generator to use when shuffling - the data. If int, random_state is the seed used by the random number - generator; If RandomState instance, random_state is the random number - generator; If None, the random number generator is the RandomState - instance used by `np.random`. - verbose : integer, optional The verbosity level @@ -223,6 +212,17 @@ class PassiveAggressiveRegressor(BaseSGDRegressor): squared_epsilon_insensitive: equivalent to PA-II in the reference paper. + epsilon : float + If the difference between the current prediction and the correct label + is below this threshold, the model is not updated. + + random_state : int, RandomState instance or None, optional, default=None + The seed of the pseudo random number generator to use when shuffling + the data. If int, random_state is the seed used by the random number + generator; If RandomState instance, random_state is the random number + generator; If None, the random number generator is the RandomState + instance used by `np.random`. + warm_start : bool, optional When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. diff --git a/sklearn/linear_model/perceptron.py b/sklearn/linear_model/perceptron.py index 0b11049fc3b39..5432902e31532 100644 --- a/sklearn/linear_model/perceptron.py +++ b/sklearn/linear_model/perceptron.py @@ -30,23 +30,23 @@ class Perceptron(BaseSGDClassifier): shuffle : bool, optional, default True Whether or not the training data should be shuffled after each epoch. - random_state : int, RandomState instance or None, optional, default None - The seed of the pseudo random number generator to use when shuffling - the data. If int, random_state is the seed used by the random number - generator; If RandomState instance, random_state is the random number - generator; If None, the random number generator is the RandomState - instance used by `np.random`. - verbose : integer, optional The verbosity level + eta0 : double + Constant by which the updates are multiplied. Defaults to 1. + n_jobs : integer, optional The number of CPUs to use to do the OVA (One Versus All, for multi-class problems) computation. 
-1 means 'all CPUs'. Defaults to 1. - eta0 : double - Constant by which the updates are multiplied. Defaults to 1. + random_state : int, RandomState instance or None, optional, default None + The seed of the pseudo random number generator to use when shuffling + the data. If int, random_state is the seed used by the random number + generator; If RandomState instance, random_state is the random number + generator; If None, the random number generator is the RandomState + instance used by `np.random`. class_weight : dict, {class_label: weight} or "balanced" or None, optional Preset for the class_weight fit parameter. diff --git a/sklearn/linear_model/randomized_l1.py b/sklearn/linear_model/randomized_l1.py index ac5b89722488e..235f88645a56a 100644 --- a/sklearn/linear_model/randomized_l1.py +++ b/sklearn/linear_model/randomized_l1.py @@ -245,16 +245,16 @@ class RandomizedLasso(BaseRandomizedLinearModel): optimization-based algorithms, this parameter does not control the tolerance of the optimization. - n_jobs : integer, optional - Number of CPUs to use during the resampling. If '-1', use - all the CPUs - random_state : int, RandomState instance or None, optional (default=None) If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by `np.random`. + n_jobs : integer, optional + Number of CPUs to use during the resampling. If '-1', use + all the CPUs + pre_dispatch : int, or string, optional Controls the number of jobs that get dispatched during parallel execution. Reducing this number can be useful to avoid an @@ -416,6 +416,9 @@ class RandomizedLogisticRegression(BaseRandomizedLinearModel): selection_threshold : float, optional, default=0.25 The score above which features should be selected. + tol : float, optional, default=1e-3 + tolerance for stopping criteria of LogisticRegression + fit_intercept : boolean, optional, default=True whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations @@ -434,19 +437,16 @@ class RandomizedLogisticRegression(BaseRandomizedLinearModel): `preprocessing.StandardScaler` before calling `fit` on an estimator with `normalize=False`. - tol : float, optional, default=1e-3 - tolerance for stopping criteria of LogisticRegression - - n_jobs : integer, optional - Number of CPUs to use during the resampling. If '-1', use - all the CPUs - random_state : int, RandomState instance or None, optional (default=None) If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by `np.random`. + n_jobs : integer, optional + Number of CPUs to use during the resampling. If '-1', use + all the CPUs + pre_dispatch : int, or string, optional Controls the number of jobs that get dispatched during parallel execution. Reducing this number can be useful to avoid an diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py index 398016b886bdc..007a08ddcb0ab 100644 --- a/sklearn/linear_model/ridge.py +++ b/sklearn/linear_model/ridge.py @@ -218,11 +218,6 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto', assumed to be specific to the targets. Hence they must correspond in number. - max_iter : int, optional - Maximum number of iterations for conjugate gradient solver. 
- For 'sparse_cg' and 'lsqr' solvers, the default value is determined - by scipy.sparse.linalg. For 'sag' solver, the default value is 1000. - sample_weight : float or numpy array of shape [n_samples] Individual weights for each sample. If sample_weight is not None and solver='auto', the solver will be set to 'cholesky'. @@ -268,6 +263,11 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto', .. versionadded:: 0.19 SAGA solver. + max_iter : int, optional + Maximum number of iterations for conjugate gradient solver. + For 'sparse_cg' and 'lsqr' solvers, the default value is determined + by scipy.sparse.linalg. For 'sag' solver, the default value is 1000. + tol : float Precision of the solution. @@ -521,19 +521,11 @@ class Ridge(_BaseRidge, RegressorMixin): assumed to be specific to the targets. Hence they must correspond in number. - copy_X : boolean, optional, default True - If True, X will be copied; else, it may be overwritten. - fit_intercept : boolean Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (e.g. data is expected to be already centered). - max_iter : int, optional - Maximum number of iterations for conjugate gradient solver. - For 'sparse_cg' and 'lsqr' solvers, the default value is determined - by scipy.sparse.linalg. For 'sag' solver, the default value is 1000. - normalize : boolean, optional, default False This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by @@ -542,6 +534,17 @@ class Ridge(_BaseRidge, RegressorMixin): :class:`sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``. + copy_X : boolean, optional, default True + If True, X will be copied; else, it may be overwritten. + + max_iter : int, optional + Maximum number of iterations for conjugate gradient solver. + For 'sparse_cg' and 'lsqr' solvers, the default value is determined + by scipy.sparse.linalg. For 'sag' solver, the default value is 1000. + + tol : float + Precision of the solution. + solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'} Solver to use in the computational routines: @@ -580,9 +583,6 @@ class Ridge(_BaseRidge, RegressorMixin): .. versionadded:: 0.19 SAGA solver. - tol : float - Precision of the solution. - random_state : int, RandomState instance or None, optional, default None The seed of the pseudo random number generator to use when shuffling the data. If int, random_state is the seed used by the random number @@ -669,26 +669,11 @@ class RidgeClassifier(LinearClassifierMixin, _BaseRidge): Alpha corresponds to ``C^-1`` in other linear models such as LogisticRegression or LinearSVC. - class_weight : dict or 'balanced', optional - Weights associated with classes in the form ``{class_label: weight}``. - If not given, all classes are supposed to have weight one. - - The "balanced" mode uses the values of y to automatically adjust - weights inversely proportional to class frequencies in the input data - as ``n_samples / (n_classes * np.bincount(y))`` - - copy_X : boolean, optional, default True - If True, X will be copied; else, it may be overwritten. - fit_intercept : boolean Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (e.g. data is expected to be already centered). - max_iter : int, optional - Maximum number of iterations for conjugate gradient solver. 
- The default value is determined by scipy.sparse.linalg. - normalize : boolean, optional, default False This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by @@ -697,6 +682,24 @@ class RidgeClassifier(LinearClassifierMixin, _BaseRidge): :class:`sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``. + copy_X : boolean, optional, default True + If True, X will be copied; else, it may be overwritten. + + max_iter : int, optional + Maximum number of iterations for conjugate gradient solver. + The default value is determined by scipy.sparse.linalg. + + tol : float + Precision of the solution. + + class_weight : dict or 'balanced', optional + Weights associated with classes in the form ``{class_label: weight}``. + If not given, all classes are supposed to have weight one. + + The "balanced" mode uses the values of y to automatically adjust + weights inversely proportional to class frequencies in the input data + as ``n_samples / (n_classes * np.bincount(y))`` + solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'} Solver to use in the computational routines: @@ -731,9 +734,6 @@ class RidgeClassifier(LinearClassifierMixin, _BaseRidge): .. versionadded:: 0.19 SAGA solver. - tol : float - Precision of the solution. - random_state : int, RandomState instance or None, optional, default None The seed of the pseudo random number generator to use when shuffling the data. If int, random_state is the seed used by the random number diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py index 85f2b8ef7df07..67df5a512b21a 100644 --- a/sklearn/linear_model/stochastic_gradient.py +++ b/sklearn/linear_model/stochastic_gradient.py @@ -607,13 +607,6 @@ class SGDClassifier(BaseSGDClassifier): Whether or not the training data should be shuffled after each epoch. Defaults to True. - random_state : int, RandomState instance or None, optional (default=None) - The seed of the pseudo random number generator to use when shuffling - the data. If int, random_state is the seed used by the random number - generator; If RandomState instance, random_state is the random number - generator; If None, the random number generator is the RandomState - instance used by `np.random`. - verbose : integer, optional The verbosity level @@ -630,6 +623,13 @@ class SGDClassifier(BaseSGDClassifier): multi-class problems) computation. -1 means 'all CPUs'. Defaults to 1. + random_state : int, RandomState instance or None, optional (default=None) + The seed of the pseudo random number generator to use when shuffling + the data. If int, random_state is the seed used by the random number + generator; If RandomState instance, random_state is the random number + generator; If None, the random number generator is the RandomState + instance used by `np.random`. + learning_rate : string, optional The learning rate schedule: @@ -1136,13 +1136,6 @@ class SGDRegressor(BaseSGDRegressor): Whether or not the training data should be shuffled after each epoch. Defaults to True. - random_state : int, RandomState instance or None, optional (default=None) - The seed of the pseudo random number generator to use when shuffling - the data. If int, random_state is the seed used by the random number - generator; If RandomState instance, random_state is the random number - generator; If None, the random number generator is the RandomState - instance used by `np.random`. 
- verbose : integer, optional The verbosity level. @@ -1154,6 +1147,13 @@ class SGDRegressor(BaseSGDRegressor): For epsilon-insensitive, any differences between the current prediction and the correct label are ignored if they are less than this threshold. + random_state : int, RandomState instance or None, optional (default=None) + The seed of the pseudo random number generator to use when shuffling + the data. If int, random_state is the seed used by the random number + generator; If RandomState instance, random_state is the random number + generator; If None, the random number generator is the RandomState + instance used by `np.random`. + learning_rate : string, optional The learning rate schedule: diff --git a/sklearn/utils/tests/test_docstring_parameters.py b/sklearn/utils/tests/test_docstring_parameters.py index 0f5cebd64cd6b..267a0c9305d03 100644 --- a/sklearn/utils/tests/test_docstring_parameters.py +++ b/sklearn/utils/tests/test_docstring_parameters.py @@ -32,7 +32,7 @@ # 'sklearn.feature_selection', 'sklearn.gaussian_process', 'sklearn.isotonic', - # 'sklearn.linear_model', + 'sklearn.linear_model', 'sklearn.manifold', 'sklearn.multiclass', 'sklearn.metrics', @@ -110,7 +110,8 @@ def test_docstring_parameters(): # by default for API reason if method_name in \ ['fit', 'score', 'fit_predict', 'fit_transform', - 'partial_fit', 'transform', 'inverse_transform']: # XXX remove transform + 'partial_fit', + 'transform', 'inverse_transform', 'predict']: # XXX remove transform sig = signature(method) if ('y' in sig.parameters and sig.parameters['y'].default is None): From 9a6f01dc4b9653f05301aacd03daee82121028a3 Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Sat, 10 Jun 2017 18:10:08 +0200 Subject: [PATCH 21/66] more --- sklearn/preprocessing/data.py | 49 ++++++++++++++++--- .../utils/tests/test_docstring_parameters.py | 2 +- 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py index 46937c77bee46..ae059a8d4b585 100644 --- a/sklearn/preprocessing/data.py +++ b/sklearn/preprocessing/data.py @@ -375,6 +375,9 @@ def minmax_scale(X, feature_range=(0, 1), axis=0, copy=True): Parameters ---------- + X : array-like, shape (n_samples, n_features) + The data. + feature_range : tuple (min, max), default=(0, 1) Desired range of transformed data. @@ -440,6 +443,12 @@ class StandardScaler(BaseEstimator, TransformerMixin): Parameters ---------- + copy : boolean, optional, default True + If False, try to avoid a copy and do inplace scaling instead. + This is not guaranteed to always work inplace; e.g. if the data is + not a NumPy array or scipy.sparse CSR matrix, a copy may still be + returned. + with_mean : boolean, True by default If True, center the data before scaling. This does not work (and will raise an exception) when attempted on @@ -451,12 +460,6 @@ class StandardScaler(BaseEstimator, TransformerMixin): If True, scale the data to unit variance (or equivalently, unit standard deviation). - copy : boolean, optional, default True - If False, try to avoid a copy and do inplace scaling instead. - This is not guaranteed to always work inplace; e.g. if the data is - not a NumPy array or scipy.sparse CSR matrix, a copy may still be - returned. - Attributes ---------- scale_ : ndarray, shape (n_features,) @@ -621,6 +624,14 @@ def inverse_transform(self, X, copy=None): ---------- X : array-like, shape [n_samples, n_features] The data used to scale along the features axis. + + copy : bool + Copy the input X or not. 
+
+        Returns
+        -------
+        X_tr : array-like, shape [n_samples, n_features]
+            Transformed array.
         """
         check_is_fitted(self, 'scale_')

@@ -799,6 +810,9 @@ def maxabs_scale(X, axis=0, copy=True):

     Parameters
     ----------
+    X : array-like, shape (n_samples, n_features)
+        The data.
+
     axis : int (0 by default)
         axis used to scale along. If 0, independently scale each feature,
         otherwise (if 1) scale each sample.
@@ -1184,6 +1198,16 @@ def get_feature_names(self, input_features=None):
     def fit(self, X, y=None):
         """
         Compute number of output features.
+
+
+        Parameters
+        ----------
+        X : array-like, shape (n_samples, n_features)
+            The data.
+
+        Returns
+        -------
+        self : instance
         """
         n_samples, n_features = check_array(X).shape
         combinations = self._combinations(n_features, self.degree,
@@ -1366,6 +1390,10 @@ def fit(self, X, y=None):

         This method is just there to implement the usual API and hence
         work in pipelines.
+
+        Parameters
+        ----------
+        X : array-like
         """
         X = check_array(X, accept_sparse='csr')
         return self
@@ -1477,6 +1505,10 @@ def fit(self, X, y=None):

         This method is just there to implement the usual API and hence
         work in pipelines.
+
+        Parameters
+        ----------
+        X : array-like
         """
         check_array(X, accept_sparse='csr')
         return self
@@ -1841,6 +1873,11 @@ def fit_transform(self, X, y=None):

         Equivalent to self.fit(X).transform(X), but more convenient and more
         efficient. See fit for the parameters, transform for the return value.
+
+        Parameters
+        ----------
+        X : array-like, shape [n_samples, n_features]
+            Input array of type int.
         """
         return _transform_selected(X, self._fit_transform,
                                    self.categorical_features, copy=True)
diff --git a/sklearn/utils/tests/test_docstring_parameters.py b/sklearn/utils/tests/test_docstring_parameters.py
index 267a0c9305d03..4ab48dfa16168 100644
--- a/sklearn/utils/tests/test_docstring_parameters.py
+++ b/sklearn/utils/tests/test_docstring_parameters.py
@@ -41,7 +41,7 @@
     # 'sklearn.model_selection',
     'sklearn.neighbors',
     'sklearn.neural_network',
-    # 'sklearn.preprocessing',
+    'sklearn.preprocessing',
     'sklearn.pipeline',
     'sklearn.semi_supervised',
     'sklearn.tree',

From d098674a7a859bbdfe28ce7c8a75ca21d3c4fe20 Mon Sep 17 00:00:00 2001
From: Raghav RV
Date: Fri, 23 Jun 2017 13:46:24 +0200
Subject: [PATCH 22/66] No need of TEST_DOCSTRING='false'

---
 .travis.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 3a5594ba5de12..d4a3a5955c97b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -23,7 +23,7 @@ matrix:
     # This environment tests that scikit-learn can be built against
    # versions of numpy, scipy with ATLAS that comes with Ubuntu Trusty 14.04
    - env: DISTRIB="ubuntu" PYTHON_VERSION="2.7" CYTHON_VERSION="0.23.4"
-           COVERAGE=true TEST_DOCSTRINGS="false"
+           COVERAGE=true
      addons:
        apt:
          packages:
@@ -39,7 +39,7 @@ matrix:
    # It also runs tests requiring Pandas.
    - env: DISTRIB="conda" PYTHON_VERSION="3.6.1" INSTALL_MKL="true"
          NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0" PANDAS_VERSION="0.20.1"
-          CYTHON_VERSION="0.25.2" COVERAGE=true TEST_DOCSTRINGS="false"
+          CYTHON_VERSION="0.25.2" COVERAGE=true

    # This environment uses pytest to run the tests. It uses the newest
    # supported Anaconda release (4.4.0). It also runs tests requiring Pandas.
- env: USE_PYTEST="true" DISTRIB="conda" PYTHON_VERSION="3.6.1" From a1bbd533fa0ae4dfc1994105a00f8899b3447bcf Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Fri, 23 Jun 2017 13:49:58 +0200 Subject: [PATCH 23/66] Flake8 and licence --- sklearn/utils/tests/test_docstring_parameters.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/sklearn/utils/tests/test_docstring_parameters.py b/sklearn/utils/tests/test_docstring_parameters.py index 4ab48dfa16168..7207d84ea9846 100644 --- a/sklearn/utils/tests/test_docstring_parameters.py +++ b/sklearn/utils/tests/test_docstring_parameters.py @@ -1,3 +1,8 @@ +# Authors: Gael Varoquaux +# Justin Vincent +# Lars Buitinck +# License: BSD 3 clause + from __future__ import print_function from nose.tools import assert_true @@ -108,10 +113,11 @@ def test_docstring_parameters(): param_ignore = None # Now skip docstring test for y when y is None # by default for API reason - if method_name in \ - ['fit', 'score', 'fit_predict', 'fit_transform', - 'partial_fit', - 'transform', 'inverse_transform', 'predict']: # XXX remove transform + # XXX remove transform + if method_name in ['fit', 'score', 'fit_predict', + 'fit_transform', 'partial_fit', + 'transform', 'inverse_transform', + 'predict']: sig = signature(method) if ('y' in sig.parameters and sig.parameters['y'].default is None): From b092418d27ffb6f7d541fc6d5761c878e8b1ae53 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Fri, 23 Jun 2017 13:55:31 +0200 Subject: [PATCH 24/66] Add Alex's name to the authors --- .../utils/tests/test_docstring_parameters.py | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/sklearn/utils/tests/test_docstring_parameters.py b/sklearn/utils/tests/test_docstring_parameters.py index 7207d84ea9846..bd37f76219dcc 100644 --- a/sklearn/utils/tests/test_docstring_parameters.py +++ b/sklearn/utils/tests/test_docstring_parameters.py @@ -1,6 +1,5 @@ -# Authors: Gael Varoquaux -# Justin Vincent -# Lars Buitinck +# Authors: Alexandre Gramfort +# Raghav RV # License: BSD 3 clause from __future__ import print_function @@ -18,11 +17,11 @@ from sklearn.utils.testing import SkipTest from sklearn.utils.testing import check_parameters_match, get_func_name -_doc_special_members = ('__contains__', '__getitem__', '__iter__', '__len__', +_DOC_SPECIAL_MEMBERS = ('__contains__', '__getitem__', '__iter__', '__len__', '__call__', '__add__', '__sub__', '__mul__', '__div__', '__neg__', '__hash__') -public_modules = [ +PUBLIC_MODULES = [ # the list of modules users need to access for all functionality # 'sklearn', 'sklearn.base', @@ -53,9 +52,8 @@ # 'sklearn.utils', ] - # functions to ignore args / docstring of -_docstring_ignores = [ +_DOCSTRING_IGNORES = [ 'sklearn.utils.deprecation.load_mlcomp', 'sklearn.pipeline.make_pipeline', 'sklearn.pipeline.make_union', @@ -73,7 +71,7 @@ 'sample_gaussian', # deprecated ] -_tab_ignores = [ +_TAB_IGNORES = [ ] @@ -88,7 +86,7 @@ def test_docstring_parameters(): from numpydoc import docscrape incorrect = [] - for name in public_modules: + for name in PUBLIC_MODULES: with warnings.catch_warnings(record=True): # traits warnings module = __import__(name, globals()) for submod in name.split('.')[1:]: @@ -96,9 +94,9 @@ def test_docstring_parameters(): classes = inspect.getmembers(module, inspect.isclass) for cname, cls in classes: this_incorrect = [] - if cname in _docstring_ignores: + if cname in _DOCSTRING_IGNORES: continue - if cname.startswith('_') and cname not in _doc_special_members: + if 
cname.startswith('_') and cname not in _DOC_SPECIAL_MEMBERS: continue with warnings.catch_warnings(record=True) as w: cdoc = docscrape.ClassDoc(cls) @@ -136,7 +134,7 @@ def test_docstring_parameters(): # Don't test private methods / functions continue name_ = get_func_name(func) - if not any(d in name_ for d in _docstring_ignores) and \ + if not any(d in name_ for d in _DOCSTRING_IGNORES) and \ 'deprecation_wrapped' not in func.__code__.co_name: incorrect += check_parameters_match(func) msg = '\n' + '\n'.join(sorted(list(set(incorrect)))) @@ -146,7 +144,7 @@ def test_docstring_parameters(): def test_tabs(): """Test that there are no tabs in our source files""" - ignore = _tab_ignores[:] + ignore = _TAB_IGNORES[:] for importer, modname, ispkg in walk_packages(sklearn.__path__, prefix='sklearn.'): From 5a104ca9f9aa3b5173827aee9f224fb87ca2cad4 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Fri, 23 Jun 2017 14:15:16 +0200 Subject: [PATCH 25/66] Moving docstring parameter test to sklearn/tests --- sklearn/{utils => }/tests/test_docstring_parameters.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename sklearn/{utils => }/tests/test_docstring_parameters.py (100%) diff --git a/sklearn/utils/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py similarity index 100% rename from sklearn/utils/tests/test_docstring_parameters.py rename to sklearn/tests/test_docstring_parameters.py From 1f51107b440c60504eda635e1228adcfffec28e3 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Fri, 23 Jun 2017 15:03:48 +0200 Subject: [PATCH 26/66] Try fixing some of the inconsistencies --- sklearn/covariance/outlier_detection.py | 7 +++++++ sklearn/linear_model/coordinate_descent.py | 10 +++++----- sklearn/svm/base.py | 1 - 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/sklearn/covariance/outlier_detection.py b/sklearn/covariance/outlier_detection.py index 8529cb571574f..826314d2bfcb7 100644 --- a/sklearn/covariance/outlier_detection.py +++ b/sklearn/covariance/outlier_detection.py @@ -47,6 +47,13 @@ class EllipticEnvelope(MinCovDet): The amount of contamination of the data set, i.e. the proportion of outliers in the data set. + random_state : int, RandomState instance or None, optional (default=None) + The seed of the pseudo random number generator to use when shuffling + the data. If int, random_state is the seed used by the random number + generator; If RandomState instance, random_state is the random number + generator; If None, the random number generator is the RandomState + instance used by `np.random`. + Attributes ---------- location_ : array-like, shape (n_features,) diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py index 209cfe5ba02c2..55f9503629cc9 100644 --- a/sklearn/linear_model/coordinate_descent.py +++ b/sklearn/linear_model/coordinate_descent.py @@ -185,15 +185,15 @@ def lasso_path(X, y, eps=1e-3, n_alphas=100, alphas=None, verbose : bool or integer Amount of verbosity. - params : kwargs - keyword arguments passed to the coordinate descent solver. + return_n_iter : bool + whether to return the number of iterations or not. positive : bool, default False If set to True, forces coefficients to be positive. (Only allowed when ``y.ndim == 1``). - return_n_iter : bool - whether to return the number of iterations or not. + **params : kwargs + keyword arguments passed to the coordinate descent solver. 
Returns ------- @@ -347,7 +347,7 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None, Skip input validation checks, including the Gram matrix when provided assuming there are handled by the caller when check_input=False. - params : kwargs + **params : kwargs keyword arguments passed to the coordinate descent solver. Returns diff --git a/sklearn/svm/base.py b/sklearn/svm/base.py index 252b1d07bb8d2..ad71aa678a8cf 100644 --- a/sklearn/svm/base.py +++ b/sklearn/svm/base.py @@ -811,7 +811,6 @@ def _fit_liblinear(X, y, C, fit_intercept, intercept_scaling, class_weight, generator; If None, the random number generator is the RandomState instance used by `np.random`. - multi_class : str, {'ovr', 'crammer_singer'} `ovr` trains n_classes one-vs-rest classifiers, while `crammer_singer` optimizes a joint objective over all classes. From c093014c838f69e97b1395d523c510e862f3e568 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Fri, 23 Jun 2017 15:34:04 +0200 Subject: [PATCH 27/66] Remove 'optional' to keep it under 80chars --- sklearn/tree/tree.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index 807b1f2a44e28..8f5defe8eb8fa 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -35,7 +35,6 @@ from ..utils import compute_sample_weight from ..utils.multiclass import check_classification_targets from ..utils.validation import check_is_fitted -from ..exceptions import NotFittedError from ._criterion import Criterion from ._splitter import Splitter @@ -614,7 +613,7 @@ class DecisionTreeClassifier(BaseDecisionTree, ClassifierMixin): ``min_impurity_decrease`` in 0.19 and will be removed in 0.21. Use ``min_impurity_decrease`` instead. - class_weight : dict, list of dicts, "balanced" or None, optional (default=None) + class_weight : dict, list of dicts, "balanced" or None, default=None Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. For multi-output problems, a list of dicts can be provided in the same From 575b24302371a3d0eef16863366c185b7ee0582a Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Fri, 23 Jun 2017 15:34:46 +0200 Subject: [PATCH 28/66] Address flake8 in least_angle.py --- sklearn/linear_model/least_angle.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/linear_model/least_angle.py b/sklearn/linear_model/least_angle.py index b8275421ee141..e4874524d7637 100644 --- a/sklearn/linear_model/least_angle.py +++ b/sklearn/linear_model/least_angle.py @@ -101,10 +101,10 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500, When using this option together with method 'lasso' the model coefficients will not converge to the ordinary-least-squares solution for small values of alpha (neither will they when using method 'lar' - ..). Only coefficients up to the smallest alpha value (``alphas_[alphas_ > - 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso - algorithm are typically in congruence with the solution of the - coordinate descent lasso_path function. + ..). Only coefficients up to the smallest alpha value + (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by the + stepwise Lars-Lasso algorithm are typically in congruence with the + solution of the coordinate descent lasso_path function. 
Returns -------- From 3e2d83716f8dfb71a02b9499a54b4b89534ac526 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Fri, 23 Jun 2017 15:41:52 +0200 Subject: [PATCH 29/66] Fix more inconsistencies --- sklearn/covariance/graph_lasso_.py | 8 ++++++++ sklearn/preprocessing/data.py | 9 +++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/sklearn/covariance/graph_lasso_.py b/sklearn/covariance/graph_lasso_.py index 08fc2448def7c..523a5d36621da 100644 --- a/sklearn/covariance/graph_lasso_.py +++ b/sklearn/covariance/graph_lasso_.py @@ -335,7 +335,14 @@ def __init__(self, alpha=.01, mode='cd', tol=1e-4, enet_tol=1e-4, self.store_precision = True def fit(self, X, y=None): + """Fits the GraphLasso model to X. + Parameters + ---------- + X : ndarray, shape (n_samples, n_features) + Data from which to compute the covariance estimate + y : (ignored) + """ # Covariance does not make sense for a single feature X = check_array(X, ensure_min_features=2, ensure_min_samples=2, estimator=self) @@ -577,6 +584,7 @@ def fit(self, X, y=None): ---------- X : ndarray, shape (n_samples, n_features) Data from which to compute the covariance estimate + y : (ignored) """ # Covariance does not make sense for a single feature X = check_array(X, ensure_min_features=2, estimator=self) diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py index edd072a4c9b43..f589a6a824702 100644 --- a/sklearn/preprocessing/data.py +++ b/sklearn/preprocessing/data.py @@ -630,6 +630,8 @@ def transform(self, X, y='deprecated', copy=None): y : (ignored) .. deprecated:: 0.19 This parameter will be removed in 0.21. + copy : bool, optional (default: None) + Copy the input X or not. """ if not isinstance(y, string_types) or y != 'deprecated': warnings.warn("The parameter y on transform() is " @@ -663,8 +665,7 @@ def inverse_transform(self, X, copy=None): ---------- X : array-like, shape [n_samples, n_features] The data used to scale along the features axis. - - copy : bool + copy : bool, optional (default: None) Copy the input X or not. Returns @@ -1478,6 +1479,8 @@ def transform(self, X, y='deprecated', copy=None): y : (ignored) .. deprecated:: 0.19 This parameter will be removed in 0.21. + copy : bool, optional (default: None) + Copy the input X or not. """ if not isinstance(y, string_types) or y != 'deprecated': warnings.warn("The parameter y on transform() is " @@ -1602,6 +1605,8 @@ def transform(self, X, y='deprecated', copy=None): y : (ignored) .. deprecated:: 0.19 This parameter will be removed in 0.21. + copy : bool + Copy the input X or not. """ if not isinstance(y, string_types) or y != 'deprecated': warnings.warn("The parameter y on transform() is " From 8b8ce127fe1ff7019d39bba96647e4574bb39f5d Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Fri, 23 Jun 2017 15:44:04 +0200 Subject: [PATCH 30/66] Fix rest of the inconsistencies --- sklearn/decomposition/fastica_.py | 4 ++-- sklearn/preprocessing/data.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearn/decomposition/fastica_.py b/sklearn/decomposition/fastica_.py index af9cd582b42f9..fcc11ff643a5e 100644 --- a/sklearn/decomposition/fastica_.py +++ b/sklearn/decomposition/fastica_.py @@ -539,11 +539,11 @@ def transform(self, X, y='deprecated', copy=True): X : array-like, shape (n_samples, n_features) Data to transform, where n_samples is the number of samples and n_features is the number of features. - copy : bool (optional) - If False, data passed to fit are overwritten. Defaults to True. y : (ignored) .. 
deprecated:: 0.19 This parameter will be removed in 0.21. + copy : bool (optional) + If False, data passed to fit are overwritten. Defaults to True. Returns ------- diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py index f589a6a824702..eb19494c83b75 100644 --- a/sklearn/preprocessing/data.py +++ b/sklearn/preprocessing/data.py @@ -2389,6 +2389,8 @@ def transform(self, X): def inverse_transform(self, X): """Back-projection to the original space. + Parameters + ---------- X : ndarray or sparse matrix, shape (n_samples, n_features) The data used to scale along the features axis. If a sparse matrix is provided, it will be converted into a sparse From bb554c77c7907d374b796b67ed9358f66eaf32ef Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Sat, 24 Jun 2017 11:04:00 +0200 Subject: [PATCH 31/66] Fix rest of the docstrings; Add debug statements --- sklearn/covariance/outlier_detection.py | 8 ++++ sklearn/tests/test_docstring_parameters.py | 45 ++++++++++++++-------- sklearn/utils/testing.py | 10 +++-- 3 files changed, 45 insertions(+), 18 deletions(-) diff --git a/sklearn/covariance/outlier_detection.py b/sklearn/covariance/outlier_detection.py index 826314d2bfcb7..8cc81cca07b5b 100644 --- a/sklearn/covariance/outlier_detection.py +++ b/sklearn/covariance/outlier_detection.py @@ -97,6 +97,14 @@ def __init__(self, store_precision=True, assume_centered=False, self.contamination = contamination def fit(self, X, y=None): + """Fit the EllipticEnvelope model with X. + + Parameters + ---------- + X : numpy array or sparse matrix of shape [n_samples, n_features] + Training data + y : (ignored) + """ super(EllipticEnvelope, self).fit(X) self.threshold_ = sp.stats.scoreatpercentile( self.dist_, 100. * (1. - self.contamination)) diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py index bd37f76219dcc..629d2519ccd24 100644 --- a/sklearn/tests/test_docstring_parameters.py +++ b/sklearn/tests/test_docstring_parameters.py @@ -58,22 +58,29 @@ 'sklearn.pipeline.make_pipeline', 'sklearn.pipeline.make_union', 'sklearn.utils.extmath.safe_sparse_dot', - 'RandomizedPCA', # deprecated - 'BaseForest', - 'BaseDecisionTree', - 'ExtraTreeClassifier', - 'ExtraTreeRegressor', - 'GaussianProcess', # deprecated - 'VBGMM', # deprecated - 'DPGMM', # deprecated - 'GMM', # deprecated - 'log_multivariate_normal_density', # deprecated - 'sample_gaussian', # deprecated + # Deprecated classes and functions + 'RandomizedPCA', + 'GaussianProcess', + 'VBGMM', + 'DPGMM', + 'GMM', + 'log_multivariate_normal_density', + 'sample_gaussian', ] _TAB_IGNORES = [ ] +# Methods to test for, in any class +_METHODS_IGNORE_NONE_Y = [ + 'fit', + 'score', + 'fit_predict', + 'fit_transform', + 'partial_fit', + 'predict' +] + def test_docstring_parameters(): """Test module docstring formatting.""" @@ -112,16 +119,22 @@ def test_docstring_parameters(): # Now skip docstring test for y when y is None # by default for API reason # XXX remove transform - if method_name in ['fit', 'score', 'fit_predict', - 'fit_transform', 'partial_fit', - 'transform', 'inverse_transform', - 'predict']: + if method_name in _METHODS_IGNORE_NONE_Y: sig = signature(method) if ('y' in sig.parameters and sig.parameters['y'].default is None): param_ignore = ['y'] # ignore y for fit and score this_incorrect += check_parameters_match(method, ignore=param_ignore) + if cname == "Pipeline": + print(method) + print(method_name) + print(param_ignore) + print('result: ', + check_parameters_match(method, + 
ignore=param_ignore, + debug=True)) + if hasattr(cls, '__call__'): this_incorrect += check_parameters_match(cls.__call__) @@ -141,6 +154,8 @@ def test_docstring_parameters(): if len(incorrect) > 0: raise AssertionError(msg) + assert False + def test_tabs(): """Test that there are no tabs in our source files""" diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py index d5d2bd0420c39..060f79955eb40 100644 --- a/sklearn/utils/testing.py +++ b/sklearn/utils/testing.py @@ -822,17 +822,19 @@ def get_func_name(func): return '.'.join(parts) -def check_parameters_match(func, doc=None, ignore=None): +def check_parameters_match(func, doc=None, ignore=None, debug=False): """Helper to check docstring Parameters ---------- func : callable The function object to test. - doc : str - Pass evenually manually the docstring to test. + doc : str, optional (default: None) + Docstring if it is passed manually to the test. ignore : None | list Parameters to ignore. + debug : bool, optional (default: False) + Whether to print debug messages Returns ------- @@ -882,6 +884,8 @@ def check_parameters_match(func, doc=None, ignore=None): list(set(args) - set(param_names)))) incorrect += [name_ + ' arg mismatch: ' + bad] else: + if debug: + print(param_names, args) for n1, n2 in zip(param_names, args): if n1 != n2: incorrect += [name_ + ' ' + n1 + ' != ' + n2] From 9ba168269b2e5889f8dcb378cb1959623d1e25e7 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Sun, 25 Jun 2017 16:09:00 +0200 Subject: [PATCH 32/66] Collapse the two Notes section into one --- sklearn/datasets/species_distributions.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/sklearn/datasets/species_distributions.py b/sklearn/datasets/species_distributions.py index 556ad9ea45e05..8e50ba547e8a3 100644 --- a/sklearn/datasets/species_distributions.py +++ b/sklearn/datasets/species_distributions.py @@ -176,8 +176,16 @@ def fetch_species_distributions(data_home=None, grid_size : float The spacing between points of the grid, in degrees + References + ---------- + + * `"Maximum entropy modeling of species geographic distributions" + `_ + S. J. Phillips, R. P. Anderson, R. E. Schapire - Ecological Modelling, + 190:231-259, 2006. + Notes - ------ + ----- This dataset represents the geographic distribution of species. The dataset is provided by Phillips et. al. (2006). @@ -193,16 +201,6 @@ def fetch_species_distributions(data_home=None, also known as the Forest Small Rice Rat, a rodent that lives in Peru, Colombia, Ecuador, Peru, and Venezuela. - References - ---------- - - * `"Maximum entropy modeling of species geographic distributions" - `_ - S. J. Phillips, R. P. Anderson, R. E. Schapire - Ecological Modelling, - 190:231-259, 2006. 
- - Notes - ----- * For an example of using this dataset with scikit-learn, see :ref:`examples/applications/plot_species_distribution_modeling.py From 8ab87a98d6cd4e38b804cbff2070bbfb62adbecb Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Mon, 26 Jun 2017 15:26:32 +0200 Subject: [PATCH 33/66] Print class names properly even for decorated class methods --- sklearn/tests/test_docstring_parameters.py | 19 +++++-------- sklearn/utils/testing.py | 33 +++++++++++++--------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py index 629d2519ccd24..521cf8d101f3a 100644 --- a/sklearn/tests/test_docstring_parameters.py +++ b/sklearn/tests/test_docstring_parameters.py @@ -111,7 +111,8 @@ def test_docstring_parameters(): raise RuntimeError('Error for __init__ of %s in %s:\n%s' % (cls, name, w[0])) if hasattr(cls, '__init__'): - this_incorrect += check_parameters_match(cls.__init__, cdoc) + this_incorrect += check_parameters_match(cls.__init__, cdoc, + class_name=cname) for method_name in cdoc.methods: method = getattr(cls, method_name) @@ -124,19 +125,13 @@ def test_docstring_parameters(): if ('y' in sig.parameters and sig.parameters['y'].default is None): param_ignore = ['y'] # ignore y for fit and score - this_incorrect += check_parameters_match(method, - ignore=param_ignore) - if cname == "Pipeline": - print(method) - print(method_name) - print(param_ignore) - print('result: ', - check_parameters_match(method, - ignore=param_ignore, - debug=True)) + result = check_parameters_match(method, ignore=param_ignore, + class_name=cname) + this_incorrect += result if hasattr(cls, '__call__'): - this_incorrect += check_parameters_match(cls.__call__) + this_incorrect += check_parameters_match(cls.__call__, + class_name=cname) # Append class name incorrect += [c + ' (' + cname + ')' for c in this_incorrect] diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py index 060f79955eb40..294e985946f03 100644 --- a/sklearn/utils/testing.py +++ b/sklearn/utils/testing.py @@ -799,13 +799,16 @@ def _get_args(function, varargs=False): return out[0] -def get_func_name(func): +def get_func_name(func, class_name=None): """Get function full name Parameters ---------- func : callable The function object. + class_name : string, optional (default: None) + If ``func`` is a class method and the class name is known specify + class_name for the error message. Returns ------- @@ -816,13 +819,16 @@ def get_func_name(func): module = inspect.getmodule(func) if module: parts.append(module.__name__) - if hasattr(func, 'im_class'): + if class_name is not None: + parts.append(class_name) + elif hasattr(func, 'im_class'): parts.append(func.im_class.__name__) + parts.append(func.__name__) return '.'.join(parts) -def check_parameters_match(func, doc=None, ignore=None, debug=False): +def check_parameters_match(func, doc=None, ignore=None, class_name=None): """Helper to check docstring Parameters @@ -833,8 +839,9 @@ def check_parameters_match(func, doc=None, ignore=None, debug=False): Docstring if it is passed manually to the test. ignore : None | list Parameters to ignore. - debug : bool, optional (default: False) - Whether to print debug messages + class_name : string, optional (default: None) + If ``func`` is a class method and the class name is known specify + class_name for the error message. 
Returns ------- @@ -843,9 +850,9 @@ def check_parameters_match(func, doc=None, ignore=None, debug=False): """ from numpydoc import docscrape incorrect = [] - name_ = get_func_name(func) - if (not name_.startswith('sklearn.') or - name_.startswith('sklearn.externals')): + func_name = get_func_name(func, class_name=class_name) + if (not func_name.startswith('sklearn.') or + func_name.startswith('sklearn.externals')): return incorrect if inspect.isdatadescriptor(func): return incorrect @@ -863,10 +870,10 @@ def check_parameters_match(func, doc=None, ignore=None, debug=False): try: doc = docscrape.FunctionDoc(func) except Exception as exp: - incorrect += [name_ + ' parsing error: ' + str(exp)] + incorrect += [func_name + ' parsing error: ' + str(exp)] return incorrect if len(w): - raise RuntimeError('Error for %s:\n%s' % (name_, w[0])) + raise RuntimeError('Error for %s:\n%s' % (func_name, w[0])) # check set param_names = [name for name, _, _ in doc['Parameters']] @@ -882,11 +889,9 @@ def check_parameters_match(func, doc=None, ignore=None, debug=False): if len(param_names) != len(args): bad = str(sorted(list(set(param_names) - set(args)) + list(set(args) - set(param_names)))) - incorrect += [name_ + ' arg mismatch: ' + bad] + incorrect += [func_name + ' arg mismatch: ' + bad] else: - if debug: - print(param_names, args) for n1, n2 in zip(param_names, args): if n1 != n2: - incorrect += [name_ + ' ' + n1 + ' != ' + n2] + incorrect += [func_name + ' ' + n1 + ' != ' + n2] return incorrect From 6bb55fb79dd2d5da18767c168c278655df87b411 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Mon, 26 Jun 2017 17:02:40 +0200 Subject: [PATCH 34/66] Fix doc inconsistencies in ExtraTree* --- sklearn/tree/tree.py | 221 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 221 insertions(+) diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index 8f5defe8eb8fa..93db4eb98f34e 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -1135,6 +1135,124 @@ class ExtraTreeClassifier(DecisionTreeClassifier): Read more in the :ref:`User Guide `. + Parameters + ---------- + criterion : string, optional (default="gini") + The function to measure the quality of a split. Supported criteria are + "gini" for the Gini impurity and "entropy" for the information gain. + + splitter : string, optional (default="best") + The strategy used to choose the split at each node. Supported + strategies are "best" to choose the best split and "random" to choose + the best random split. + + max_depth : int or None, optional (default=None) + The maximum depth of the tree. If None, then nodes are expanded until + all leaves are pure or until all leaves contain less than + min_samples_split samples. + + min_samples_split : int, float, optional (default=2) + The minimum number of samples required to split an internal node: + + - If int, then consider `min_samples_split` as the minimum number. + - If float, then `min_samples_split` is a percentage and + `ceil(min_samples_split * n_samples)` are the minimum + number of samples for each split. + + .. versionchanged:: 0.18 + Added float values for percentages. + + min_samples_leaf : int, float, optional (default=1) + The minimum number of samples required to be at a leaf node: + + - If int, then consider `min_samples_leaf` as the minimum number. + - If float, then `min_samples_leaf` is a percentage and + `ceil(min_samples_leaf * n_samples)` are the minimum + number of samples for each node. + + .. versionchanged:: 0.18 + Added float values for percentages. 
+ + min_weight_fraction_leaf : float, optional (default=0.) + The minimum weighted fraction of the sum total of weights (of all + the input samples) required to be at a leaf node. Samples have + equal weight when sample_weight is not provided. + + max_features : int, float, string or None, optional (default=None) + The number of features to consider when looking for the best split: + + - If int, then consider `max_features` features at each split. + - If float, then `max_features` is a percentage and + `int(max_features * n_features)` features are considered at each + split. + - If "auto", then `max_features=sqrt(n_features)`. + - If "sqrt", then `max_features=sqrt(n_features)`. + - If "log2", then `max_features=log2(n_features)`. + - If None, then `max_features=n_features`. + + Note: the search for a split does not stop until at least one + valid partition of the node samples is found, even if it requires to + effectively inspect more than ``max_features`` features. + + random_state : int, RandomState instance or None, optional (default=None) + If int, random_state is the seed used by the random number generator; + If RandomState instance, random_state is the random number generator; + If None, the random number generator is the RandomState instance used + by `np.random`. + + max_leaf_nodes : int or None, optional (default=None) + Grow a tree with ``max_leaf_nodes`` in best-first fashion. + Best nodes are defined as relative reduction in impurity. + If None then unlimited number of leaf nodes. + + min_impurity_decrease : float, optional (default=0.) + A node will be split if this split induces a decrease of the impurity + greater than or equal to this value. + + The weighted impurity decrease equation is the following:: + + N_t / N * (impurity - N_t_R / N_t * right_impurity + - N_t_L / N_t * left_impurity) + + where ``N`` is the total number of samples, ``N_t`` is the number of + samples at the current node, ``N_t_L`` is the number of samples in the + left child, and ``N_t_R`` is the number of samples in the right child. + + ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, + if ``sample_weight`` is passed. + + .. versionadded:: 0.19 + + min_impurity_split : float, + Threshold for early stopping in tree growth. A node will split + if its impurity is above the threshold, otherwise it is a leaf. + + .. deprecated:: 0.19 + ``min_impurity_split`` has been deprecated in favor of + ``min_impurity_decrease`` in 0.19 and will be removed in 0.21. + Use ``min_impurity_decrease`` instead. + + class_weight : dict, list of dicts, "balanced" or None, default=None + Weights associated with classes in the form ``{class_label: weight}``. + If not given, all classes are supposed to have weight one. For + multi-output problems, a list of dicts can be provided in the same + order as the columns of y. + + Note that for multioutput (including multilabel) weights should be + defined for each class of every column in its own dict. For example, + for four-class multilabel classification weights should be + [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of + [{1:1}, {2:5}, {3:1}, {4:1}]. + + The "balanced" mode uses the values of y to automatically adjust + weights inversely proportional to class frequencies in the input data + as ``n_samples / (n_classes * np.bincount(y))`` + + For multi-output, the weights of each column of y will be multiplied. + + Note that these weights will be multiplied with sample_weight (passed + through the fit method) if sample_weight is specified. 
+ See also -------- ExtraTreeRegressor, ExtraTreesClassifier, ExtraTreesRegressor @@ -1195,6 +1313,109 @@ class ExtraTreeRegressor(DecisionTreeRegressor): Read more in the :ref:`User Guide `. + Parameters + ---------- + criterion : string, optional (default="mse") + The function to measure the quality of a split. Supported criteria + are "mse" for the mean squared error, which is equal to variance + reduction as feature selection criterion, and "mae" for the mean + absolute error. + + .. versionadded:: 0.18 + Mean Absolute Error (MAE) criterion. + + splitter : string, optional (default="best") + The strategy used to choose the split at each node. Supported + strategies are "best" to choose the best split and "random" to choose + the best random split. + + max_depth : int or None, optional (default=None) + The maximum depth of the tree. If None, then nodes are expanded until + all leaves are pure or until all leaves contain less than + min_samples_split samples. + + min_samples_split : int, float, optional (default=2) + The minimum number of samples required to split an internal node: + + - If int, then consider `min_samples_split` as the minimum number. + - If float, then `min_samples_split` is a percentage and + `ceil(min_samples_split * n_samples)` are the minimum + number of samples for each split. + + .. versionchanged:: 0.18 + Added float values for percentages. + + min_samples_leaf : int, float, optional (default=1) + The minimum number of samples required to be at a leaf node: + + - If int, then consider `min_samples_leaf` as the minimum number. + - If float, then `min_samples_leaf` is a percentage and + `ceil(min_samples_leaf * n_samples)` are the minimum + number of samples for each node. + + .. versionchanged:: 0.18 + Added float values for percentages. + + min_weight_fraction_leaf : float, optional (default=0.) + The minimum weighted fraction of the sum total of weights (of all + the input samples) required to be at a leaf node. Samples have + equal weight when sample_weight is not provided. + + max_features : int, float, string or None, optional (default=None) + The number of features to consider when looking for the best split: + + - If int, then consider `max_features` features at each split. + - If float, then `max_features` is a percentage and + `int(max_features * n_features)` features are considered at each + split. + - If "auto", then `max_features=n_features`. + - If "sqrt", then `max_features=sqrt(n_features)`. + - If "log2", then `max_features=log2(n_features)`. + - If None, then `max_features=n_features`. + + Note: the search for a split does not stop until at least one + valid partition of the node samples is found, even if it requires to + effectively inspect more than ``max_features`` features. + + random_state : int, RandomState instance or None, optional (default=None) + If int, random_state is the seed used by the random number generator; + If RandomState instance, random_state is the random number generator; + If None, the random number generator is the RandomState instance used + by `np.random`. + + min_impurity_decrease : float, optional (default=0.) + A node will be split if this split induces a decrease of the impurity + greater than or equal to this value. 
+ + The weighted impurity decrease equation is the following:: + + N_t / N * (impurity - N_t_R / N_t * right_impurity + - N_t_L / N_t * left_impurity) + + where ``N`` is the total number of samples, ``N_t`` is the number of + samples at the current node, ``N_t_L`` is the number of samples in the + left child, and ``N_t_R`` is the number of samples in the right child. + + ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, + if ``sample_weight`` is passed. + + .. versionadded:: 0.19 + + min_impurity_split : float, + Threshold for early stopping in tree growth. A node will split + if its impurity is above the threshold, otherwise it is a leaf. + + .. deprecated:: 0.19 + ``min_impurity_split`` has been deprecated in favor of + ``min_impurity_decrease`` in 0.19 and will be removed in 0.21. + Use ``min_impurity_decrease`` instead. + + max_leaf_nodes : int or None, optional (default=None) + Grow a tree with ``max_leaf_nodes`` in best-first fashion. + Best nodes are defined as relative reduction in impurity. + If None then unlimited number of leaf nodes. + + See also -------- ExtraTreeClassifier, ExtraTreesClassifier, ExtraTreesRegressor From e92b1b8c26c43c12a990b900a1c1c8b5b35aa90c Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Mon, 26 Jun 2017 17:02:56 +0200 Subject: [PATCH 35/66] Add some debug statements --- sklearn/tests/test_docstring_parameters.py | 10 ++++++---- sklearn/utils/testing.py | 9 ++++++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py index 521cf8d101f3a..91bb86b3bb103 100644 --- a/sklearn/tests/test_docstring_parameters.py +++ b/sklearn/tests/test_docstring_parameters.py @@ -116,10 +116,13 @@ def test_docstring_parameters(): for method_name in cdoc.methods: method = getattr(cls, method_name) + if cname == 'Pipeline' and method_name == 'score': + print(method.__doc__) + print(method.__name__) + print(method_name) param_ignore = None # Now skip docstring test for y when y is None # by default for API reason - # XXX remove transform if method_name in _METHODS_IGNORE_NONE_Y: sig = signature(method) if ('y' in sig.parameters and @@ -133,13 +136,12 @@ def test_docstring_parameters(): this_incorrect += check_parameters_match(cls.__call__, class_name=cname) - # Append class name - incorrect += [c + ' (' + cname + ')' for c in this_incorrect] + incorrect += this_incorrect functions = inspect.getmembers(module, inspect.isfunction) for fname, func in functions: + # Don't test private methods / functions if fname.startswith('_'): - # Don't test private methods / functions continue name_ = get_func_name(func) if not any(d in name_ for d in _DOCSTRING_IGNORES) and \ diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py index 294e985946f03..7c780f92e380e 100644 --- a/sklearn/utils/testing.py +++ b/sklearn/utils/testing.py @@ -851,6 +851,12 @@ def check_parameters_match(func, doc=None, ignore=None, class_name=None): from numpydoc import docscrape incorrect = [] func_name = get_func_name(func, class_name=class_name) + # Unwrap the function if it is a decorator + try: + func = func.__wrapped__ + print(func, 'was wrapped') + except AttributeError: + pass if (not func_name.startswith('sklearn.') or func_name.startswith('sklearn.externals')): return incorrect @@ -859,7 +865,7 @@ def check_parameters_match(func, doc=None, ignore=None, class_name=None): args = _get_args(func) # drop self if len(args) > 0 and args[0] == 'self': - args = args[1:] + args.remove('self') 
if ignore is not None: for p in ignore: @@ -887,6 +893,7 @@ def check_parameters_match(func, doc=None, ignore=None, class_name=None): param_names.remove(p) if len(param_names) != len(args): + print(param_names, args) bad = str(sorted(list(set(param_names) - set(args)) + list(set(args) - set(param_names)))) incorrect += [func_name + ' arg mismatch: ' + bad] From 7d456a76d9a0f08ec30be06845024efebaf38613 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Mon, 26 Jun 2017 17:23:23 +0200 Subject: [PATCH 36/66] Test only on python3.5 --- .travis.yml | 2 +- sklearn/utils/testing.py | 38 +++++++++++++++++--------------------- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/.travis.yml b/.travis.yml index d4a3a5955c97b..a1f58514b0d89 100644 --- a/.travis.yml +++ b/.travis.yml @@ -34,7 +34,7 @@ matrix: # This environment tests the oldest supported anaconda env - env: DISTRIB="conda" PYTHON_VERSION="2.7" INSTALL_MKL="false" NUMPY_VERSION="1.8.2" SCIPY_VERSION="0.13.3" CYTHON_VERSION="0.23.5" - COVERAGE=true TEST_DOCSTRINGS="true" + COVERAGE=true # This environment tests the newest supported Anaconda release (4.4.0) # It also runs tests requiring Pandas. - env: DISTRIB="conda" PYTHON_VERSION="3.6.1" INSTALL_MKL="true" diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py index 7c780f92e380e..62b5ecfd4e898 100644 --- a/sklearn/utils/testing.py +++ b/sklearn/utils/testing.py @@ -776,27 +776,23 @@ def __call__(self, *args, **kwargs): # Utils to test docstrings -# helpers to get function arguments -if hasattr(inspect, 'signature'): # py35 - def _get_args(function, varargs=False): - params = inspect.signature(function).parameters - args = [key for key, param in params.items() - if param.kind not in (param.VAR_POSITIONAL, param.VAR_KEYWORD)] - if varargs: - varargs = [param.name for param in params.values() - if param.kind == param.VAR_POSITIONAL] - if len(varargs) == 0: - varargs = None - return args, varargs - else: - return args -else: - def _get_args(function, varargs=False): - out = inspect.getargspec(function) # args, varargs, keywords, defaults - if varargs: - return out[:2] - else: - return out[0] +def _get_args(function, varargs=False): + """Helper to get function arguments""" + # NOTE this works only in python3.5 + if sys.version_info < (3, 5): + NotImplementedError("_get_args is not available for python < 3.5") + + params = inspect.signature(function).parameters + args = [key for key, param in params.items() + if param.kind not in (param.VAR_POSITIONAL, param.VAR_KEYWORD)] + if varargs: + varargs = [param.name for param in params.values() + if param.kind == param.VAR_POSITIONAL] + if len(varargs) == 0: + varargs = None + return args, varargs + else: + return args def get_func_name(func, class_name=None): From 140bbb7561d1b20fdbba708f87658794efee6cab Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Mon, 26 Jun 2017 17:26:42 +0200 Subject: [PATCH 37/66] Remove debug statements --- sklearn/tests/test_docstring_parameters.py | 6 ------ sklearn/utils/testing.py | 7 ------- 2 files changed, 13 deletions(-) diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py index 91bb86b3bb103..33f736fe293cb 100644 --- a/sklearn/tests/test_docstring_parameters.py +++ b/sklearn/tests/test_docstring_parameters.py @@ -116,10 +116,6 @@ def test_docstring_parameters(): for method_name in cdoc.methods: method = getattr(cls, method_name) - if cname == 'Pipeline' and method_name == 'score': - print(method.__doc__) - print(method.__name__) - 
print(method_name) param_ignore = None # Now skip docstring test for y when y is None # by default for API reason @@ -151,8 +147,6 @@ def test_docstring_parameters(): if len(incorrect) > 0: raise AssertionError(msg) - assert False - def test_tabs(): """Test that there are no tabs in our source files""" diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py index 62b5ecfd4e898..e5502e5f28d05 100644 --- a/sklearn/utils/testing.py +++ b/sklearn/utils/testing.py @@ -847,12 +847,6 @@ def check_parameters_match(func, doc=None, ignore=None, class_name=None): from numpydoc import docscrape incorrect = [] func_name = get_func_name(func, class_name=class_name) - # Unwrap the function if it is a decorator - try: - func = func.__wrapped__ - print(func, 'was wrapped') - except AttributeError: - pass if (not func_name.startswith('sklearn.') or func_name.startswith('sklearn.externals')): return incorrect @@ -889,7 +883,6 @@ def check_parameters_match(func, doc=None, ignore=None, class_name=None): param_names.remove(p) if len(param_names) != len(args): - print(param_names, args) bad = str(sorted(list(set(param_names) - set(args)) + list(set(args) - set(param_names)))) incorrect += [func_name + ' arg mismatch: ' + bad] From b71fe7e939d2944dc72aeed77db23bc3895377ee Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Mon, 26 Jun 2017 20:05:35 +0200 Subject: [PATCH 38/66] Fix flake8 --- sklearn/utils/testing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py index e5502e5f28d05..0e4c0c6376d21 100644 --- a/sklearn/utils/testing.py +++ b/sklearn/utils/testing.py @@ -776,6 +776,7 @@ def __call__(self, *args, **kwargs): # Utils to test docstrings + def _get_args(function, varargs=False): """Helper to get function arguments""" # NOTE this works only in python3.5 From 657dca91a3be3f0c2a46ec6c37b5d2039b0be52c Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Wed, 28 Jun 2017 14:06:14 +0200 Subject: [PATCH 39/66] Fix up Olivier's comments; Add TODO comment --- sklearn/linear_model/logistic.py | 5 +++-- sklearn/linear_model/ridge.py | 5 +++-- sklearn/tests/test_docstring_parameters.py | 2 ++ 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 643f9d8b9a372..4a60b588b5748 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -426,7 +426,8 @@ def hessp(v): def _check_solver_option(solver, multi_class, penalty, dual): if solver not in ['liblinear', 'newton-cg', 'lbfgs', 'sag', 'saga']: raise ValueError("Logistic Regression supports only liblinear," - " newton-cg, lbfgs and sag solvers, got %s" % solver) + " newton-cg, lbfgs, sag and saga solvers, got %s" + % solver) if multi_class not in ['multinomial', 'ovr']: raise ValueError("multi_class should be either multinomial or " @@ -1489,7 +1490,7 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, chosen is 'ovr', then a binary problem is fit for each label. Else the loss minimised is the multinomial loss fit across the entire probability distribution. Works only for the 'newton-cg', - 'sag' and 'lbfgs' solver. + 'sag', 'saga' and 'lbfgs' solver. .. versionadded:: 0.18 Stochastic Average Gradient descent solver for 'multinomial' case. 
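# ---------------------------------------------------------------------
# A quick check of the clarified solver message above -- a sketch assuming
# the 0.19-era behaviour that the solver string is only validated in fit():
import numpy as np
from sklearn.linear_model import LogisticRegression

X = np.array([[0.], [1.], [0.], [1.]])
y = np.array([0, 1, 0, 1])
try:
    LogisticRegression(solver='wrong_name').fit(X, y)
except ValueError as exc:
    assert 'sag and saga solvers' in str(exc)  # message now lists saga
# ---------------------------------------------------------------------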
diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py index ca2399fa0f04d..f5fbaf656edcc 100644 --- a/sklearn/linear_model/ridge.py +++ b/sklearn/linear_model/ridge.py @@ -265,8 +265,9 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto', max_iter : int, optional Maximum number of iterations for conjugate gradient solver. - For 'sparse_cg' and 'lsqr' solvers, the default value is determined - by scipy.sparse.linalg. For 'sag' solver, the default value is 1000. + For the 'sparse_cg' and 'lsqr' solvers, the default value is determined + by scipy.sparse.linalg. For 'sag' and saga solver, the default value is + 1000. tol : float Precision of the solution. diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py index 33f736fe293cb..c74a9f561aad7 100644 --- a/sklearn/tests/test_docstring_parameters.py +++ b/sklearn/tests/test_docstring_parameters.py @@ -21,6 +21,8 @@ '__call__', '__add__', '__sub__', '__mul__', '__div__', '__neg__', '__hash__') + +# TODO Uncomment all modules and fix doc inconsistencies everywhere PUBLIC_MODULES = [ # the list of modules users need to access for all functionality # 'sklearn', From ecf22f3239a9fefb909fb010ad6ee4b3198fd9b7 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Wed, 28 Jun 2017 14:13:52 +0200 Subject: [PATCH 40/66] Add ignore warnings --- sklearn/tests/test_docstring_parameters.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py index c74a9f561aad7..d77b35c8cc721 100644 --- a/sklearn/tests/test_docstring_parameters.py +++ b/sklearn/tests/test_docstring_parameters.py @@ -15,7 +15,9 @@ import sklearn from sklearn.base import signature from sklearn.utils.testing import SkipTest -from sklearn.utils.testing import check_parameters_match, get_func_name +from sklearn.utils.testing import check_parameters_match +from sklearn.utils.testing import get_func_name +from sklearn.utils.testing import ignore_warnings _DOC_SPECIAL_MEMBERS = ('__contains__', '__getitem__', '__iter__', '__len__', '__call__', '__add__', '__sub__', '__mul__', '__div__', @@ -150,13 +152,14 @@ def test_docstring_parameters(): raise AssertionError(msg) +@ignore_warnings(category=DeprecationWarning) def test_tabs(): """Test that there are no tabs in our source files""" ignore = _TAB_IGNORES[:] for importer, modname, ispkg in walk_packages(sklearn.__path__, prefix='sklearn.'): - # because we don't import e.g. 
mne.tests w/mne + # because we don't import if not ispkg and modname not in ignore: mod = importlib.import_module(modname) try: From d3082574ffeecec47199969601c003d73fc99d56 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Wed, 28 Jun 2017 18:10:04 +0200 Subject: [PATCH 41/66] TST also check for incorrect parameter type definitions --- sklearn/utils/testing.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py index 0e4c0c6376d21..500d0e425c903 100644 --- a/sklearn/utils/testing.py +++ b/sklearn/utils/testing.py @@ -871,12 +871,18 @@ def check_parameters_match(func, doc=None, ignore=None, class_name=None): return incorrect if len(w): raise RuntimeError('Error for %s:\n%s' % (func_name, w[0])) - # check set - param_names = [name for name, _, _ in doc['Parameters']] - # clean up some docscrape output: - param_names = [name.split(':')[0].strip('` ') for name in param_names] - param_names = [name for name in param_names if '*' not in name] + param_names = [] + for name, type_definition, param_doc in doc['Parameters']: + if ':' in name or '*' not in name: + param_names.append(name.strip('` ')) + + type_definition = type_definition.strip() + + if type_definition == "" or type_definition.startswith(':'): + incorrect += [func_name + ' incorrect type definition for param: ' + '%s (type definition was "%s")' + % (name, type_definition)] if ignore is not None: for p in ignore: From f65861a47e4e630653905da83f292306642b5bc4 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Wed, 28 Jun 2017 18:32:51 +0200 Subject: [PATCH 42/66] ENH Distinguish between no space before colon and empty type definition --- sklearn/utils/testing.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py index 500d0e425c903..7712d53a5f68f 100644 --- a/sklearn/utils/testing.py +++ b/sklearn/utils/testing.py @@ -874,15 +874,24 @@ def check_parameters_match(func, doc=None, ignore=None, class_name=None): param_names = [] for name, type_definition, param_doc in doc['Parameters']: - if ':' in name or '*' not in name: - param_names.append(name.strip('` ')) - - type_definition = type_definition.strip() - - if type_definition == "" or type_definition.startswith(':'): - incorrect += [func_name + ' incorrect type definition for param: ' - '%s (type definition was "%s")' - % (name, type_definition)] + if (type_definition.strip() == "" or + type_definition.strip().startswith(':')): + + # If there was no space between name and the colon + # "verbose:" -> len(["verbose", ""][0]) -> 7 + # If "verbose:"[7] == ":", then there was no space + param_name = name.lstrip() + + if param_name[len(param_name.split(':')[0].strip())] == ':': + incorrect += [func_name + + 'There was no space between the param name and ' + 'colon ("%s")' % name] + else: + incorrect += [func_name + ' incorrect type definition for ' + 'param: "%s" (type definition was "%s")' + % (name.split(':')[0], type_definition)] + if '*' not in name: + param_names.append(name.split(':')[0].strip('` ')) if ignore is not None: for p in ignore: From 0cf439daff744d56878eb1de736816d995e003f7 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Wed, 28 Jun 2017 19:20:04 +0200 Subject: [PATCH 43/66] TST the test functions ;) --- sklearn/utils/testing.py | 4 ++-- sklearn/utils/tests/test_testing.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/sklearn/utils/testing.py 
b/sklearn/utils/testing.py index 7712d53a5f68f..d4f85d9ca139c 100644 --- a/sklearn/utils/testing.py +++ b/sklearn/utils/testing.py @@ -884,10 +884,10 @@ def check_parameters_match(func, doc=None, ignore=None, class_name=None): if param_name[len(param_name.split(':')[0].strip())] == ':': incorrect += [func_name + - 'There was no space between the param name and ' + ' There was no space between the param name and ' 'colon ("%s")' % name] else: - incorrect += [func_name + ' incorrect type definition for ' + incorrect += [func_name + ' Incorrect type definition for ' 'param: "%s" (type definition was "%s")' % (name.split(':')[0], type_definition)] if '*' not in name: diff --git a/sklearn/utils/tests/test_testing.py b/sklearn/utils/tests/test_testing.py index 167bbd128737e..38d6b2c3d39c5 100644 --- a/sklearn/utils/tests/test_testing.py +++ b/sklearn/utils/tests/test_testing.py @@ -316,6 +316,23 @@ def f_missing(a, b): return c +def f_check_param_definition(a, b, c, d): + """Function f + + Parameters + ---------- + a: int + Parameter a + b: + Parameter b + c : + Parameter c + d:int + Parameter d + """ + return a + b + c + d + + class Klass(object): def f_missing(self, X, y): pass @@ -364,3 +381,15 @@ def test_check_parameters_match(): assert_true(len(incorrect) >= 1) assert_true(mess in incorrect[0], '"%s" not in "%s"' % (mess, incorrect[0])) + + incorrect = check_parameters_match(f_check_param_definition) + assert_equal( + incorrect, + ['sklearn.utils.tests.test_testing.f_check_param_definition There was ' + 'no space between the param name and colon ("a: int")', + 'sklearn.utils.tests.test_testing.f_check_param_definition There was ' + 'no space between the param name and colon ("b:")', + 'sklearn.utils.tests.test_testing.f_check_param_definition Incorrect ' + 'type definition for param: "c " (type definition was "")', + 'sklearn.utils.tests.test_testing.f_check_param_definition There was ' + 'no space between the param name and colon ("d:int")']) From 57e41099b17499030b1fba87fe3060d98bd85171 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Wed, 28 Jun 2017 19:38:40 +0200 Subject: [PATCH 44/66] Fix new doc inconsistencies after merging with master --- sklearn/linear_model/passive_aggressive.py | 28 +++++----- sklearn/linear_model/perceptron.py | 14 ++--- sklearn/linear_model/stochastic_gradient.py | 57 +++++++++++---------- 3 files changed, 52 insertions(+), 47 deletions(-) diff --git a/sklearn/linear_model/passive_aggressive.py b/sklearn/linear_model/passive_aggressive.py index f71611d548198..183049e4fdb55 100644 --- a/sklearn/linear_model/passive_aggressive.py +++ b/sklearn/linear_model/passive_aggressive.py @@ -21,13 +21,6 @@ class PassiveAggressiveClassifier(BaseSGDClassifier): Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. - n_iter : int, optional - The number of passes over the training data (aka epochs). - Defaults to None. Deprecated, will be removed in 0.21. - - .. versionchanged:: 0.19 - Deprecated - max_iter : int, optional The maximum number of passes over the training data (aka epochs). It only impacts the behavior in the ``fit`` method, and not the @@ -92,6 +85,13 @@ class PassiveAggressiveClassifier(BaseSGDClassifier): .. versionadded:: 0.19 parameter *average* to use weights averaging in SGD + n_iter : int, optional + The number of passes over the training data (aka epochs). + Defaults to None. Deprecated, will be removed in 0.21. + + .. 
versionchanged:: 0.19 + Deprecated + Attributes ---------- coef_ : array, shape = [1, n_features] if n_classes == 2 else [n_classes,\ @@ -221,13 +221,6 @@ class PassiveAggressiveRegressor(BaseSGDRegressor): Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. Defaults to True. - n_iter : int, optional - The number of passes over the training data (aka epochs). - Defaults to None. Deprecated, will be removed in 0.21. - - .. versionchanged:: 0.19 - Deprecated - max_iter : int, optional The maximum number of passes over the training data (aka epochs). It only impacts the behavior in the ``fit`` method, and not the @@ -279,6 +272,13 @@ class PassiveAggressiveRegressor(BaseSGDRegressor): .. versionadded:: 0.19 parameter *average* to use weights averaging in SGD + n_iter : int, optional + The number of passes over the training data (aka epochs). + Defaults to None. Deprecated, will be removed in 0.21. + + .. versionchanged:: 0.19 + Deprecated + Attributes ---------- coef_ : array, shape = [1, n_features] if n_classes == 2 else [n_classes,\ diff --git a/sklearn/linear_model/perceptron.py b/sklearn/linear_model/perceptron.py index 90ed6d7da6d85..28cb4561521f5 100644 --- a/sklearn/linear_model/perceptron.py +++ b/sklearn/linear_model/perceptron.py @@ -23,13 +23,6 @@ class Perceptron(BaseSGDClassifier): Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. Defaults to True. - n_iter : int, optional - The number of passes over the training data (aka epochs). - Defaults to None. Deprecated, will be removed in 0.21. - - .. versionchanged:: 0.19 - Deprecated - max_iter : int, optional The maximum number of passes over the training data (aka epochs). It only impacts the behavior in the ``fit`` method, and not the @@ -80,6 +73,13 @@ class Perceptron(BaseSGDClassifier): When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. + n_iter : int, optional + The number of passes over the training data (aka epochs). + Defaults to None. Deprecated, will be removed in 0.21. + + .. versionchanged:: 0.19 + Deprecated + Attributes ---------- coef_ : array, shape = [1, n_features] if n_classes == 2 else [n_classes,\ diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py index 82c9015d33415..390ba3953e289 100644 --- a/sklearn/linear_model/stochastic_gradient.py +++ b/sklearn/linear_model/stochastic_gradient.py @@ -605,11 +605,14 @@ class SGDClassifier(BaseSGDClassifier): Parameters ---------- - loss : str, 'hinge', 'log', 'modified_huber', 'squared_hinge',\ - 'perceptron', or a regression loss: 'squared_loss', 'huber',\ - 'epsilon_insensitive', or 'squared_epsilon_insensitive' + loss : str, default: 'hinge' The loss function to be used. Defaults to 'hinge', which gives a linear SVM. + + The possible options are 'hinge', 'log', 'modified_huber', + 'squared_hinge', 'perceptron', or a regression loss: 'squared_loss', + 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'. + The 'log' loss gives logistic regression, a probabilistic classifier. 'modified_huber' is another smooth loss that brings tolerance to outliers as well as probability estimates. @@ -637,13 +640,6 @@ class SGDClassifier(BaseSGDClassifier): Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. Defaults to True. 
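# ---------------------------------------------------------------------
# Why the deprecated ``n_iter`` entries in this patch move below the other
# parameters: the checker compares documented names against signature
# names positionally (a ``zip``), so the docstring must list parameters in
# signature order.  A sketch with a toy function, assuming numpydoc is
# installed:
import inspect
from numpydoc import docscrape

def f(a, b):
    """Toy function documented in the wrong order.

    Parameters
    ----------
    b : int
        Listed first, defined second.
    a : int
        Listed second, defined first.
    """

args = list(inspect.signature(f).parameters)                # ['a', 'b']
doc_names = [name for name, _, _ in
             docscrape.FunctionDoc(f)['Parameters']]        # ['b', 'a']
print([n1 + ' != ' + n2 for n1, n2 in zip(doc_names, args) if n1 != n2])
# ['b != a', 'a != b']
# ---------------------------------------------------------------------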
- n_iter : int, optional - The number of passes over the training data (aka epochs). - Defaults to None. Deprecated, will be removed in 0.21. - - .. versionchanged:: 0.19 - Deprecated - max_iter : int, optional The maximum number of passes over the training data (aka epochs). It only impacts the behavior in the ``fit`` method, and not the @@ -724,6 +720,13 @@ class SGDClassifier(BaseSGDClassifier): average. So ``average=10`` will begin averaging after seeing 10 samples. + n_iter : int, optional + The number of passes over the training data (aka epochs). + Defaults to None. Deprecated, will be removed in 0.21. + + .. versionchanged:: 0.19 + Deprecated + Attributes ---------- coef_ : array, shape (1, n_features) if n_classes == 2 else (n_classes,\ @@ -1172,15 +1175,17 @@ class SGDRegressor(BaseSGDRegressor): Parameters ---------- - loss : str, 'squared_loss', 'huber', 'epsilon_insensitive', \ - or 'squared_epsilon_insensitive' - The loss function to be used. Defaults to 'squared_loss' which refers - to the ordinary least squares fit. 'huber' modifies 'squared_loss' to - focus less on getting outliers correct by switching from squared to - linear loss past a distance of epsilon. 'epsilon_insensitive' ignores - errors less than epsilon and is linear past that; this is the loss - function used in SVR. 'squared_epsilon_insensitive' is the same but - becomes squared loss past a tolerance of epsilon. + loss : str, default: 'squared_loss' + The loss function to be used. The possible values are 'squared_loss', + 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive' + + The 'squared_loss' refers to the ordinary least squares fit. + 'huber' modifies 'squared_loss' to focus less on getting outliers + correct by switching from squared to linear loss past a distance of + epsilon. 'epsilon_insensitive' ignores errors less than epsilon and is + linear past that; this is the loss function used in SVR. + 'squared_epsilon_insensitive' is the same but becomes squared loss past + a tolerance of epsilon. penalty : str, 'none', 'l2', 'l1', or 'elasticnet' The penalty (aka regularization term) to be used. Defaults to 'l2' @@ -1201,13 +1206,6 @@ class SGDRegressor(BaseSGDRegressor): Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. Defaults to True. - n_iter : int, optional - The number of passes over the training data (aka epochs). - Defaults to None. Deprecated, will be removed in 0.21. - - .. versionchanged:: 0.19 - Deprecated - max_iter : int, optional The maximum number of passes over the training data (aka epochs). It only impacts the behavior in the ``fit`` method, and not the @@ -1271,6 +1269,13 @@ class SGDRegressor(BaseSGDRegressor): average. So ``average=10`` will begin averaging after seeing 10 samples. + n_iter : int, optional + The number of passes over the training data (aka epochs). + Defaults to None. Deprecated, will be removed in 0.21. + + .. 
versionchanged:: 0.19 + Deprecated + Attributes ---------- coef_ : array, shape (n_features,) From ac8ecfd034190bb6b959268e9c26621d0627fd11 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Wed, 28 Jun 2017 19:46:04 +0200 Subject: [PATCH 45/66] FIX doc param definition inconsistencies --- sklearn/datasets/mldata.py | 2 +- sklearn/decomposition/dict_learning.py | 24 ++++++++++++------------ sklearn/decomposition/sparse_pca.py | 12 ++++++------ 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/sklearn/datasets/mldata.py b/sklearn/datasets/mldata.py index 9b20431e49d1c..1416208584634 100644 --- a/sklearn/datasets/mldata.py +++ b/sklearn/datasets/mldata.py @@ -73,7 +73,7 @@ def fetch_mldata(dataname, target_name='label', data_name='data', Parameters ---------- - dataname : + dataname : str Name of the data set on mldata.org, e.g.: "leukemia", "Whistler Daily Snowfall", etc. The raw name is automatically converted to a mldata.org URL . diff --git a/sklearn/decomposition/dict_learning.py b/sklearn/decomposition/dict_learning.py index a0eb49e4693d7..899a73d0068d4 100644 --- a/sklearn/decomposition/dict_learning.py +++ b/sklearn/decomposition/dict_learning.py @@ -434,11 +434,11 @@ def dict_learning(X, n_components, alpha, max_iter=100, tol=1e-8, code_init : array of shape (n_samples, n_components), Initial value for the sparse code for warm restart scenarios. - callback : - Callable that gets invoked every five iterations. + callback : callable or None, optional (default: None) + callable that gets invoked every five iterations - verbose : - Degree of output the procedure will print. + verbose : bool, optional (default: False) + To control the verbosity of the procedure. random_state : int, RandomState instance or None, optional (default=None) If int, random_state is the seed used by the random number generator; @@ -599,14 +599,14 @@ def dict_learning_online(X, n_components=2, alpha=1, n_iter=100, dict_init : array of shape (n_components, n_features), Initial value for the dictionary for warm restart scenarios. - callback : - Callable that gets invoked every five iterations. + callback : callable or None, optional (default: None) + callable that gets invoked every five iterations batch_size : int, The number of samples to take in each batch. - verbose : - Degree of output the procedure will print. + verbose : bool, optional (default: False) + To control the verbosity of the procedure. shuffle : boolean, Whether to shuffle the data before splitting it in batches. @@ -1015,8 +1015,8 @@ class DictionaryLearning(BaseEstimator, SparseCodingMixin): dict_init : array of shape (n_components, n_features), initial values for the dictionary, for warm restart - verbose : - degree of verbosity of the printed output + verbose : bool, optional (default: False) + To control the verbosity of the procedure. split_sign : bool, False by default Whether to split the sparse feature vector into the concatenation of @@ -1178,8 +1178,8 @@ class MiniBatchDictionaryLearning(BaseEstimator, SparseCodingMixin): the reconstruction error targeted. In this case, it overrides `n_nonzero_coefs`. - verbose : - degree of verbosity of the printed output + verbose : bool, optional (default: False) + To control the verbosity of the procedure. 
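# ---------------------------------------------------------------------
# The bare ``verbose :`` entries fixed in this patch are exactly what the
# new type-definition check flags: numpydoc hands back an empty type for
# them.  A toy illustration (exact parsing details may vary across
# numpydoc versions, but the type comes back empty either way):
from numpydoc import docscrape

def g(verbose):
    """Toy function with a bare parameter header.

    Parameters
    ----------
    verbose :
        Degree of verbosity.
    """

name, type_definition, _ = list(docscrape.FunctionDoc(g)['Parameters'])[0]
print(repr(type_definition))    # '' -> reported as an incorrect definition
# ---------------------------------------------------------------------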
split_sign : bool, False by default Whether to split the sparse feature vector into the concatenation of diff --git a/sklearn/decomposition/sparse_pca.py b/sklearn/decomposition/sparse_pca.py index 23d1163fdc881..fd8a86f15d0c0 100644 --- a/sklearn/decomposition/sparse_pca.py +++ b/sklearn/decomposition/sparse_pca.py @@ -57,8 +57,8 @@ class SparsePCA(BaseEstimator, TransformerMixin): V_init : array of shape (n_components, n_features), Initial values for the components for warm restart scenarios. - verbose : - Degree of verbosity of the printed output. + verbose : int + Controls the verbosity; the higher, the more messages. Defaults to 0. random_state : int, RandomState instance or None, optional (default=None) If int, random_state is the seed used by the random number generator; @@ -151,7 +151,7 @@ def transform(self, X, ridge_alpha='deprecated'): Test data to be transformed, must have the same number of features as the data used to train the model. - ridge_alpha: float, default: 0.01 + ridge_alpha : float, default: 0.01 Amount of ridge shrinkage to apply in order to improve conditioning. @@ -209,14 +209,14 @@ class MiniBatchSparsePCA(SparsePCA): n_iter : int, number of iterations to perform for each mini batch - callback : callable, + callback : callable or None, optional (default: None) callable that gets invoked every five iterations batch_size : int, the number of features to take in each mini batch - verbose : - degree of output the procedure will print + verbose : int + Controls the verbosity; the higher, the more messages. Defaults to 0. shuffle : boolean, whether to shuffle the data before splitting it in batches From 70317cd173db576375ec241b784fac394c324945 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Thu, 29 Jun 2017 14:17:07 +0200 Subject: [PATCH 46/66] FIX error message to include sag --- sklearn/linear_model/logistic.py | 5 +++++ sklearn/linear_model/tests/test_logistic.py | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 4a60b588b5748..0eaabd3740899 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -425,8 +425,13 @@ def hessp(v): def _check_solver_option(solver, multi_class, penalty, dual): if solver not in ['liblinear', 'newton-cg', 'lbfgs', 'sag', 'saga']: +<<<<<<< Updated upstream raise ValueError("Logistic Regression supports only liblinear," " newton-cg, lbfgs, sag and saga solvers, got %s" +======= + raise ValueError("Logistic Regression supports only liblinear, " + "newton-cg, lbfgs, sag and saga solvers, got %s" +>>>>>>> Stashed changes % solver) if multi_class not in ['multinomial', 'ovr']: diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index 89f09255cad39..6a7f717946481 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -145,8 +145,8 @@ def test_check_solver_option(): X, y = iris.data, iris.target for LR in [LogisticRegression, LogisticRegressionCV]: - msg = ("Logistic Regression supports only liblinear, newton-cg, lbfgs" - " and sag solvers, got wrong_name") + msg = ('Logistic Regression supports only liblinear, newton-cg, ' + 'lbfgs, sag and saga solvers, got wrong_name') lr = LR(solver="wrong_name") assert_raise_message(ValueError, msg, lr.fit, X, y) From 89a2b0e6bcd30a4b8c3f020bfabaf482bf398203 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Thu, 29 Jun 2017 14:42:45 +0200 Subject: [PATCH 47/66] Fix incorrect merge --- 
sklearn/linear_model/logistic.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 0eaabd3740899..8dbb1bec93d3d 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -425,13 +425,8 @@ def hessp(v): def _check_solver_option(solver, multi_class, penalty, dual): if solver not in ['liblinear', 'newton-cg', 'lbfgs', 'sag', 'saga']: -<<<<<<< Updated upstream - raise ValueError("Logistic Regression supports only liblinear," - " newton-cg, lbfgs, sag and saga solvers, got %s" -======= raise ValueError("Logistic Regression supports only liblinear, " "newton-cg, lbfgs, sag and saga solvers, got %s" ->>>>>>> Stashed changes % solver) if multi_class not in ['multinomial', 'ovr']: From efd4a8d0369ca3b4ba6ee1a1735a0e4f9f3df227 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Thu, 29 Jun 2017 21:28:22 +0200 Subject: [PATCH 48/66] FIX remove assert_true --- sklearn/tests/test_docstring_parameters.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py index d77b35c8cc721..ca5ab850dfc0c 100644 --- a/sklearn/tests/test_docstring_parameters.py +++ b/sklearn/tests/test_docstring_parameters.py @@ -4,7 +4,6 @@ from __future__ import print_function -from nose.tools import assert_true import inspect import warnings import importlib @@ -166,6 +165,6 @@ def test_tabs(): source = getsource(mod) except IOError: # user probably should have run "make clean" continue - assert_true('\t' not in source, - '"%s" has tabs, please remove them or add it to the' - 'ignore list' % modname) + assert '\t' not in source, ('"%s" has tabs, please remove them ', + 'or add it to theignore list' + % modname) From a8f281c3a688b965df9f12f293870182f55bb8c1 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Fri, 30 Jun 2017 14:24:00 +0200 Subject: [PATCH 49/66] Use walk_packages to find the modules to check docstrings in --- sklearn/tests/test_docstring_parameters.py | 76 +++++++++------------- 1 file changed, 32 insertions(+), 44 deletions(-) diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py index ca5ab850dfc0c..3df78465e3728 100644 --- a/sklearn/tests/test_docstring_parameters.py +++ b/sklearn/tests/test_docstring_parameters.py @@ -23,37 +23,31 @@ '__neg__', '__hash__') +PUBLIC_MODULES = set(['sklearn.' 
+ pckg[1] + for pckg in walk_packages('sklearn.*') + if not pckg[1].startswith('_')]) + + # TODO Uncomment all modules and fix doc inconsistencies everywhere -PUBLIC_MODULES = [ - # the list of modules users need to access for all functionality - # 'sklearn', - 'sklearn.base', - 'sklearn.calibration', - 'sklearn.cluster', - 'sklearn.covariance', - # 'sklearn.cross_decomposition', - 'sklearn.datasets', - 'sklearn.decomposition', - # 'sklearn.ensemble', - 'sklearn.feature_extraction', - # 'sklearn.feature_selection', - 'sklearn.gaussian_process', - 'sklearn.isotonic', - 'sklearn.linear_model', - 'sklearn.manifold', - 'sklearn.multiclass', - 'sklearn.metrics', - 'sklearn.naive_bayes', - 'sklearn.mixture', - # 'sklearn.model_selection', - 'sklearn.neighbors', - 'sklearn.neural_network', - 'sklearn.preprocessing', - 'sklearn.pipeline', - 'sklearn.semi_supervised', - 'sklearn.tree', - # 'sklearn.utils', -] +# The list of modules that are not tested for now +PUBLIC_MODULES -= set([ + 'sklearn.cross_decomposition', + 'sklearn.discriminant_analysis', + 'sklearn.ensemble', + 'sklearn.feature_selection', + 'sklearn.kernel_approximation', + 'sklearn.model_selection', + 'sklearn.multioutput', + 'sklearn.random_projection', + 'sklearn.setup', + 'sklearn.svm', + 'sklearn.utils', + # Deprecated modules + 'sklearn.cross_validation', + 'sklearn.grid_search', + 'sklearn.learning_curve', +]) + # functions to ignore args / docstring of _DOCSTRING_IGNORES = [ @@ -71,9 +65,6 @@ 'sample_gaussian', ] -_TAB_IGNORES = [ -] - # Methods to test for, in any class _METHODS_IGNORE_NONE_Y = [ 'fit', @@ -154,17 +145,14 @@ def test_docstring_parameters(): @ignore_warnings(category=DeprecationWarning) def test_tabs(): """Test that there are no tabs in our source files""" - ignore = _TAB_IGNORES[:] - for importer, modname, ispkg in walk_packages(sklearn.__path__, prefix='sklearn.'): # because we don't import - if not ispkg and modname not in ignore: - mod = importlib.import_module(modname) - try: - source = getsource(mod) - except IOError: # user probably should have run "make clean" - continue - assert '\t' not in source, ('"%s" has tabs, please remove them ', - 'or add it to theignore list' - % modname) + mod = importlib.import_module(modname) + try: + source = getsource(mod) + except IOError: # user probably should have run "make clean" + continue + assert '\t' not in source, ('"%s" has tabs, please remove them ', + 'or add it to theignore list' + % modname) From e7f61c7a4fed472f8c3ba97b2d51927334da3ee8 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Fri, 30 Jun 2017 15:49:13 +0200 Subject: [PATCH 50/66] ENH Automatically identify deprecated classes/methods/functions --- sklearn/tests/test_docstring_parameters.py | 37 ++++++++++++---------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py index 3df78465e3728..0edbdb9a83b32 100644 --- a/sklearn/tests/test_docstring_parameters.py +++ b/sklearn/tests/test_docstring_parameters.py @@ -22,12 +22,10 @@ '__call__', '__add__', '__sub__', '__mul__', '__div__', '__neg__', '__hash__') - PUBLIC_MODULES = set(['sklearn.' 
+ pckg[1] for pckg in walk_packages('sklearn.*') if not pckg[1].startswith('_')]) - # TODO Uncomment all modules and fix doc inconsistencies everywhere # The list of modules that are not tested for now PUBLIC_MODULES -= set([ @@ -48,21 +46,12 @@ 'sklearn.learning_curve', ]) - # functions to ignore args / docstring of _DOCSTRING_IGNORES = [ 'sklearn.utils.deprecation.load_mlcomp', 'sklearn.pipeline.make_pipeline', 'sklearn.pipeline.make_union', 'sklearn.utils.extmath.safe_sparse_dot', - # Deprecated classes and functions - 'RandomizedPCA', - 'GaussianProcess', - 'VBGMM', - 'DPGMM', - 'GMM', - 'log_multivariate_normal_density', - 'sample_gaussian', ] # Methods to test for, in any class @@ -76,6 +65,15 @@ ] +def _is_deprecated(func): + closures = getattr(func, '__closure__', []) + if closures is None: + closures = [] + + return 'deprecated' in ''.join([c.cell_contents for c in closures + if isinstance(c.cell_contents, str)]) + + def test_docstring_parameters(): """Test module docstring formatting.""" try: @@ -104,12 +102,19 @@ def test_docstring_parameters(): if len(w): raise RuntimeError('Error for __init__ of %s in %s:\n%s' % (cls, name, w[0])) - if hasattr(cls, '__init__'): + + cls_init = getattr(cls, '__init__', None) + + if _is_deprecated(cls_init): + continue + + elif cls_init is not None: this_incorrect += check_parameters_match(cls.__init__, cdoc, class_name=cname) - for method_name in cdoc.methods: method = getattr(cls, method_name) + if _is_deprecated(method): + continue param_ignore = None # Now skip docstring test for y when y is None # by default for API reason @@ -122,7 +127,7 @@ def test_docstring_parameters(): class_name=cname) this_incorrect += result - if hasattr(cls, '__call__'): + if hasattr(cls, '__call__') and not _is_deprecated(cls.__call__): this_incorrect += check_parameters_match(cls.__call__, class_name=cname) @@ -134,8 +139,8 @@ def test_docstring_parameters(): if fname.startswith('_'): continue name_ = get_func_name(func) - if not any(d in name_ for d in _DOCSTRING_IGNORES) and \ - 'deprecation_wrapped' not in func.__code__.co_name: + if (not any(d in name_ for d in _DOCSTRING_IGNORES) and + not _is_deprecated(func)): incorrect += check_parameters_match(func) msg = '\n' + '\n'.join(sorted(list(set(incorrect)))) if len(incorrect) > 0: From dffc5fcc6a3f9975bdbdee61d2e5aaee05a7ca28 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Fri, 30 Jun 2017 16:29:39 +0200 Subject: [PATCH 51/66] ENH add _is_deprecated and tests for the same in utils --- sklearn/tests/test_docstring_parameters.py | 16 ++------ sklearn/utils/deprecation.py | 10 +++++ sklearn/utils/tests/test_deprecation.py | 47 ++++++++++++++++++++++ 3 files changed, 61 insertions(+), 12 deletions(-) create mode 100644 sklearn/utils/tests/test_deprecation.py diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py index 0edbdb9a83b32..118694f05b1f6 100644 --- a/sklearn/tests/test_docstring_parameters.py +++ b/sklearn/tests/test_docstring_parameters.py @@ -17,6 +17,7 @@ from sklearn.utils.testing import check_parameters_match from sklearn.utils.testing import get_func_name from sklearn.utils.testing import ignore_warnings +from sklearn.utils.deprecation import _is_deprecated _DOC_SPECIAL_MEMBERS = ('__contains__', '__getitem__', '__iter__', '__len__', '__call__', '__add__', '__sub__', '__mul__', '__div__', @@ -54,7 +55,7 @@ 'sklearn.utils.extmath.safe_sparse_dot', ] -# Methods to test for, in any class +# Methods where y param should be ignored if y=None by default 
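# ---------------------------------------------------------------------
# How the ``_is_deprecated`` helper introduced above works: decorators in
# the style of sklearn's ``deprecated`` format their message before
# defining the wrapper, so the wrapper keeps that string in a closure
# cell, which is what the helper greps for.  A toy stand-in
# (``deprecated_`` here is not sklearn's decorator):
import functools
import warnings

def deprecated_(extra):
    def decorate(fun):
        msg = 'Function %s is deprecated; %s' % (fun.__name__, extra)

        @functools.wraps(fun)
        def wrapped(*args, **kwargs):
            warnings.warn(msg, category=DeprecationWarning)
            return fun(*args, **kwargs)
        return wrapped
    return decorate

@deprecated_('use g instead')
def f():
    pass

cells = f.__closure__ or ()
print(any('deprecated' in c.cell_contents for c in cells
          if isinstance(c.cell_contents, str)))    # True
# (the method list described by the comment above follows:)
# ---------------------------------------------------------------------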
_METHODS_IGNORE_NONE_Y = [ 'fit', 'score', @@ -65,17 +66,8 @@ ] -def _is_deprecated(func): - closures = getattr(func, '__closure__', []) - if closures is None: - closures = [] - - return 'deprecated' in ''.join([c.cell_contents for c in closures - if isinstance(c.cell_contents, str)]) - - def test_docstring_parameters(): - """Test module docstring formatting.""" + # Test module docstring formatting try: import numpydoc # noqa except ImportError: @@ -86,7 +78,7 @@ def test_docstring_parameters(): incorrect = [] for name in PUBLIC_MODULES: - with warnings.catch_warnings(record=True): # traits warnings + with warnings.catch_warnings(record=True): module = __import__(name, globals()) for submod in name.split('.')[1:]: module = getattr(module, submod) diff --git a/sklearn/utils/deprecation.py b/sklearn/utils/deprecation.py index 14ae308b29d0e..614f2ebd56fa5 100644 --- a/sklearn/utils/deprecation.py +++ b/sklearn/utils/deprecation.py @@ -87,3 +87,13 @@ def _update_doc(self, olddoc): if olddoc: newdoc = "%s\n\n%s" % (newdoc, olddoc) return newdoc + + +def _is_deprecated(func): + """Helper to check if func is wraped by our deprecated decorator""" + closures = getattr(func, '__closure__', []) + if closures is None: + closures = [] + + return 'deprecated' in ''.join([c.cell_contents for c in closures + if isinstance(c.cell_contents, str)]) diff --git a/sklearn/utils/tests/test_deprecation.py b/sklearn/utils/tests/test_deprecation.py new file mode 100644 index 0000000000000..267109cc2e3cd --- /dev/null +++ b/sklearn/utils/tests/test_deprecation.py @@ -0,0 +1,47 @@ +# Authors: Raghav RV +# License: BSD 3 clause + + +from sklearn.utils.deprecation import _is_deprecated +from sklearn.utils.deprecation import deprecated +from sklearn.utils.testing import assert_warns_message + + +@deprecated('qwerty') +class MockClass1: + pass + + +class MockClass2: + @deprecated('mockclass2_method') + def method(self): + pass + + +class MockClass3: + @deprecated() + def __init__(self): + pass + + +@deprecated() +def mock_function(): + return 10 + + +def test_deprecated(): + assert_warns_message(DeprecationWarning, 'qwerty', MockClass1) + assert_warns_message(DeprecationWarning, 'mockclass2_method', + MockClass2().method) + assert_warns_message(DeprecationWarning, 'deprecated', MockClass3) + val = assert_warns_message(DeprecationWarning, 'deprecated', mock_function) + assert val == 10 + + +def test_is_deprecated(): + # Test if _is_deprecated helper identifies wrapping via deprecated + # NOTE it works only for class methods and functions + assert _is_deprecated(MockClass1.__init__) + assert _is_deprecated(MockClass2().method) + assert _is_deprecated(MockClass3.__init__) + assert _is_deprecated(mock_function) From 6788174df2f70795d3792fad238b1b4647ac63d3 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Fri, 30 Jun 2017 17:18:11 +0200 Subject: [PATCH 52/66] TST Use importlib --- sklearn/tests/test_docstring_parameters.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py index 118694f05b1f6..153359ee8762a 100644 --- a/sklearn/tests/test_docstring_parameters.py +++ b/sklearn/tests/test_docstring_parameters.py @@ -79,9 +79,7 @@ def test_docstring_parameters(): incorrect = [] for name in PUBLIC_MODULES: with warnings.catch_warnings(record=True): - module = __import__(name, globals()) - for submod in name.split('.')[1:]: - module = getattr(module, submod) + module = importlib.import_module(name) classes = 
inspect.getmembers(module, inspect.isclass) for cname, cls in classes: this_incorrect = [] @@ -141,7 +139,7 @@ def test_docstring_parameters(): @ignore_warnings(category=DeprecationWarning) def test_tabs(): - """Test that there are no tabs in our source files""" + # Test that there are no tabs in our source files for importer, modname, ispkg in walk_packages(sklearn.__path__, prefix='sklearn.'): # because we don't import From 0e2c56a9dfc51b46c2e469ad17472437971b34e3 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Mon, 3 Jul 2017 14:28:19 +0200 Subject: [PATCH 53/66] FIX address Andy's comments --- sklearn/utils/testing.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py index d4f85d9ca139c..ebacc7481503e 100644 --- a/sklearn/utils/testing.py +++ b/sklearn/utils/testing.py @@ -847,21 +847,20 @@ def check_parameters_match(func, doc=None, ignore=None, class_name=None): """ from numpydoc import docscrape incorrect = [] + ignore = [] if ignore is None else ignore + func_name = get_func_name(func, class_name=class_name) if (not func_name.startswith('sklearn.') or func_name.startswith('sklearn.externals')): return incorrect + # Don't check docstring for property-functions if inspect.isdatadescriptor(func): return incorrect - args = _get_args(func) + args = list(filter(lambda x: x in ignore, _get_args(func))) # drop self if len(args) > 0 and args[0] == 'self': args.remove('self') - if ignore is not None: - for p in ignore: - args.remove(p) - if doc is None: with warnings.catch_warnings(record=True) as w: try: @@ -877,11 +876,11 @@ def check_parameters_match(func, doc=None, ignore=None, class_name=None): if (type_definition.strip() == "" or type_definition.strip().startswith(':')): + param_name = name.lstrip() + # If there was no space between name and the colon # "verbose:" -> len(["verbose", ""][0]) -> 7 # If "verbose:"[7] == ":", then there was no space - param_name = name.lstrip() - if param_name[len(param_name.split(':')[0].strip())] == ':': incorrect += [func_name + ' There was no space between the param name and ' @@ -893,10 +892,7 @@ def check_parameters_match(func, doc=None, ignore=None, class_name=None): if '*' not in name: param_names.append(name.split(':')[0].strip('` ')) - if ignore is not None: - for p in ignore: - if p in param_names: - param_names.remove(p) + param_names = list(filter(lambda x: x in ignore, param_names)) if len(param_names) != len(args): bad = str(sorted(list(set(param_names) - set(args)) + From 11bc8c0922a5ffc163eae0e92ea134b0b404b7b2 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Mon, 3 Jul 2017 14:35:40 +0200 Subject: [PATCH 54/66] ENH Use symmetric_difference for comparison --- sklearn/utils/testing.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py index ebacc7481503e..8e0880608a329 100644 --- a/sklearn/utils/testing.py +++ b/sklearn/utils/testing.py @@ -895,8 +895,7 @@ def check_parameters_match(func, doc=None, ignore=None, class_name=None): param_names = list(filter(lambda x: x in ignore, param_names)) if len(param_names) != len(args): - bad = str(sorted(list(set(param_names) - set(args)) + - list(set(args) - set(param_names)))) + bad = str(sorted(list(set(param_names) ^ set(args))) incorrect += [func_name + ' arg mismatch: ' + bad] else: for n1, n2 in zip(param_names, args): From aaa677016583d3b0b0f28fd65147048008ab419e Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Mon, 3 Jul 2017 16:28:37 +0200 
Subject: [PATCH 55/66] Address Andy's comments --- sklearn/utils/testing.py | 6 +- sklearn/utils/tests/test_testing.py | 93 +++++++++++++++++++++++++++-- 2 files changed, 92 insertions(+), 7 deletions(-) diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py index 8e0880608a329..1335909a4b6f0 100644 --- a/sklearn/utils/testing.py +++ b/sklearn/utils/testing.py @@ -856,7 +856,7 @@ def check_parameters_match(func, doc=None, ignore=None, class_name=None): # Don't check docstring for property-functions if inspect.isdatadescriptor(func): return incorrect - args = list(filter(lambda x: x in ignore, _get_args(func))) + args = list(filter(lambda x: x not in ignore, _get_args(func))) # drop self if len(args) > 0 and args[0] == 'self': args.remove('self') @@ -892,10 +892,10 @@ def check_parameters_match(func, doc=None, ignore=None, class_name=None): if '*' not in name: param_names.append(name.split(':')[0].strip('` ')) - param_names = list(filter(lambda x: x in ignore, param_names)) + param_names = list(filter(lambda x: x not in ignore, param_names)) if len(param_names) != len(args): - bad = str(sorted(list(set(param_names) ^ set(args))) + bad = str(sorted(list(set(param_names) ^ set(args)))) incorrect += [func_name + ' arg mismatch: ' + bad] else: for n1, n2 in zip(param_names, args): diff --git a/sklearn/utils/tests/test_testing.py b/sklearn/utils/tests/test_testing.py index 38d6b2c3d39c5..d2c92aeb95a8d 100644 --- a/sklearn/utils/tests/test_testing.py +++ b/sklearn/utils/tests/test_testing.py @@ -4,6 +4,8 @@ import numpy as np from scipy import sparse +from sklearn.utils.deprecation import deprecated +from sklearn.utils.metaestimators import if_delegate_has_method from sklearn.utils.testing import ( assert_true, assert_raises, @@ -355,6 +357,81 @@ def f_bad_sections(self, X, y): pass +class MockEst(object): + def __init__(self): + """MockEstimator""" + def fit(self, X, y): + return X + def predict(self, X): + return X + def predict_proba(self, X): + return X + def score(self, X): + return 1. + + +class MockMetaEstimator(object): + def __init__(self, delegate): + """MetaEstimator to check if doctest on delegated methods work. + + Parameters + --------- + delegate : estimator + Delegated estimator. + """ + self.delegate = delegate + + @if_delegate_has_method(delegate=('delegate')) + def predict(self, X): + """This is available only if delegate has predict. + + Parameters + ---------- + y : ndarray + Parameter y + """ + return self.delegate.predict(X) + + @deprecated("Testing a deprecated delegated method") + @if_delegate_has_method(delegate=('delegate')) + def score(self, X): + """This is available only if delegate has score. + + Parameters + --------- + y : ndarray + Parameter y + """ + + @if_delegate_has_method(delegate=('delegate')) + def predict_proba(self, X): + """This is available only if delegate has predict_proba. + + Parameters + --------- + X : ndarray + Parameter X + """ + return X + + @deprecated('Testing deprecated function with correct params') + @if_delegate_has_method(delegate=('delegate')) + def predict_proba(self, X): + """This is available only if delegate has predict_proba. 
+ + Parameters + --------- + X : ndarray + Parameter X + """ + return X + + @deprecated('Testing deprecated function with wrong params') + @if_delegate_has_method(delegate=('delegate')) + def fit(self, X, y): + """Incorrect docstring but should not be tested""" + + def test_check_parameters_match(): try: import numpydoc # noqa @@ -373,10 +450,18 @@ def test_check_parameters_match(): assert_raise_message(RuntimeError, 'Unknown section Parameter', check_parameters_match, Klass.f_bad_sections) - messages = ['a != b'] - messages += ["arg mismatch: ['b']"] - messages += ["arg mismatch: ['X', 'y']"] - for mess, f in zip(messages, [f_bad_order, f_missing, Klass.f_missing]): + messages = ["a != b", "arg mismatch: ['b']", "arg mismatch: ['X', 'y']", + "predict y != X", + "predict_proba arg mismatch: ['X']", + "score arg mismatch: ['X']", + ".fit arg mismatch: ['X', 'y']"] + + mock_meta = MockMetaEstimator(delegate=MockEst()) + + for mess, f in zip(messages, + [f_bad_order, f_missing, Klass.f_missing, + mock_meta.predict, mock_meta.predict_proba, + mock_meta.score, mock_meta.fit]): incorrect = check_parameters_match(f) assert_true(len(incorrect) >= 1) assert_true(mess in incorrect[0], From fcf5538a3910569d501ffe511b0c4e1124ff9fdd Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Mon, 3 Jul 2017 16:39:06 +0200 Subject: [PATCH 56/66] Skip test for builds which have python <= 3.5 --- sklearn/tests/test_docstring_parameters.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py index 153359ee8762a..777a949aff8e8 100644 --- a/sklearn/tests/test_docstring_parameters.py +++ b/sklearn/tests/test_docstring_parameters.py @@ -5,6 +5,7 @@ from __future__ import print_function import inspect +import sys import warnings import importlib @@ -68,9 +69,12 @@ def test_docstring_parameters(): # Test module docstring formatting + + # Skip test if numpydoc is not found or if python version is < 3.5 try: import numpydoc # noqa - except ImportError: + assert sys.version_info >= (3, 5) + except (ImportError, AssertionError): raise SkipTest( "numpydoc is required to test the docstrings") From 6319bf6910c0670a68efd4c8ccb125627c5e9236 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Tue, 4 Jul 2017 13:46:15 +0200 Subject: [PATCH 57/66] Skip both tests --- sklearn/utils/tests/test_testing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/utils/tests/test_testing.py b/sklearn/utils/tests/test_testing.py index d2c92aeb95a8d..71d6edc6be461 100644 --- a/sklearn/utils/tests/test_testing.py +++ b/sklearn/utils/tests/test_testing.py @@ -435,7 +435,8 @@ def fit(self, X, y): def test_check_parameters_match(): try: import numpydoc # noqa - except ImportError: + assert sys.version_info >= (3, 5) + except (ImportError, AssertionError): raise SkipTest( "numpydoc is required to test the docstrings") From d3ab24098e3d36688ce1f2474562245a487e0437 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Thu, 6 Jul 2017 11:14:17 +0200 Subject: [PATCH 58/66] Make _is_deprecated work in python2.7 --- sklearn/utils/deprecation.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sklearn/utils/deprecation.py b/sklearn/utils/deprecation.py index 614f2ebd56fa5..1299acdd1e228 100644 --- a/sklearn/utils/deprecation.py +++ b/sklearn/utils/deprecation.py @@ -94,6 +94,9 @@ def _is_deprecated(func): closures = getattr(func, '__closure__', []) if closures is None: closures = [] - - return 'deprecated' in 
''.join([c.cell_contents for c in closures - if isinstance(c.cell_contents, str)]) + func_coname = getattr(getattr(func, '__code__', ''), 'co_name', '') + is_deprecated = 'deprecation_wrapped' not in func_coname + is_deprecated |= ('deprecated' in ''.join([c.cell_contents + for c in closures + if isinstance(c.cell_contents, str)])) + return is_deprecated From e4e1f6ce01a3e10b1895fdc08e5d86834c5b0a3a Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Thu, 6 Jul 2017 12:13:34 +0200 Subject: [PATCH 59/66] FIX up the tests to include predict_log_proba --- sklearn/utils/tests/test_testing.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sklearn/utils/tests/test_testing.py b/sklearn/utils/tests/test_testing.py index 71d6edc6be461..af7f129f906b8 100644 --- a/sklearn/utils/tests/test_testing.py +++ b/sklearn/utils/tests/test_testing.py @@ -362,10 +362,13 @@ def __init__(self): """MockEstimator""" def fit(self, X, y): return X + def predict(self, X): return X + def predict_proba(self, X): return X + def score(self, X): return 1. @@ -414,14 +417,14 @@ def predict_proba(self, X): """ return X - @deprecated('Testing deprecated function with correct params') + @deprecated('Testing deprecated function with incorrect params') @if_delegate_has_method(delegate=('delegate')) - def predict_proba(self, X): + def predict_log_proba(self, X): """This is available only if delegate has predict_proba. Parameters --------- - X : ndarray + y : ndarray Parameter X """ return X @@ -454,6 +457,7 @@ def test_check_parameters_match(): messages = ["a != b", "arg mismatch: ['b']", "arg mismatch: ['X', 'y']", "predict y != X", "predict_proba arg mismatch: ['X']", + "predict_log_proba arg mismatch: ['X']", "score arg mismatch: ['X']", ".fit arg mismatch: ['X', 'y']"] @@ -462,6 +466,7 @@ def test_check_parameters_match(): for mess, f in zip(messages, [f_bad_order, f_missing, Klass.f_missing, mock_meta.predict, mock_meta.predict_proba, + mock_meta.predict_log_proba, mock_meta.score, mock_meta.fit]): incorrect = check_parameters_match(f) assert_true(len(incorrect) >= 1) From c4c30febbcc04c259f7fb50ea8cb7f938613a92d Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Thu, 6 Jul 2017 16:46:45 +0200 Subject: [PATCH 60/66] Fix typo in _is_deprecated logic --- sklearn/utils/deprecation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/deprecation.py b/sklearn/utils/deprecation.py index 1299acdd1e228..96a8fd0099c76 100644 --- a/sklearn/utils/deprecation.py +++ b/sklearn/utils/deprecation.py @@ -95,7 +95,7 @@ def _is_deprecated(func): if closures is None: closures = [] func_coname = getattr(getattr(func, '__code__', ''), 'co_name', '') - is_deprecated = 'deprecation_wrapped' not in func_coname + is_deprecated = 'deprecation_wrapped' in func_coname is_deprecated |= ('deprecated' in ''.join([c.cell_contents for c in closures if isinstance(c.cell_contents, str)])) From fe54c9332a411e3fc971b7c31d518ab263d5f0d4 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Thu, 6 Jul 2017 16:49:47 +0200 Subject: [PATCH 61/66] FIX tests shouldn't separately test for __call__. 
It's included while iterating methods --- sklearn/tests/test_docstring_parameters.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py index 777a949aff8e8..ec9d0cdfa6fb9 100644 --- a/sklearn/tests/test_docstring_parameters.py +++ b/sklearn/tests/test_docstring_parameters.py @@ -89,7 +89,7 @@ def test_docstring_parameters(): this_incorrect = [] if cname in _DOCSTRING_IGNORES: continue - if cname.startswith('_') and cname not in _DOC_SPECIAL_MEMBERS: + if cname.startswith('_'): continue with warnings.catch_warnings(record=True) as w: cdoc = docscrape.ClassDoc(cls) @@ -121,10 +121,6 @@ def test_docstring_parameters(): class_name=cname) this_incorrect += result - if hasattr(cls, '__call__') and not _is_deprecated(cls.__call__): - this_incorrect += check_parameters_match(cls.__call__, - class_name=cname) - incorrect += this_incorrect functions = inspect.getmembers(module, inspect.isfunction) From e1bc4c10d1ac47baa75751ac7a054dec3ff17d62 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Thu, 6 Jul 2017 16:58:00 +0200 Subject: [PATCH 62/66] Remove redundant _DOC_SPECIAL_MEMBERS --- sklearn/tests/test_docstring_parameters.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py index ec9d0cdfa6fb9..c3930d54d8f64 100644 --- a/sklearn/tests/test_docstring_parameters.py +++ b/sklearn/tests/test_docstring_parameters.py @@ -20,10 +20,6 @@ from sklearn.utils.testing import ignore_warnings from sklearn.utils.deprecation import _is_deprecated -_DOC_SPECIAL_MEMBERS = ('__contains__', '__getitem__', '__iter__', '__len__', - '__call__', '__add__', '__sub__', '__mul__', '__div__', - '__neg__', '__hash__') - PUBLIC_MODULES = set(['sklearn.' 
+ pckg[1] for pckg in walk_packages('sklearn.*')
                      if not pckg[1].startswith('_')])

From 14656050aa40df601c54cb9afb6bb29ecac0de77 Mon Sep 17 00:00:00 2001
From: Raghav RV
Date: Thu, 6 Jul 2017 17:22:04 +0200
Subject: [PATCH 63/66] Skip _is_deprecated and its tests for < python3.5

---
 sklearn/utils/deprecation.py | 11 +++++++----
 sklearn/utils/tests/test_deprecation.py | 10 ++++++++++
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/sklearn/utils/deprecation.py b/sklearn/utils/deprecation.py
index 96a8fd0099c76..50f61494d838f 100644
--- a/sklearn/utils/deprecation.py
+++ b/sklearn/utils/deprecation.py
@@ -1,3 +1,4 @@
+import sys
 import warnings
 
 __all__ = ["deprecated", ]
@@ -91,12 +92,14 @@ def _update_doc(self, olddoc):
 
 def _is_deprecated(func):
     """Helper to check if func is wrapped by our deprecated decorator"""
+    if sys.version_info < (3, 5):
+        raise NotImplementedError("This is only available for python3.5 "
+                                  "or above")
     closures = getattr(func, '__closure__', [])
     if closures is None:
         closures = []
     func_coname = getattr(getattr(func, '__code__', ''), 'co_name', '')
-    is_deprecated = 'deprecation_wrapped' in func_coname
-    is_deprecated |= ('deprecated' in ''.join([c.cell_contents
-                                               for c in closures
-                                               if isinstance(c.cell_contents, str)]))
+    is_deprecated = ('deprecated' in ''.join([c.cell_contents
+                                              for c in closures
+                                              if isinstance(c.cell_contents, str)]))
     return is_deprecated
diff --git a/sklearn/utils/tests/test_deprecation.py b/sklearn/utils/tests/test_deprecation.py
index 267109cc2e3cd..31a92bc442cc9 100644
--- a/sklearn/utils/tests/test_deprecation.py
+++ b/sklearn/utils/tests/test_deprecation.py
@@ -2,9 +2,12 @@
 # License: BSD 3 clause
 
 
+import sys
+
 from sklearn.utils.deprecation import _is_deprecated
 from sklearn.utils.deprecation import deprecated
 from sklearn.utils.testing import assert_warns_message
+from sklearn.utils.testing import SkipTest
 
 
 @deprecated('qwerty')
@@ -24,6 +27,10 @@ def __init__(self):
         pass
 
 
+class MockClass4:
+    pass
+
+
 @deprecated()
 def mock_function():
     return 10
@@ -39,9 +46,12 @@ def test_deprecated():
 
 
 def test_is_deprecated():
+    if sys.version_info < (3, 5):
+        raise SkipTest("This test will run only on python3.5 and above")
     # Test if _is_deprecated helper identifies wrapping via deprecated
     # NOTE it works only for class methods and functions
     assert _is_deprecated(MockClass1.__init__)
     assert _is_deprecated(MockClass2().method)
     assert _is_deprecated(MockClass3.__init__)
+    assert not _is_deprecated(MockClass4.__init__)
     assert _is_deprecated(mock_function)

From 631d0c501572bf91d82c402bc9459900097e7348 Mon Sep 17 00:00:00 2001
From: Raghav RV
Date: Thu, 6 Jul 2017 18:02:07 +0200
Subject: [PATCH 64/66] Flake8

---
 sklearn/utils/deprecation.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/utils/deprecation.py b/sklearn/utils/deprecation.py
index 50f61494d838f..ca305e5cb3f62 100644
--- a/sklearn/utils/deprecation.py
+++ b/sklearn/utils/deprecation.py
@@ -98,7 +98,6 @@ def _is_deprecated(func):
     closures = getattr(func, '__closure__', [])
     if closures is None:
         closures = []
-    func_coname = getattr(getattr(func, '__code__', ''), 'co_name', '')
     is_deprecated = ('deprecated' in ''.join([c.cell_contents
                                               for c in closures
                                               if isinstance(c.cell_contents, str)]))

From f5192da0fe10f12279685d6ef3a67c5ecdd06a58 Mon Sep 17 00:00:00 2001
From: Raghav RV
Date: Sun, 9 Jul 2017 11:02:12 -0500
Subject: [PATCH 65/66] ENH address Joel's comments

---
 sklearn/decomposition/dict_learning.py | 2 +-
 sklearn/tests/test_docstring_parameters.py | 14 +++++++-------
sklearn/utils/testing.py | 4 ++-- sklearn/utils/tests/test_testing.py | 18 +++++++++--------- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/sklearn/decomposition/dict_learning.py b/sklearn/decomposition/dict_learning.py index 899a73d0068d4..62cd2cd2aa101 100644 --- a/sklearn/decomposition/dict_learning.py +++ b/sklearn/decomposition/dict_learning.py @@ -435,7 +435,7 @@ def dict_learning(X, n_components, alpha, max_iter=100, tol=1e-8, Initial value for the sparse code for warm restart scenarios. callback : callable or None, optional (default: None) - callable that gets invoked every five iterations + Callable that gets invoked every five iterations verbose : bool, optional (default: False) To control the verbosity of the procedure. diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py index c3930d54d8f64..2ac6cfbccc264 100644 --- a/sklearn/tests/test_docstring_parameters.py +++ b/sklearn/tests/test_docstring_parameters.py @@ -15,8 +15,8 @@ import sklearn from sklearn.base import signature from sklearn.utils.testing import SkipTest -from sklearn.utils.testing import check_parameters_match -from sklearn.utils.testing import get_func_name +from sklearn.utils.testing import check_docstring_parameters +from sklearn.utils.testing import _get_func_name from sklearn.utils.testing import ignore_warnings from sklearn.utils.deprecation import _is_deprecated @@ -99,8 +99,8 @@ def test_docstring_parameters(): continue elif cls_init is not None: - this_incorrect += check_parameters_match(cls.__init__, cdoc, - class_name=cname) + this_incorrect += check_docstring_parameters( + cls.__init__, cdoc, class_name=cname) for method_name in cdoc.methods: method = getattr(cls, method_name) if _is_deprecated(method): @@ -113,8 +113,8 @@ def test_docstring_parameters(): if ('y' in sig.parameters and sig.parameters['y'].default is None): param_ignore = ['y'] # ignore y for fit and score - result = check_parameters_match(method, ignore=param_ignore, - class_name=cname) + result = check_docstring_parameters( + method, ignore=param_ignore, class_name=cname) this_incorrect += result incorrect += this_incorrect @@ -127,7 +127,7 @@ def test_docstring_parameters(): name_ = get_func_name(func) if (not any(d in name_ for d in _DOCSTRING_IGNORES) and not _is_deprecated(func)): - incorrect += check_parameters_match(func) + incorrect += check_docstring_parameters(func) msg = '\n' + '\n'.join(sorted(list(set(incorrect)))) if len(incorrect) > 0: raise AssertionError(msg) diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py index 1335909a4b6f0..0c2088acbb8d9 100644 --- a/sklearn/utils/testing.py +++ b/sklearn/utils/testing.py @@ -796,7 +796,7 @@ def _get_args(function, varargs=False): return args -def get_func_name(func, class_name=None): +def _get_func_name(func, class_name=None): """Get function full name Parameters @@ -825,7 +825,7 @@ def get_func_name(func, class_name=None): return '.'.join(parts) -def check_parameters_match(func, doc=None, ignore=None, class_name=None): +def check_docstring_parameters(func, doc=None, ignore=None, class_name=None): """Helper to check docstring Parameters diff --git a/sklearn/utils/tests/test_testing.py b/sklearn/utils/tests/test_testing.py index af7f129f906b8..cf18de0b35b11 100644 --- a/sklearn/utils/tests/test_testing.py +++ b/sklearn/utils/tests/test_testing.py @@ -19,7 +19,7 @@ set_random_state, assert_raise_message, ignore_warnings, - check_parameters_match, + check_docstring_parameters, 
assert_allclose_dense_sparse) from sklearn.utils.testing import SkipTest @@ -435,7 +435,7 @@ def fit(self, X, y): """Incorrect docstring but should not be tested""" -def test_check_parameters_match(): +def test_check_docstring_parameters(): try: import numpydoc # noqa assert sys.version_info >= (3, 5) @@ -443,16 +443,16 @@ def test_check_parameters_match(): raise SkipTest( "numpydoc is required to test the docstrings") - incorrect = check_parameters_match(f_ok) + incorrect = check_docstring_parameters(f_ok) assert_equal(incorrect, []) - incorrect = check_parameters_match(f_ok, ignore=['b']) + incorrect = check_docstring_parameters(f_ok, ignore=['b']) assert_equal(incorrect, []) - incorrect = check_parameters_match(f_missing, ignore=['b']) + incorrect = check_docstring_parameters(f_missing, ignore=['b']) assert_equal(incorrect, []) assert_raise_message(RuntimeError, 'Unknown section Results', - check_parameters_match, f_bad_sections) + check_docstring_parameters, f_bad_sections) assert_raise_message(RuntimeError, 'Unknown section Parameter', - check_parameters_match, Klass.f_bad_sections) + check_docstring_parameters, Klass.f_bad_sections) messages = ["a != b", "arg mismatch: ['b']", "arg mismatch: ['X', 'y']", "predict y != X", @@ -468,12 +468,12 @@ def test_check_parameters_match(): mock_meta.predict, mock_meta.predict_proba, mock_meta.predict_log_proba, mock_meta.score, mock_meta.fit]): - incorrect = check_parameters_match(f) + incorrect = check_docstring_parameters(f) assert_true(len(incorrect) >= 1) assert_true(mess in incorrect[0], '"%s" not in "%s"' % (mess, incorrect[0])) - incorrect = check_parameters_match(f_check_param_definition) + incorrect = check_docstring_parameters(f_check_param_definition) assert_equal( incorrect, ['sklearn.utils.tests.test_testing.f_check_param_definition There was ' From 7e3e2ca55c6c9f26b8c0450cf57695cd99f09a05 Mon Sep 17 00:00:00 2001 From: Raghav RV Date: Mon, 10 Jul 2017 09:45:22 -0500 Subject: [PATCH 66/66] Address the rest of the comments --- sklearn/tests/test_docstring_parameters.py | 2 +- sklearn/utils/testing.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py index 2ac6cfbccc264..584c4f2e7ceed 100644 --- a/sklearn/tests/test_docstring_parameters.py +++ b/sklearn/tests/test_docstring_parameters.py @@ -124,7 +124,7 @@ def test_docstring_parameters(): # Don't test private methods / functions if fname.startswith('_'): continue - name_ = get_func_name(func) + name_ = _get_func_name(func) if (not any(d in name_ for d in _DOCSTRING_IGNORES) and not _is_deprecated(func)): incorrect += check_docstring_parameters(func) diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py index 0264149ea11c8..cfaefc88d2308 100644 --- a/sklearn/utils/testing.py +++ b/sklearn/utils/testing.py @@ -849,7 +849,7 @@ def check_docstring_parameters(func, doc=None, ignore=None, class_name=None): incorrect = [] ignore = [] if ignore is None else ignore - func_name = get_func_name(func, class_name=class_name) + func_name = _get_func_name(func, class_name=class_name) if (not func_name.startswith('sklearn.') or func_name.startswith('sklearn.externals')): return incorrect
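
A note on the PUBLIC_MODULES set whose tail is visible at the end of patch 62's
diff above: pkgutil's documented calling convention takes a list of filesystem
paths plus a name prefix. Below is a standalone sketch of the same enumeration
under that convention; it is illustrative only, not the exact expression used
in the patch, and assumes an importable sklearn.

import sklearn
from pkgutil import walk_packages

# walk_packages yields (finder, name, ispkg) tuples; with prefix='sklearn.'
# the names come back fully qualified. Drop anything with a private component.
PUBLIC_MODULES = set(name for _, name, _ in
                     walk_packages(path=sklearn.__path__, prefix='sklearn.')
                     if '._' not in name)
print(sorted(PUBLIC_MODULES)[:5])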
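
The closure check that patches 60, 63 and 64 converge on works because
sklearn's deprecated decorator closes over a message string containing the
word "deprecated", so the wrapper's __closure__ cells carry that string. The
following self-contained Python sketch shows the mechanism; the deprecated()
here is a simplified toy stand-in (the real decorator is a class), and only
the _is_deprecated body mirrors the patched code. Nothing below is part of
the series.

import functools
import warnings


def deprecated(extra=''):
    # Toy stand-in: like sklearn's decorator, it bakes a message containing
    # the word "deprecated" into the wrapper's closure.
    def decorate(fun):
        msg = 'Function %s is deprecated' % fun.__name__
        if extra:
            msg += '; %s' % extra

        @functools.wraps(fun)
        def wrapped(*args, **kwargs):
            warnings.warn(msg, category=DeprecationWarning)
            return fun(*args, **kwargs)
        return wrapped
    return decorate


def _is_deprecated(func):
    # Same logic as deprecation.py after patch 64: scan the str cells of the
    # function's closure for the marker word.
    closures = getattr(func, '__closure__', []) or []
    return 'deprecated' in ''.join(c.cell_contents for c in closures
                                   if isinstance(c.cell_contents, str))


@deprecated('use new_func instead')
def old_func():
    return 1


def new_func():
    return 2


assert _is_deprecated(old_func)      # closure carries the deprecation message
assert not _is_deprecated(new_func)  # plain function: __closure__ is None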
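
The renamed helper from patches 65 and 66 can also be driven by hand. A
hypothetical usage sketch, assuming a checkout with this series applied and
numpydoc installed; note that check_docstring_parameters short-circuits to an
empty list for anything outside the sklearn namespace (visible in the last
hunk above), so it only says something meaningful about sklearn's own
functions.

from sklearn.datasets import make_classification
from sklearn.utils.testing import check_docstring_parameters

# Returns a list of human-readable mismatch strings; an empty list means the
# docstring's Parameters section agrees with the call signature (or that the
# function was skipped).
issues = check_docstring_parameters(make_classification)
print(issues)

# ignore=[...] drops named arguments from the comparison; the test suite uses
# this for fit/score methods whose y=None parameter is conventionally left
# undocumented.
issues = check_docstring_parameters(make_classification, ignore=['shuffle'])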