From eb2d93aab2929239e586e2fb341a3a69fdc2345e Mon Sep 17 00:00:00 2001 From: Ivan Nazarov Date: Sat, 19 Nov 2016 14:44:48 +0300 Subject: [PATCH 01/41] ENH: nu-SVDD with sample weights, based on Chang, Lee, Lin (2013) --- doc/modules/classes.rst | 3 +- doc/modules/outlier_detection.rst | 46 +++++- doc/modules/svm.rst | 200 +++++++++++++++++++++++++- examples/svm/plot_oneclass.py | 6 +- examples/svm/plot_oneclass_vs_svdd.py | 102 +++++++++++++ sklearn/svm/__init__.py | 4 +- sklearn/svm/_base.py | 7 +- sklearn/svm/_classes.py | 151 ++++++++++++++++++- sklearn/svm/_libsvm.pyx | 12 ++ sklearn/svm/src/libsvm/svm.cpp | 147 +++++++++++++++++-- sklearn/svm/src/libsvm/svm.h | 2 +- sklearn/svm/tests/test_sparse.py | 18 +++ sklearn/svm/tests/test_svm.py | 89 ++++++++++++ 13 files changed, 762 insertions(+), 25 deletions(-) create mode 100644 examples/svm/plot_oneclass_vs_svdd.py diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index c6838556d50ad..97cc866780347 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -1529,9 +1529,10 @@ Estimators svm.LinearSVR svm.NuSVC svm.NuSVR - svm.OneClassSVM svm.SVC svm.SVR + svm.OneClassSVM + svm.SVDD .. autosummary:: :toctree: generated/ diff --git a/doc/modules/outlier_detection.rst b/doc/modules/outlier_detection.rst index 75a191a767aa5..4abb126cb6f22 100644 --- a/doc/modules/outlier_detection.rst +++ b/doc/modules/outlier_detection.rst @@ -157,8 +157,8 @@ coming from the same population than the initial observations. Otherwise, if they lay outside the frontier, we can say that they are abnormal with a given confidence in our assessment. -The One-Class SVM has been introduced by Schölkopf et al. for that purpose -and implemented in the :ref:`svm` module in the +The :ref:`svm_one_class_svm` has been introduced by Schölkopf et al. +for that purpose and implemented in the :ref:`svm` module in the :class:`svm.OneClassSVM` object. It requires the choice of a kernel and a scalar parameter to define a frontier. The RBF kernel is usually chosen although there exists no exact formula or algorithm to @@ -167,12 +167,29 @@ implementation. The `nu` parameter, also known as the margin of the One-Class SVM, corresponds to the probability of finding a new, but regular, observation outside the frontier. +The Support Vector Data Description (:ref:`svm_svdd`) is an alternative +model for estimating the support of a data distribution. It was proposed +by Tax and Duin, and later reformulated by Chang et al. The reparametrized +SVDD model, which has better parameter interpretability, is implemented +in the :class:`svm.SVDD` object in the :ref:`svm` module. The interface +as well as the interpretation of the parameters is similar to the +:ref:`svm_one_class_svm` model. + .. topic:: References: * `Estimating the support of a high-dimensional distribution `_ Schölkopf, Bernhard, et al. Neural computation 13.7 (2001): 1443-1471. + * `Support vector data description + `_ + Tax, and Duin. Machine learning, 54(1) (2004), pp.45-66. + + * `A revisit to support vector data description (SVDD). + `_ Chang, Lee, + and Lin. Technical Report (2013), Dept. of Computer Science, + National Taiwan University. + .. topic:: Examples: * See :ref:`sphx_glr_auto_examples_svm_plot_oneclass.py` for visualizing the @@ -415,3 +432,28 @@ Novelty detection with Local Outlier Factor is illustrated below. :target: ../auto_examples/neighbors/plot_lof_novelty_detection.html :align: center :scale: 75% + +.. 
_outlier_detection_ocsvm_vs_svdd:
+
+One-Class SVM versus SVDD-L1
+----------------------------
+
+The :ref:`svm_one_class_svm` and :ref:`svm_svdd` models, though apparently
+different, both attempt to construct a hypersurface enveloping the densest
+regions of the training sample. In the case of a stationary kernel
+:math:`K(x,y)=K(x-y)`, such as RBF (see :ref:`svm_kernels`), for
+:math:`\nu\in (0,1)` the decision functions are identical:
+
+.. figure:: ../auto_examples/svm/images/sphx_glr_plot_oneclass_vs_svdd_001.png
+   :target: ../auto_examples/svm/plot_oneclass_vs_svdd.html
+   :align: center
+   :scale: 75%
+
+But for a non-stationary kernel :math:`K(x,y)`, such as polynomial, the decision
+functions may be dramatically different:
+
+.. figure:: ../auto_examples/svm/images/sphx_glr_plot_oneclass_vs_svdd_002.png
+   :target: ../auto_examples/svm/plot_oneclass_vs_svdd.html
+   :align: center
+   :scale: 75%
+
diff --git a/doc/modules/svm.rst b/doc/modules/svm.rst
index 75609adf38c9c..b2aa26d11bd3e 100644
--- a/doc/modules/svm.rst
+++ b/doc/modules/svm.rst
@@ -271,7 +271,7 @@ with and without weight correction.

 :class:`SVC`, :class:`NuSVC`, :class:`SVR`, :class:`NuSVR`, :class:`LinearSVC`,
-:class:`LinearSVR` and :class:`OneClassSVM` implement also weights for
+:class:`LinearSVR`, :class:`OneClassSVM` and :class:`SVDD` also implement weights for
 individual samples in the `fit` method through the ``sample_weight`` parameter.
 Similar to ``class_weight``, this sets the parameter ``C`` for the i-th
 example to ``C * sample_weight[i]``, which will encourage the classifier to
@@ -339,6 +339,27 @@ Density estimation, novelty detection
 The class :class:`OneClassSVM` implements a One-Class SVM which is used in
 outlier detection.

+The :ref:`svm_one_class_svm` and :ref:`svm_svdd` models can be used for novelty
+detection: given a set of samples, the model detects a soft boundary of that
+set so as to classify new points as belonging to that set or not. The
+classes that implement these models are :class:`OneClassSVM` and
+:class:`SVDD` respectively.
+
+Since novelty detection is a type of unsupervised learning, the ``fit`` method
+requires only an array X as input, as there are no class labels.
+
+See section :ref:`outlier_detection` for more details on this usage.
+
+.. figure:: ../auto_examples/svm/images/sphx_glr_plot_oneclass_001.png
+   :target: ../auto_examples/svm/plot_oneclass.html
+   :align: center
+   :scale: 75
+
+
+.. topic:: Examples:
+
+   * :ref:`sphx_glr_auto_examples_svm_plot_oneclass.py`
+   * :ref:`sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py`
+
-See :ref:`outlier_detection` for the description and usage of OneClassSVM.

 Complexity
@@ -422,8 +444,9 @@ Tips on Practical Use
     using a large stopping tolerance), the code without using shrinking may
     be much faster*

-  * Parameter ``nu`` in :class:`NuSVC`/:class:`OneClassSVM`/:class:`NuSVR`
-    approximates the fraction of training errors and support vectors.
+  * Parameter ``nu`` in :class:`NuSVC`, :class:`OneClassSVM`, :class:`NuSVR`,
+    and :class:`SVDD` approximates the fraction of training errors and support
+    vectors.

   * In :class:`SVC`, if the data is unbalanced (e.g. many positive and few
     negative), set ``class_weight='balanced'`` and/or try
@@ -760,6 +783,177 @@ where we make use of the epsilon-insensitive loss, i.e. errors of less
 than :math:`\varepsilon` are ignored. This is the form that is directly
 optimized by :class:`LinearSVR`.

+.. _svm_one_class_svm:
+
+One-Class SVM
+-------------
+
+This model, proposed by Schölkopf et al.
(2001), estimates the support
+of a high-dimensional distribution by constructing a supporting hyperplane
+in the feature space corresponding to the kernel, which effectively
+separates the data set from the origin with maximum margin.
+
+For the training sample :math:`(x_i)_{i=1}^{n}` with weights :math:`(w_i)_{i=1}^{n}`,
+:math:`\sum_{i=1}^{n} w_i>0`, the One-Class SVM solves the following primal problem:
+
+.. math::
+
+    \min_{\rho,\xi,w} \frac12 w^Tw - \rho + \frac{1}{\nu W} \sum_{i=1}^{n} w_i \xi_i \,, \\
+
+    \textrm{subject to } & w^T\phi(x_i) \geq \rho - \xi_i \,, \\
+    & \xi_i \geq 0\,,\, i=1, \ldots, n \,,
+
+where :math:`\phi(\cdot)` is the feature map associated with the
+kernel :math:`K(\cdot,\cdot)`, and :math:`W = \sum_{i=1}^{n} w_i`.
+
+The dual problem is
+
+.. math::
+
+    \min_\alpha \frac12 \alpha^T Q\alpha \,, \\
+
+    \textrm{subject to } & 0\leq \alpha_i \leq w_i\,,\, i=1, \ldots, n \,,\\
+    & e^T\alpha = \nu W \,,
+
+where :math:`e\in \mathbb{R}^{n\times 1}` is the vector of ones and
+:math:`Q_{ij} = K(x_i, x_j)` is the kernel Gram matrix.
+
+The optimal decision function is given by:
+
+.. math:: x\mapsto \operatorname{sgn}(\sum_{i=1}^{n} \alpha_i K(x_i, x) - \rho) \,,
+
+where :math:`+1` indicates an inlier and :math:`-1` an outlier.
+
+The parameter :math:`\nu\in(0,1]` determines the fraction of outliers
+in the training dataset. More technically, :math:`\nu` is:
+
+    * an upper bound on the fraction of the training points lying outside
+      the estimated region;
+
+    * a lower bound on the fraction of support vectors.
+
+.. topic:: References:
+
+    * `Estimating the support of a high-dimensional distribution
+      `_ Schölkopf,
+      Bernhard, et al. Neural computation 13.7 (2001): 1443-1471.
+      doi:10.1162/089976601750264965
+
+
+.. _svm_svdd:
+
+SVDD
+----
+
+Support Vector Data Description (SVDD), proposed by Tax and Duin (2004),
+aims at finding a spherically shaped boundary around a data set. Specifically,
+it computes a minimum volume hypersphere containing most of the data, with
+the number of outliers controlled by a parameter of the model.
+
+The original formulation suffered from non-convexity issues related to the
+optimality of the attained solution for certain values of the regularization
+parameter :math:`C`. Chang, Lee, and Lin (2013) suggested a reformulation of
+the SVDD model which has a well-defined and provably unique global solution
+for any :math:`C>0`.
+
+The implementation in the class :class:`SVDD` is based on a modified version
+of the 2013 SVDD formulation. The following changes were made to problem (7)
+in Chang et al. (2013):
+
+    * **sample weights**: instead of a uniform penalty :math:`C>0`, sample
+      observations are allowed to have different costs :math:`(C_i)_{i=1}^{n}`,
+      :math:`\sum_{i=1}^{n} C_i > 0`;
+
+    * :math:`\nu`-**parametrization**: the penalties are determined by
+      :math:`C_i = \frac{w_i}{\nu \sum_{i=1}^{n} w_i}`, where :math:`\nu\in(0, 1]`
+      and :math:`(w_i)_{i=1}^{n}` are non-negative sample weights.
+
+A straightforward extension of theorems 2-4 of Chang et al. (2013) to the case
+of non-uniform penalties yields :math:`\sum_{i=1}^{n} C_i > 1`, or equivalently
+:math:`\nu < 1`, as the condition that distinguishes the case :math:`R>0`
+(theorem 4, case 1) from :math:`R=0` (theorem 4, case 2).
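+
+For instance, with uniform weights :math:`w_i = 1` the penalties are
+:math:`C_i = \frac{1}{\nu n}`, so that
+
+.. math:: \sum_{i=1}^{n} C_i = \frac{1}{\nu} > 1
+    \quad\Longleftrightarrow\quad \nu < 1 \,.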
+
+The main benefit of the :math:`\nu`-parametrization is a clearer interpretation
+and a unified interface to the :ref:`svm_one_class_svm` model: :math:`\nu` is an
+upper bound on the fraction of the training points lying outside the estimated
+region, and a lower bound on the fraction of support vectors. Under the original
+:math:`C`-parametrization the value :math:`\frac{1}{n C}` served as these bounds.
+
+Note that in a typical run of the SVDD model the weights are set to :math:`w_i = 1`,
+which is equivalent to the original 2013 SVDD formulation for :math:`C = \frac{1}{\nu n}`.
+
+The primal problem of this modified version of SVDD for the training sample
+:math:`(x_i)_{i=1}^{n}` with weights :math:`(w_i)_{i=1}^{n}`,
+:math:`\sum_{i=1}^{n} w_i>0`, is:
+
+.. math::
+
+    \min_{R,\xi,a} R + \frac{1}{\nu W} \sum_{i=1}^{n} w_i \xi_i\,,\\
+
+    \textrm{subject to } & \|\phi(x_i) - a\|^2 \leq R + \xi_i\,,\\
+    & \xi_i \geq 0\,,\, i=1, \ldots, n\,,\\
+    & R \geq 0\,,
+
+where :math:`\phi(\cdot)` is the feature map associated with the kernel
+:math:`K(\cdot,\cdot)`, and :math:`W = \sum_{i=1}^{n} w_i`.
+
+When :math:`\nu \geq 1`, the optimal :math:`R=0` and the primal problem
+reduces to an unconstrained convex optimization problem independent of
+:math:`\nu`:
+
+.. math:: \min_a \sum_{i=1}^{n} w_i \|\phi(x_i) - a\|^2\,.
+
+Note that in this case every training observation is an outlier.
+
+In the case when :math:`\nu < 1` the constraint :math:`R\geq 0` is redundant,
+strong duality holds, and the dual problem has the form:
+
+.. math::
+
+    \min_\alpha \frac12 \alpha^T Q\alpha - \frac{\nu W}{2} \sum_{i=1}^{n} \alpha_i Q_{ii}\,,\\
+
+    \textrm{subject to } & 0 \leq \alpha_i \leq w_i\,,\, i=1, \ldots, n\,,\\
+    & e^T \alpha = \nu W\,,
+
+where :math:`e\in \mathbb{R}^{n\times 1}` is the vector of ones and
+:math:`Q_{ij} = K(x_i, x_j)` is the kernel Gram matrix.
+
+The decision function of the SVDD is given by:
+
+.. math:: x\mapsto \operatorname{sgn}(R - \|\phi(x) - a\|^2) \,,
+
+where :math:`+1` indicates an inlier and :math:`-1` an outlier. The
+distances in the feature space and :math:`R` are computed implicitly through
+the coefficients and the optimal value of the objective of the corresponding
+dual problem.
+
+It is worth noting that for a stationary kernel :math:`K(x,y)=K(x-y)`
+the SVDD and One-Class SVM models are provably equivalent. Indeed, the values
+:math:`Q_{ii} = K(x_i, x_i)` in the last term of the dual of the SVDD are all
+equal to :math:`K(0)`, so by the constraint :math:`e^T \alpha = \nu W` that
+whole term is constant on the feasible set. Therefore the objective functions
+of the dual problems of the One-Class SVM and the SVDD are equivalent up to a
+constant. This, however, **does not imply** that one model generalizes the
+other: their solutions just happen to coincide for a particular family of
+kernels (see :ref:`outlier_detection_ocsvm_vs_svdd`).
+
+.. topic:: References:
+
+    * `Support vector data description
+      `_
+      Tax and Duin. Machine learning, 54(1) (2004), pp.45-66.
+
+    * `A revisit to support vector data description (SVDD).
+      `_ Chang, Lee,
+      and Lin. Technical Report (2013), Dept. of Computer Science,
+      National Taiwan University.
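+
+As a quick illustration, a minimal usage sketch of this estimator (the
+exact fraction of predicted outliers depends on the data, but is roughly
+bounded above by ``nu``)::
+
+    >>> import numpy as np
+    >>> from sklearn.svm import SVDD
+    >>> rng = np.random.RandomState(0)
+    >>> X = np.r_[2 + 0.3 * rng.randn(100, 2), -2 + 0.3 * rng.randn(100, 2)]
+    >>> detector = SVDD(kernel="rbf", nu=0.1, gamma=0.1).fit(X)
+    >>> y_pred = detector.predict(X)  # +1 for inliers, -1 for outliers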
+
+
+.. _svm_implementation_details:

 Implementation details

diff --git a/examples/svm/plot_oneclass.py b/examples/svm/plot_oneclass.py
index 082cbcd6de2be..7c30370324846 100644
--- a/examples/svm/plot_oneclass.py
+++ b/examples/svm/plot_oneclass.py
@@ -1,11 +1,11 @@
 """
 ==========================================
-One-class SVM with non-linear kernel (RBF)
+One-Class SVM with non-linear kernel (RBF)
 ==========================================

-An example using a one-class SVM for novelty detection.
+An example using a One-Class SVM for novelty detection.

-:ref:`One-class SVM ` is an unsupervised
+:ref:`One-Class SVM ` is an unsupervised
 algorithm that learns a decision function for novelty detection:
 classifying new data as similar or different to the training set.

diff --git a/examples/svm/plot_oneclass_vs_svdd.py b/examples/svm/plot_oneclass_vs_svdd.py
new file mode 100644
index 0000000000000..a2d20df63a72a
--- /dev/null
+++ b/examples/svm/plot_oneclass_vs_svdd.py
@@ -0,0 +1,102 @@
+"""
+=========================
+One-Class SVM versus SVDD
+=========================
+
+An example comparing the One-Class SVM and SVDD models for novelty
+detection.
+
+:ref:`Support Vector Data Description (SVDD) `
+and :ref:`One-Class SVM ` are unsupervised
+algorithms that learn a decision function for novelty detection, i.e.
+the problem of classifying new data as similar or different to the
+training set.
+
+It can be shown that the One-Class SVM and SVDD models yield identical
+results in the case of a stationary kernel, like RBF, but produce different
+decision functions for non-stationary kernels, e.g. polynomial, as this
+example demonstrates.
+
+Note that it is incorrect to say that the SVDD generalizes the One-Class
+SVM: these are different models, which just happen to coincide for a
+particular family of kernels.
+"""
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib.font_manager
+from sklearn import svm
+
+print(__doc__)
+
+random_state = np.random.RandomState(42)
+
+xx, yy = np.meshgrid(np.linspace(-7, 7, 501), np.linspace(-7, 7, 501))
+# Generate train data
+X = 0.3 * random_state.randn(100, 2)
+X_train = np.r_[X + 2, X - 2]
+# Generate some regular novel observations
+X = 0.3 * random_state.randn(20, 2)
+X_test = np.r_[X + 2, X - 2]
+# Generate some abnormal novel observations
+X_outliers = random_state.uniform(low=-4, high=4, size=(20, 2))
+
+# Define the models
+nu = .1
+kernels = [("RBF", dict(kernel="rbf", gamma=0.1)),
+           ("Poly", dict(kernel="poly", degree=2, coef0=1.0)),
+           ]
+
+for kernel_name, kernel in kernels:
+
+    # Use a low tolerance to ensure better precision of the SVM
+    # optimization procedure.
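+    # (tol defaults to 1e-3 for both models; with a stationary kernel the
+    # two decision functions agree only up to the solver's tolerance, so a
+    # tighter tolerance makes the fitted frontiers visually coincide.)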
+    classifiers = [("OCSVM", svm.OneClassSVM(nu=nu, tol=1e-8, **kernel)),
+                   ("SVDD", svm.SVDD(nu=nu, tol=1e-8, **kernel)),
+                   ]
+
+    fig = plt.figure(figsize=(12, 5))
+    fig.suptitle("One-Class SVM versus SVDD "
+                 "(error train, error novel regular, error novel abnormal)")
+
+    for i, (model_name, clf) in enumerate(classifiers):
+        clf.fit(X_train)
+
+        y_pred_train = clf.predict(X_train)
+        y_pred_test = clf.predict(X_test)
+        y_pred_outliers = clf.predict(X_outliers)
+        n_error_train = y_pred_train[y_pred_train == -1].size
+        n_error_test = y_pred_test[y_pred_test == -1].size
+        n_error_outliers = y_pred_outliers[y_pred_outliers == 1].size
+
+        ax = fig.add_subplot(1, 2, i + 1)
+
+        # plot the level sets of the decision function
+        Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
+        Z = Z.reshape(xx.shape)
+
+        ax.contourf(xx, yy, Z, levels=np.linspace(Z.min(), 0, 7),
+                    cmap=plt.cm.PuBu, zorder=-99)
+        ax.contourf(xx, yy, Z, levels=[0, Z.max()], colors='palevioletred',
+                    zorder=-98)
+        a = ax.contour(xx, yy, Z, levels=[0], linewidths=2, colors='darkred',
+                       zorder=-97)
+
+        s = 40
+        b1 = ax.scatter(X_train[:, 0], X_train[:, 1], c='white', s=s)
+        b2 = ax.scatter(X_test[:, 0], X_test[:, 1], c='blueviolet', s=s)
+        c = ax.scatter(X_outliers[:, 0], X_outliers[:, 1], c='gold', s=s)
+        ax.axis('tight')
+        ax.set_xlim((-7, 7))
+        ax.set_ylim((-7, 7))
+
+        ax.set_title("%s %s (%d/200, %d/40, %d/40)"
+                     % (model_name, kernel_name, n_error_train,
+                        n_error_test, n_error_outliers))
+
+        ax.legend([a.collections[0], b1, b2, c],
+                  ["learned frontier", "training observations",
+                   "new regular observations", "new abnormal observations"],
+                  loc="lower right",
+                  prop=matplotlib.font_manager.FontProperties(size=10))
+
+plt.show()
diff --git a/sklearn/svm/__init__.py b/sklearn/svm/__init__.py
index f5b4123230f93..34976e71e797a 100644
--- a/sklearn/svm/__init__.py
+++ b/sklearn/svm/__init__.py
@@ -10,7 +10,8 @@
 # of their respective owners.
 # License: BSD 3 clause (C) INRIA 2010

-from ._classes import SVC, NuSVC, SVR, NuSVR, OneClassSVM, LinearSVC, LinearSVR
+from ._classes import SVC, NuSVC, SVR, NuSVR, OneClassSVM, LinearSVC, \
+    LinearSVR, SVDD
 from ._bounds import l1_min_c

 __all__ = [
@@ -19,6 +20,7 @@
     "NuSVC",
     "NuSVR",
     "OneClassSVM",
+    "SVDD",
     "SVC",
     "SVR",
     "l1_min_c",
diff --git a/sklearn/svm/_base.py b/sklearn/svm/_base.py
index 3fb213f5ea20d..c18589d9a14bc 100644
--- a/sklearn/svm/_base.py
+++ b/sklearn/svm/_base.py
@@ -27,7 +27,8 @@
 from ..exceptions import NotFittedError


-LIBSVM_IMPL = ["c_svc", "nu_svc", "one_class", "epsilon_svr", "nu_svr"]
+LIBSVM_IMPL = ["c_svc", "nu_svc", "one_class", "epsilon_svr", "nu_svr",
+               "svdd_l1"]


 def _one_vs_one_coef(dual_coef, n_support, support_vectors):
@@ -205,9 +206,9 @@ def fit(self, X, y, sample_weight=None):
         )
         solver_type = LIBSVM_IMPL.index(self._impl)

-        # input validation
+        # input validation: novelty detection models do not use 'y'
         n_samples = _num_samples(X)
-        if solver_type != 2 and n_samples != y.shape[0]:
+        if solver_type not in (2, 5) and n_samples != y.shape[0]:
             raise ValueError(
                 "X and y have incompatible shapes.\n"
                 + "X has %s samples, but y has %s." % (n_samples, y.shape[0])
diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py
index d1e59e7799b69..918e7f3f8a116 100644
--- a/sklearn/svm/_classes.py
+++ b/sklearn/svm/_classes.py
@@ -1523,9 +1523,12 @@ def _more_tags(self):


 class OneClassSVM(OutlierMixin, BaseLibSVM):
-    """Unsupervised Outlier Detection.
+    """One-Class SVM for Unsupervised Outlier Detection.
- Estimate the support of a high-dimensional distribution. + Estimate the support of a high-dimensional distribution by finding the + maximum margin soft boundary hyperplane separating a data set from the + origin. At most the fraction ``nu`` (``0 < nu <= 1``) of the data + are permitted to be outliers. The implementation is based on libsvm. @@ -1817,3 +1820,147 @@ def _more_tags(self): ), } } + + +class SVDD(BaseLibSVM): + """Support Vector Data Description (SVDD) for Unsupervised Outlier Detection. + + Estimate the support of a high-dimensional distribution by finding the + tightest soft boundary hypersphere around a data set, which permits at + most the fraction ``nu`` (``0 < nu <= 1``) of the data as outliers. + + The implementation is based on libsvm. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + kernel : string, optional (default='rbf') + Specifies the kernel type to be used in the algorithm. + It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' + or a callable. + If none is given, 'rbf' will be used. If a callable is given it is + used to precompute the kernel matrix. + + nu : float, optional + An upper bound on the fraction of training errors and a lower bound + of the fraction of support vectors. Should be in the interval (0, 1]. + By default 0.5 will be taken. + + degree : int, optional (default=3) + Degree of the polynomial kernel function ('poly'). + Ignored by all other kernels. + + gamma : float, optional (default='auto') + Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. + If gamma is 'auto' then 1/n_features will be used instead. + + coef0 : float, optional (default=0.0) + Independent term in kernel function. + It is only significant in 'poly' and 'sigmoid'. + + tol : float, optional + Tolerance for stopping criterion. + + shrinking : boolean, optional + Whether to use the shrinking heuristic. + + cache_size : float, optional + Specify the size of the kernel cache (in MB). + + verbose : bool, default: False + Enable verbose output. Note that this setting takes advantage of a + per-process runtime setting in libsvm that, if enabled, may not work + properly in a multithreaded context. + + max_iter : int, optional (default=-1) + Hard limit on iterations within solver, or -1 for no limit. + + random_state : int, RandomState instance or None, optional (default=None) + The seed of the pseudo random number generator to use when shuffling + the data. If int, random_state is the seed used by the random number + generator; If RandomState instance, random_state is the random number + generator; If None, the random number generator is the RandomState + instance used by `np.random`. + + Attributes + ---------- + support_ : array-like, shape = [n_SV] + Indices of support vectors. + + support_vectors_ : array-like, shape = [nSV, n_features] + Support vectors. + + dual_coef_ : array, shape = [n_classes-1, n_SV] + Coefficients of the support vectors in the decision function. + + coef_ : array, shape = [n_classes-1, n_features] + Weights assigned to the features (coefficients in the primal + problem). This is only available in the case of a linear kernel. + + `coef_` is readonly property derived from `dual_coef_` and + `support_vectors_` + + intercept_ : array, shape = [n_classes-1] + Constants in decision function. + + References + ---------- + .. [1] Tax, D.M. and Duin, R.P., 2004. "Support vector data + description." Machine learning, 54(1), pp.45-66. + doi:10.1023/B:MACH.0000008084.60811.49 + + .. [2] Chang, W.C., Lee, C.P. and Lin, C.J., 2013. 
"A revisit + to support vector data description (SVDD)." Technical + Report, Department of Computer Science, National Taiwan + University. + """ + def __init__(self, kernel='rbf', degree=3, gamma='auto', coef0=0.0, + tol=1e-3, nu=0.5, shrinking=True, cache_size=200, + verbose=False, max_iter=-1, random_state=None): + super(SVDD, self).__init__( + 'svdd_l1', kernel, degree, gamma, coef0, tol, 0., nu, 0., + shrinking, False, cache_size, None, verbose, max_iter, + random_state) + + def fit(self, X, y=None, sample_weight=None, **params): + """Detects the soft minimum volume hypersphere around the sample X. + + Parameters + ---------- + X : {array-like, sparse matrix}, shape (n_samples, n_features) + Set of samples, where n_samples is the number of samples and + n_features is the number of features. + + sample_weight : array-like, shape (n_samples,) + Per-sample weights. Higher weights force the novelty detector + to put more emphasis on these points. + + Returns + ------- + self : object + Returns self. + + Notes + ----- + If X is not a C-ordered contiguous array it is copied. + + """ + super(SVDD, self).fit(X, np.ones(_num_samples(X)), + sample_weight=sample_weight, **params) + return self + + def decision_function(self, X): + """Distance of the samples X to the separating hyperplane. + + Parameters + ---------- + X : array-like, shape (n_samples, n_features) + + Returns + ------- + X : array-like, shape (n_samples,) + Returns the decision function of the samples. + """ + dec = self._decision_function(X) + return dec diff --git a/sklearn/svm/_libsvm.pyx b/sklearn/svm/_libsvm.pyx index 89b36ddb3a813..4ca510c9dcdf3 100644 --- a/sklearn/svm/_libsvm.pyx +++ b/sklearn/svm/_libsvm.pyx @@ -74,9 +74,15 @@ def fit( Y : array, dtype=float64 of shape (n_samples,) target vector +<<<<<<< HEAD svm_type : {0, 1, 2, 3, 4}, default=0 Type of SVM: C_SVC, NuSVC, OneClassSVM, EpsilonSVR or NuSVR respectively. +======= + svm_type : {0, 1, 2, 3, 4, 5}, optional + Type of SVM: C_SVC, NuSVC, OneClassSVM, EpsilonSVR, NuSVR, or + SVDD-L1 respectively. 0 by default. +>>>>>>> ENH: nu-SVDD with sample weights, based on Chang, Lee, Lin (2013) kernel : {'linear', 'rbf', 'poly', 'sigmoid', 'precomputed'}, default="rbf" Kernel to use in the model: linear, polynomial, RBF, sigmoid @@ -608,8 +614,14 @@ def cross_validation( Y : array, dtype=float of shape (n_samples,) target vector +<<<<<<< HEAD n_fold : int32 Number of folds for cross validation. +======= + svm_type : {0, 1, 2, 3, 4, 5} + Type of SVM: C SVC, nu SVC, one class, epsilon SVR, nu SVR, + or SVDD-L1. 

diff --git a/sklearn/svm/src/libsvm/svm.cpp b/sklearn/svm/src/libsvm/svm.cpp
index de07fecdba2ac..8becae88ece14 100644
--- a/sklearn/svm/src/libsvm/svm.cpp
+++ b/sklearn/svm/src/libsvm/svm.cpp
@@ -1838,6 +1838,118 @@ static void solve_nu_svr(
 	delete[] y;
 }

+static void solve_svdd_l1(
+    const PREFIX(problem) *prob, const svm_parameter *param,
+    double *alpha, Solver::SolutionInfo* si)
+{
+    int l = prob->l;
+    int i, j;
+
+    double r_square;
+
+    ONE_CLASS_Q Q = ONE_CLASS_Q(*prob, *param);
+
+    if(param->nu < 1) {
+        // case \nu < 1: the dual problem is
+        //   min 0.5(\alpha^T Q \alpha) + (-0.5 \nu W diag Q)^T \alpha
+        //     e^T \alpha = \nu W
+        //     0 <= alpha_i <= W_i
+        //   W = sum W_i
+
+        schar *ones = new schar[l];
+        double *QD = new double[l];
+        double *linear_term = new double[l];
+        double *C = new double[l];
+
+        double nu_W = 0;
+        for(i=0;i<l;i++)
+        {
+            C[i] = prob->W[i];
+            nu_W += C[i] * param->nu;
+        }
+
+        for(i=0;i<l;i++)
+        {
+            QD[i] = Q.get_QD()[i];
+            linear_term[i] = -0.5 * nu_W * QD[i];
+            ones[i] = 1;
+        }
+
+        // initialize alpha at a feasible point of the dual
+        double sum_alpha = nu_W;
+        i = 0;
+        while(sum_alpha > 0)
+        {
+            alpha[i] = min(C[i], sum_alpha);
+            sum_alpha -= alpha[i];
+            ++i;
+        }
+        for(;i<l;i++)
+            alpha[i] = 0;
+
+        Solver s;
+        s.Solve(l, Q, linear_term, ones, alpha, C, param->eps,
+                si, param->shrinking, param->max_iter);
+
+        // Compute R: the solver returns
+        //   obj = 0.5 \alpha^T Q \alpha - 0.5 \nu W sum_i K_{ii}*\alpha_i
+        //   rho = 0.5 \nu W (\alpha^T Q \alpha / (\nu W)^2 - R)
+        r_square = 2*(si->obj - nu_W * si->rho);
+        for(i=0;i<l;i++)
+            r_square += alpha[i] * QD[i] * nu_W;
+        r_square /= nu_W * nu_W;
+
+        delete[] C;
+        delete[] linear_term;
+        delete[] QD;
+        delete[] ones;
+    }
+    else {
+        // case \nu >= 1: then R = 0, and the SVDD-L1 problem is reduced to
+        // a quadratic problem with a unique solution independent of \nu.
+        // The centre of the sphere is the average of feature maps with weights W_i.
+
+        info("*\nSVDD-L1 solution independent of nu\n");
+
+        double sum_W = 0;
+        for(i=0;i<l;i++)
+        {
+            alpha[i] = prob->W[i];
+            si->upper_bound[i] = prob->W[i];
+            sum_W += prob->W[i];
+        }
+
+        // Simulate the run of the Solver by computing the objective
+        // and the intercept:
+        //   obj = 0.5 \alpha^T Q \alpha - 0.5 W sum_i K_{ii}*\alpha_i
+        //   rho = 0.5 \alpha^T Q \alpha / W
+        // note that \sum_i \alpha_i = W.
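+        // With alpha_i = W_i the centre of the sphere is the weighted
+        // mean of the feature maps, a = (1/W) sum_i W_i \phi(x_i), so
+        // \alpha^T Q \alpha = W^2 ||a||^2 and hence rho = 0.5 W ||a||^2.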
+        double rho = 0;
+        double obj = 0;
+        double sum;
+        for(i=0;i<l;i++)
+        {
+            const Qfloat *Q_i = Q.get_Q(i, l);
+            sum = 0;
+            for(j=0;j<l;j++)
+                sum += alpha[j] * Q_i[j];
+            rho += 0.5 * alpha[i] * sum;
+            obj -= 0.5 * sum_W * alpha[i] * Q.get_QD()[i];
+        }
+        si->obj = rho + obj;
+        si->rho = rho / sum_W;
+
+        si->solve_timed_out = false;
+
+        r_square = 0.0;
+    }
+
+    info("R^2 = %f\n",r_square);
+}
+
 //
 // decision_function
 //
@@ -1876,6 +1988,10 @@ static decision_function svm_train_one(
 			si.upper_bound = Malloc(double,2*prob->l);
 			solve_nu_svr(prob,param,alpha,&si,blas_functions);
 			break;
+		case SVDD_L1:
+			si.upper_bound = Malloc(double,prob->l);
+			solve_svdd_l1(prob,param,alpha,&si);
+			break;
 	}

 	*status |= si.solve_timed_out;
@@ -2377,9 +2493,10 @@ PREFIX(model) *PREFIX(train)(const PREFIX(problem) *prob, const svm_parameter *p

 	if(param->svm_type == ONE_CLASS ||
 	   param->svm_type == EPSILON_SVR ||
-	   param->svm_type == NU_SVR)
+	   param->svm_type == NU_SVR ||
+	   param->svm_type == SVDD_L1)
 	{
-		// regression or one-class-svm
+		// regression or novelty detection
 		model->nr_class = 2;
 		model->label = NULL;
 		model->nSV = NULL;
@@ -2820,11 +2937,19 @@ double PREFIX(predict_values)(const PREFIX(model) *model, const PREFIX(node) *x,
 	int i;
 	if(model->param.svm_type == ONE_CLASS ||
 	   model->param.svm_type == EPSILON_SVR ||
-	   model->param.svm_type == NU_SVR)
+	   model->param.svm_type == NU_SVR ||
+	   model->param.svm_type == SVDD_L1)
 	{
 		double *sv_coef = model->sv_coef[0];
 		double sum = 0;
+
+		if(model->param.svm_type == SVDD_L1)
+		{
+			double K_xx = NAMESPACE::Kernel::k_function(x,x,model->param) / 2;
+			for(int i=0;i<model->l;i++)
+				sum -= sv_coef[i] * K_xx;
+		}
+
 		for(i=0;i<model->l;i++)
 #ifdef _DENSE_REP
 			sum += sv_coef[i] * NAMESPACE::Kernel::k_function(x,model->SV+i,model->param,blas_functions);
@@ -2834,7 +2959,8 @@ double PREFIX(predict_values)(const PREFIX(model) *model, const PREFIX(node) *x,
 		sum -= model->rho[0];
 		*dec_values = sum;

-		if(model->param.svm_type == ONE_CLASS)
+		if(model->param.svm_type == ONE_CLASS ||
+		   model->param.svm_type == SVDD_L1)
 			return (sum>0)?1:-1;
 		else
 			return sum;
@@ -2906,7 +3032,8 @@ double PREFIX(predict)(const PREFIX(model) *model, const PREFIX(node) *x, BlasFu
 	double *dec_values;
 	if(model->param.svm_type == ONE_CLASS ||
 	   model->param.svm_type == EPSILON_SVR ||
-	   model->param.svm_type == NU_SVR)
+	   model->param.svm_type == NU_SVR ||
+	   model->param.svm_type == SVDD_L1)
 		dec_values = Malloc(double, 1);
 	else
 		dec_values = Malloc(double, nr_class*(nr_class-1)/2);
@@ -3024,7 +3151,8 @@ const char *PREFIX(check_parameter)(const PREFIX(problem) *prob, const svm_param
 	    svm_type != NU_SVC &&
 	    svm_type != ONE_CLASS &&
 	    svm_type != EPSILON_SVR &&
-	    svm_type != NU_SVR)
+	    svm_type != NU_SVR &&
+	    svm_type != SVDD_L1)
 		return "unknown svm type";

 	// kernel_type, degree
@@ -3059,7 +3187,8 @@ const char *PREFIX(check_parameter)(const PREFIX(problem) *prob, const svm_param

 	if(svm_type == NU_SVC ||
 	   svm_type == ONE_CLASS ||
-	   svm_type == NU_SVR)
+	   svm_type == NU_SVR ||
+	   svm_type == SVDD_L1)
 		if(param->nu <= 0 || param->nu > 1)
 			return "nu <= 0 or nu > 1";

@@ -3076,7 +3205,7 @@ const char *PREFIX(check_parameter)(const PREFIX(problem) *prob, const svm_param
 		return "probability != 0 and probability != 1";

 	if(param->probability == 1 &&
-	    svm_type == ONE_CLASS)
+	    (svm_type == ONE_CLASS || svm_type == SVDD_L1))
 		return "one-class SVM probability output not supported yet";


diff --git a/sklearn/svm/src/libsvm/svm.h b/sklearn/svm/src/libsvm/svm.h
index 518872c67bc5c..b4113d0ef24d2 100644
--- a/sklearn/svm/src/libsvm/svm.h
+++ b/sklearn/svm/src/libsvm/svm.h
@@ -40,7 +40,7 @@ struct svm_csr_problem
 };


-enum { C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR };	/* svm_type */
+enum { C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR, SVDD_L1 };	/*
svm_type */ enum { LINEAR, POLY, RBF, SIGMOID, PRECOMPUTED }; /* kernel_type */ struct svm_parameter diff --git a/sklearn/svm/tests/test_sparse.py b/sklearn/svm/tests/test_sparse.py index 3bb6d0f268d07..5ffaf8f0af08c 100644 --- a/sklearn/svm/tests/test_sparse.py +++ b/sklearn/svm/tests/test_sparse.py @@ -335,6 +335,24 @@ def test_sparse_oneclasssvm(datasets_index, kernel): check_svm_model_equal(clf, sp_clf, *dataset) +def test_sparse_svdd(): + """Check that sparse SVDD gives the same result as dense SVDD + """ + # many class dataset: + X_blobs, _ = make_blobs(n_samples=100, centers=10, random_state=0) + X_blobs = sparse.csr_matrix(X_blobs) + + datasets = [[X_sp, None, T], [X2_sp, None, T2], + [X_blobs[:80], None, X_blobs[80:]], + [iris.data, None, iris.data]] + kernels = ["linear", "poly", "rbf", "sigmoid"] + for dataset in datasets: + for kernel in kernels: + clf = svm.SVDD(kernel=kernel, random_state=0) + sp_clf = svm.SVDD(kernel=kernel, random_state=0) + check_svm_model_equal(clf, sp_clf, *dataset) + + def test_sparse_realdata(): # Test on a subset from the 20newsgroups dataset. # This catches some bugs if input is not correctly converted into diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index 9cc684d93ea71..b3b864826c546 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -362,6 +362,94 @@ def test_oneclass_fit_params_is_deprecated(): clf.fit(X, **params) +def test_svdd(): + # Test the output of libsvm for the SVDD problem with default parameters + clf = svm.SVDD() + clf.fit(X) + pred = clf.predict(T) + + assert_array_almost_equal(pred, [-1, -1, -1]) + assert_array_almost_equal(clf.intercept_, [0.491], decimal=3) + assert_array_almost_equal(clf.dual_coef_, + [[0.632, 0.233, 0.633, 0.234, 0.632, 0.633]], + decimal=3) + assert_false(hasattr(clf, "coef_")) + + +def test_svdd_decision_function(): + # For the RBF (stationary) kernel the SVDD and the OneClass SVM + # are identical. Therefore here the test is run on a non-stationary + # kernel. 
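+    # For the polynomial kernel K(x, x) = (gamma * <x, x> + coef0)**degree
+    # varies with x, so the -K(x, x)/2 correction term in the SVDD decision
+    # function is not a constant offset of the One-Class SVM score.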
+ + # Test SVDD decision function + rnd = check_random_state(2) + + # Generate train data + X = 0.3 * rnd.randn(100, 2) + X_train = np.r_[X + 2, X - 2] + + # Generate some regular novel observations + X = 0.3 * rnd.randn(20, 2) + X_test = np.r_[X + 2, X - 2] + + # Generate some abnormal novel observations + X_outliers = rnd.uniform(low=-4, high=4, size=(20, 2)) + + # fit the model + clf = svm.SVDD(nu=0.1, kernel="poly", degree=2, coef0=1.0).fit(X_train) + + # predict and validate things + y_pred_test = clf.predict(X_test) + assert_greater(np.mean(y_pred_test == 1), .9) + + y_pred_outliers = clf.predict(X_outliers) + assert_greater(np.mean(y_pred_outliers == -1), .8) + + dec_func_test = clf.decision_function(X_test) + assert_array_equal((dec_func_test > 0).ravel(), y_pred_test == 1) + + dec_func_outliers = clf.decision_function(X_outliers) + assert_array_equal((dec_func_outliers > 0).ravel(), y_pred_outliers == 1) + + +def test_oneclass_and_svdd(): + # Generate a sample: two symmetrically placed clusters + rnd = check_random_state(2) + + X = 0.3 * rnd.randn(100, 2) + X_train = np.r_[X + 2, X - 2] + + # Test the output of libsvm for the SVDD and the One-Class SVM + nu = 0.15 + + svdd = svm.SVDD(nu=nu, kernel="rbf") + svdd.fit(X_train) + + ocsvm = svm.OneClassSVM(nu=nu, kernel="rbf") + ocsvm.fit(X_train) + + # The intercept of the SVDD differs from that of the One-Class SVM: + # `rho_svdd = (aTQa * (nu * l)^(-2) - R) * (nu * l) / 2` , + # and + # `rho_oc = (C0 + aTQa * (nu * l)^(-2) - R) * (nu * l) / 2` , + # since `R = C0 - 2 rho_oc / (nu l) + aTQa * (nu l)^(-2)`, + # where `C0 = K(x,x) = K(x-x)` for a stationary K. + # >>> The intercept_ value is negative rho! + # For the RBF kernel: K(x,y) = exp(-theta * |x-y|^2), the C0 is 1. + C0 = 1.0 + svdd_intercept = (2 * ocsvm.intercept_ + C0 * (nu * X_train.shape[0])) / 2 + assert_array_almost_equal(svdd.intercept_, svdd_intercept, decimal=3) + + # Evaluate the decision function on a uniformly spaced 2-d mesh + xx, yy = np.meshgrid(np.linspace(-5, 5, num=101), + np.linspace(-5, 5, num=101)) + mesh = np.c_[xx.ravel(), yy.ravel()] + + svdd_df = svdd.decision_function(mesh) + ocsvm_df = ocsvm.decision_function(mesh) + assert_array_almost_equal(svdd_df, ocsvm_df) + + def test_tweak_params(): # Make sure some tweaking of parameters works. # We change clf.dual_coef_ at run time and expect .predict() to change @@ -969,6 +1057,7 @@ def test_immutable_coef_property(): svm.SVR(kernel="linear").fit(iris.data, iris.target), svm.NuSVR(kernel="linear").fit(iris.data, iris.target), svm.OneClassSVM(kernel="linear").fit(iris.data), + svm.SVDD(kernel='linear').fit(iris.data), ] for clf in svms: with pytest.raises(AttributeError): From 6d373a62eb8bda0ac129e9b5d97979585f195f3b Mon Sep 17 00:00:00 2001 From: Ivan Nazarov Date: Tue, 27 Dec 2016 11:28:02 +0300 Subject: [PATCH 02/41] a Whatsnew entry and a minor comment fix in base.py --- doc/whats_new.rst | 5782 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 5782 insertions(+) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 3354a6b13f32b..a2e79cb930838 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -28,3 +28,5785 @@ on libraries.io to be notified when new versions are released. Version 0.14 Version 0.13 Older Versions + +Version 0.20 (under development) +================================ + +Changed models +-------------- + +The following estimators and functions, when fit with the same data and +parameters, may produce different models from the previous version. 
This often +occurs due to changes in the modelling logic (bug fixes or enhancements), or in +random sampling procedures. + +- :class:`decomposition.IncrementalPCA` in Python 2 (bug fix) + +Details are listed in the changelog below. + +(While we are trying to better inform users by providing this information, we +cannot assure that this list is complete.) + +Changelog +--------- + +New features +............ + +Classifiers and regressors + +- :class:`ensemble.GradientBoostingClassifier` and + :class:`ensemble.GradientBoostingRegressor` now support early stopping + via ``n_iter_no_change``, ``validation_fraction`` and ``tol``. :issue:`7071` + by `Raghav RV`_ + +- Added :class:`naive_bayes.ComplementNB`, which implements the Complement + Naive Bayes classifier described in Rennie et al. (2003). + By :user:`Michael A. Alcorn `. + +Enhancements +............ + +Model evaluation and meta-estimators + +- A scorer based on :func:`metrics.brier_score_loss` is also available. + :issue:`9521` by :user:`Hanmin Qin `. + +Bug fixes +......... + +Decomposition, manifold learning and clustering + +- Fix for uninformative error in :class:`decomposition.incremental_pca`: + now an error is raised if the number of components is larger than the + chosen batch size. The ``n_components=None`` case was adapted accordingly. + :issue:`6452`. By :user:`Wally Gauze `. + +- Fixed a bug where the ``partial_fit`` method of + :class:`decomposition.IncrementalPCA` used integer division instead of float + division on Python 2 versions. :issue:`9492` by + :user:`James Bourbeau `. + +Version 0.19 +============ + +**Release Candidate (0.19b2) July 17, 2017** + +Highlights +---------- + +We are excited to release a number of great new features including +:class:`neighbors.LocalOutlierFactor` for anomaly detection, +:class:`preprocessing.QuantileTransformer` for robust feature transformation, +and the :class:`multioutput.ClassifierChain` meta-estimator to simply account +for dependencies between classes in multilabel problems. We have some new +algorithms in existing estimators, such as multiplicative update in +:class:`decomposition.NMF` and multinomial +:class:`linear_model.LogisticRegression` with L1 loss (use ``solver='saga'``). + +Cross validation is now able to return the results from multiple metric +evaluations. The new :func:`model_selection.cross_validate` can return many +scores on the test data as well as training set performance and timings, and we +have extended the ``scoring`` and ``refit`` parameters for grid/randomized +search :ref:`to handle multiple metrics `. + +You can also learn faster. For instance, the :ref:`new option to cache +transformations ` in :class:`pipeline.Pipeline` makes grid +search over pipelines including slow transformations much more efficient. And +you can predict faster: if you're sure you know what you're doing, you can turn +off validating that the input is finite using :func:`config_context`. + +We've made some important fixes too. We've fixed a longstanding implementation +error in :func:`metrics.average_precision_score`, so please be cautious with +prior results reported from that function. A number of errors in the +:class:`manifold.TSNE` implementation have been fixed, particularly in the +default Barnes-Hut approximation. :class:`semi_supervised.LabelSpreading` and +:class:`semi_supervised.LabelPropagation` have had substantial fixes. +LabelPropagation was previously broken. LabelSpreading should now correctly +respect its alpha parameter. 
+ +Changed models +-------------- + +The following estimators and functions, when fit with the same data and +parameters, may produce different models from the previous version. This often +occurs due to changes in the modelling logic (bug fixes or enhancements), or in +random sampling procedures. + +- :class:`cluster.KMeans` with sparse X and initial centroids given (bug fix) +- :class:`cross_decomposition.PLSRegression` + with ``scale=True`` (bug fix) +- :class:`ensemble.GradientBoostingClassifier` and + :class:`ensemble.GradientBoostingRegressor` where ``min_impurity_split`` is used (bug fix) +- gradient boosting ``loss='quantile'`` (bug fix) +- :class:`ensemble.IsolationForest` (bug fix) +- :class:`feature_selection.SelectFdr` (bug fix) +- :class:`linear_model.RANSACRegressor` (bug fix) +- :class:`linear_model.LassoLars` (bug fix) +- :class:`linear_model.LassoLarsIC` (bug fix) +- :class:`manifold.TSNE` (bug fix) +- :class:`neighbors.NearestCentroid` (bug fix) +- :class:`semi_supervised.LabelSpreading` (bug fix) +- :class:`semi_supervised.LabelPropagation` (bug fix) +- tree based models where ``min_weight_fraction_leaf`` is used (enhancement) + +Details are listed in the changelog below. + +(While we are trying to better inform users by providing this information, we +cannot assure that this list is complete.) + +Changelog +--------- + +New features +............ + +Classifiers and regressors + +- Added :class:`multioutput.ClassifierChain` for multi-label + classification. By `Adam Kleczewski `_. + +- Added solver ``'saga'`` that implements the improved version of Stochastic + Average Gradient, in :class:`linear_model.LogisticRegression` and + :class:`linear_model.Ridge`. It allows the use of L1 penalty with + multinomial logistic loss, and behaves marginally better than 'sag' + during the first epochs of ridge and logistic regression. + :issue:`8446` by `Arthur Mensch`_. + +Other estimators + +- Added the :class:`neighbors.LocalOutlierFactor` class for anomaly + detection based on nearest neighbors. + :issue:`5279` by `Nicolas Goix`_ and `Alexandre Gramfort`_. + +- Added :class:`preprocessing.QuantileTransformer` class and + :func:`preprocessing.quantile_transform` function for features + normalization based on quantiles. + :issue:`8363` by :user:`Denis Engemann `, + :user:`Guillaume Lemaitre `, `Olivier Grisel`_, `Raghav RV`_, + :user:`Thierry Guillemot `, and `Gael Varoquaux`_. + +- The new solver ``'mu'`` implements a Multiplicate Update in + :class:`decomposition.NMF`, allowing the optimization of all + beta-divergences, including the Frobenius norm, the generalized + Kullback-Leibler divergence and the Itakura-Saito divergence. + :issue:`5295` by `Tom Dupre la Tour`_. + +Model selection and evaluation + +- :class:`model_selection.GridSearchCV` and + :class:`model_selection.RandomizedSearchCV` now support simultaneous + evaluation of multiple metrics. Refer to the + :ref:`multimetric_grid_search` section of the user guide for more + information. :issue:`7388` by `Raghav RV`_ + +- Added the :func:`model_selection.cross_validate` which allows evaluation + of multiple metrics. This function returns a dict with more useful + information from cross-validation such as the train scores, fit times and + score times. + Refer to :ref:`multimetric_cross_validation` section of the userguide + for more information. 
:issue:`7388` by `Raghav RV`_

+- Added :func:`metrics.mean_squared_log_error`, which computes
+  the mean square error of the logarithmic transformation of targets,
+  particularly useful for targets with an exponential trend.
+  :issue:`7655` by :user:`Karan Desai `.
+
+- Added :func:`metrics.dcg_score` and :func:`metrics.ndcg_score`, which
+  compute Discounted cumulative gain (DCG) and Normalized discounted
+  cumulative gain (NDCG).
+  :issue:`7739` by :user:`David Gasquez `.
+
+- Added the :class:`model_selection.RepeatedKFold` and
+  :class:`model_selection.RepeatedStratifiedKFold`.
+  :issue:`8120` by `Neeraj Gangwar`_.
+
+- Added the :class:`svm.SVDD` class for novelty detection based on a
+  soft minimal volume hypersphere around the sample data.
+  By `Ivan Nazarov`_.
+
+Miscellaneous
+
+- Validation that input data contains no NaN or inf can now be suppressed
+  using :func:`config_context`, at your own risk. This will save on runtime,
+  and may be particularly useful for prediction time. :issue:`7548` by
+  `Joel Nothman`_.
+
+- Added a test to ensure parameter listing in docstrings matches the
+  function/class signature. :issue:`9206` by `Alexandre Gramfort`_ and
+  `Raghav RV`_.
+
+Enhancements
+............
+
+Trees and ensembles
+
+- The ``min_weight_fraction_leaf`` constraint in tree construction is now
+  more efficient, taking a fast path to declare a node a leaf if its weight
+  is less than 2 * the minimum. Note that the constructed tree will be
+  different from previous versions where ``min_weight_fraction_leaf`` is
+  used. :issue:`7441` by :user:`Nelson Liu `.
+
+- :class:`ensemble.GradientBoostingClassifier` and :class:`ensemble.GradientBoostingRegressor`
+  now support sparse input for prediction.
+  :issue:`6101` by :user:`Ibraim Ganiev `.
+
+- :class:`ensemble.VotingClassifier` now allows changing estimators by using
+  :meth:`ensemble.VotingClassifier.set_params`. An estimator can also be
+  removed by setting it to ``None``.
+  :issue:`7674` by :user:`Yichuan Liu `.
+
+- :func:`tree.export_graphviz` now shows configurable number of decimal
+  places. :issue:`8698` by :user:`Guillaume Lemaitre `.
+
+- Added ``flatten_transform`` parameter to :class:`ensemble.VotingClassifier`
+  to change output shape of `transform` method to 2 dimensional.
+ :issue:`7794` by :user:`Ibraim Ganiev ` and + :user:`Herilalaina Rakotoarison `. + +Linear, kernelized and related models + +- :class:`linear_model.SGDClassifier`, :class:`linear_model.SGDRegressor`, + :class:`linear_model.PassiveAggressiveClassifier`, + :class:`linear_model.PassiveAggressiveRegressor` and + :class:`linear_model.Perceptron` now expose ``max_iter`` and + ``tol`` parameters, to handle convergence more precisely. + ``n_iter`` parameter is deprecated, and the fitted estimator exposes + a ``n_iter_`` attribute, with actual number of iterations before + convergence. :issue:`5036` by `Tom Dupre la Tour`_. + +- Added ``average`` parameter to perform weight averaging in + :class:`linear_model.PassiveAggressiveClassifier`. :issue:`4939` + by :user:`Andrea Esuli `. + +- :class:`linear_model.RANSACRegressor` no longer throws an error + when calling ``fit`` if no inliers are found in its first iteration. + Furthermore, causes of skipped iterations are tracked in newly added + attributes, ``n_skips_*``. + :issue:`7914` by :user:`Michael Horrell `. + +- In :class:`gaussian_process.GaussianProcessRegressor`, method ``predict`` + is a lot faster with ``return_std=True``. :issue:`8591` by + :user:`Hadrien Bertrand `. + +- Added ``return_std`` to ``predict`` method of + :class:`linear_model.ARDRegression` and + :class:`linear_model.BayesianRidge`. + :issue:`7838` by :user:`Sergey Feldman `. + +- Memory usage enhancements: Prevent cast from float32 to float64 in: + :class:`linear_model.MultiTaskElasticNet`; + :class:`linear_model.LogisticRegression` when using newton-cg solver; and + :class:`linear_model.Ridge` when using svd, sparse_cg, cholesky or lsqr + solvers. :issue:`8835`, :issue:`8061` by :user:`Joan Massich ` and :user:`Nicolas + Cordier ` and :user:`Thierry Guillemot `. + +Other predictors + +- Custom metrics for the :mod:`neighbors` binary trees now have + fewer constraints: they must take two 1d-arrays and return a float. + :issue:`6288` by `Jake Vanderplas`_. + +- ``algorithm='auto`` in :mod:`neighbors` estimators now chooses the most + appropriate algorithm for all input types and metrics. :issue:`9145` by + :user:`Herilalaina Rakotoarison ` and :user:`Reddy Chinthala + `. + +Decomposition, manifold learning and clustering + +- :class:`cluster.MiniBatchKMeans` and :class:`cluster.KMeans` + now use significantly less memory when assigning data points to their + nearest cluster center. :issue:`7721` by :user:`Jon Crall `. + +- :class:`decomposition.PCA`, :class:`decomposition.IncrementalPCA` and + :class:`decomposition.TruncatedSVD` now expose the singular values + from the underlying SVD. They are stored in the attribute + ``singular_values_``, like in :class:`decomposition.IncrementalPCA`. + :issue:`7685` by :user:`Tommy Löfstedt ` + +- :class:`decomposition.NMF` now faster when ``beta_loss=0``. + :issue:`9277` by :user:`hongkahjun`. + +- Memory improvements for method ``barnes_hut`` in :class:`manifold.TSNE` + :issue:`7089` by :user:`Thomas Moreau ` and `Olivier Grisel`_. + +- Optimization schedule improvements for Barnes-Hut :class:`manifold.TSNE` + so the results are closer to the one from the reference implementation + `lvdmaaten/bhtsne `_ by :user:`Thomas + Moreau ` and `Olivier Grisel`_. + +- Memory usage enhancements: Prevent cast from float32 to float64 in + :class:`decomposition.PCA` and + :func:`decomposition.randomized_svd_low_rank`. + :issue:`9067` by `Raghav RV`_. 
+ +Preprocessing and feature selection + +- Added ``norm_order`` parameter to :class:`feature_selection.SelectFromModel` + to enable selection of the norm order when ``coef_`` is more than 1D. + :issue:`6181` by :user:`Antoine Wendlinger `. + +- Added ability to use sparse matrices in :func:`feature_selection.f_regression` + with ``center=True``. :issue:`8065` by :user:`Daniel LeJeune `. + +- Small performance improvement to n-gram creation in + :mod:`feature_extraction.text` by binding methods for loops and + special-casing unigrams. :issue:`7567` by :user:`Jaye Doepke ` + +- Relax assumption on the data for the + :class:`kernel_approximation.SkewedChi2Sampler`. Since the Skewed-Chi2 + kernel is defined on the open interval :math:`(-skewedness; +\infty)^d`, + the transform function should not check whether ``X < 0`` but whether ``X < + -self.skewedness``. :issue:`7573` by :user:`Romain Brault `. + +- Made default kernel parameters kernel-dependent in + :class:`kernel_approximation.Nystroem`. + :issue:`5229` by :user:`Saurabh Bansod ` and `Andreas Müller`_. + +Model evaluation and meta-estimators + +- :class:`pipeline.Pipeline` is now able to cache transformers + within a pipeline by using the ``memory`` constructor parameter. + :issue:`7990` by :user:`Guillaume Lemaitre `. + +- :class:`pipeline.Pipeline` steps can now be accessed as attributes of its + ``named_steps`` attribute. :issue:`8586` by :user:`Herilalaina + Rakotoarison `. + +- Added ``sample_weight`` parameter to :meth:`pipeline.Pipeline.score`. + :issue:`7723` by :user:`Mikhail Korobov `. + +- Added ability to set ``n_jobs`` parameter to :func:`pipeline.make_union`. + A ``TypeError`` will be raised for any other kwargs. :issue:`8028` + by :user:`Alexander Booth `. + +- :class:`model_selection.GridSearchCV`, + :class:`model_selection.RandomizedSearchCV` and + :func:`model_selection.cross_val_score` now allow estimators with callable + kernels which were previously prohibited. + :issue:`8005` by `Andreas Müller`_ . + +- :func:`model_selection.cross_val_predict` now returns output of the + correct shape for all values of the argument ``method``. + :issue:`7863` by :user:`Aman Dalmia `. + +- Added ``shuffle`` and ``random_state`` parameters to shuffle training + data before taking prefixes of it based on training sizes in + :func:`model_selection.learning_curve`. + :issue:`7506` by :user:`Narine Kokhlikyan `. + +- :class:`model_selection.StratifiedShuffleSplit` now works with multioutput + multiclass (or multilabel) data. :issue:`9044` by `Vlad Niculae`_. + +- Speed improvements to :class:`model_selection.StratifiedShuffleSplit`. + :issue:`5991` by :user:`Arthur Mensch ` and `Joel Nothman`_. + +- Add ``shuffle`` parameter to :func:`model_selection.train_test_split`. + :issue:`8845` by :user:`themrmax ` + +- :class:`multioutput.MultiOutputRegressor` and :class:`multioutput.MultiOutputClassifier` + now support online learning using ``partial_fit``. + :issue: `8053` by :user:`Peng Yu `. + +- Add ``max_train_size`` parameter to :class:`model_selection.TimeSeriesSplit` + :issue:`8282` by :user:`Aman Dalmia `. + +- More clustering metrics are now available through :func:`metrics.get_scorer` + and ``scoring`` parameters. :issue:`8117` by `Raghav RV`_. + +- A scorer based on :func:`metrics.explained_variance_score` is also available. + :issue:`9259` by :user:`Hanmin Qin `. + +Metrics + +- :func:`metrics.matthews_corrcoef` now support multiclass classification. + :issue:`8094` by :user:`Jon Crall `. 
+ +- Add ``sample_weight`` parameter to :func:`metrics.cohen_kappa_score`. + :issue:`8335` by :user:`Victor Poughon `. + +Miscellaneous + +- :func:`utils.check_estimator` now attempts to ensure that methods + transform, predict, etc. do not set attributes on the estimator. + :issue:`7533` by :user:`Ekaterina Krivich `. + +- Added type checking to the ``accept_sparse`` parameter in + :mod:`utils.validation` methods. This parameter now accepts only boolean, + string, or list/tuple of strings. ``accept_sparse=None`` is deprecated and + should be replaced by ``accept_sparse=False``. + :issue:`7880` by :user:`Josh Karnofsky `. + +- Make it possible to load a chunk of an svmlight formatted file by + passing a range of bytes to :func:`datasets.load_svmlight_file`. + :issue:`935` by :user:`Olivier Grisel `. + +- :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor` + now accept non-finite features. :issue:`8931` by :user:`Attractadore`. + +Bug fixes +......... + +Trees and ensembles + +- Fixed a memory leak in trees when using trees with ``criterion='mae'``. + :issue:`8002` by `Raghav RV`_. + +- Fixed a bug where :class:`ensemble.IsolationForest` uses an + an incorrect formula for the average path length + :issue:`8549` by `Peter Wang `_. + +- Fixed a bug where :class:`ensemble.AdaBoostClassifier` throws + ``ZeroDivisionError`` while fitting data with single class labels. + :issue:`7501` by :user:`Dominik Krzeminski `. + +- Fixed a bug in :class:`ensemble.GradientBoostingClassifier` and + :class:`ensemble.GradientBoostingRegressor` where a float being compared + to ``0.0`` using ``==`` caused a divide by zero error. :issue:`7970` by + :user:`He Chen `. + +- Fix a bug where :class:`ensemble.GradientBoostingClassifier` and + :class:`ensemble.GradientBoostingRegressor` ignored the + ``min_impurity_split`` parameter. + :issue:`8006` by :user:`Sebastian Pölsterl `. + +- Fixed ``oob_score`` in :class:`ensemble.BaggingClassifier`. + :issue:`8936` by :user:`Michael Lewis ` + +- Fixed excessive memory usage in prediction for random forests estimators. + :issue:`8672` by :user:`Mike Benfield `. + +- Fixed a bug where ``sample_weight`` as a list broke random forests in Python 2 + :issue:`8068` by :user:`xor`. + +- Fixed a bug where :class:`ensemble.IsolationForest` fails when + ``max_features`` is less than 1. + :issue:`5732` by :user:`Ishank Gulati `. + +- Fix a bug where gradient boosting with ``loss='quantile'`` computed + negative errors for negative values of ``ytrue - ypred`` leading to wrong + values when calling ``__call__``. + :issue:`8087` by :user:`Alexis Mignon ` + +- Fix a bug where :class:`ensemble.VotingClassifier` raises an error + when a numpy array is passed in for weights. :issue:`7983` by + :user:`Vincent Pham `. + +- Fixed a bug where :func:`tree.export_graphviz` raised an error + when the length of features_names does not match n_features in the decision + tree. :issue:`8512` by :user:`Li Li `. + +Linear, kernelized and related models + +- Fixed a bug where :func:`linear_model.RANSACRegressor.fit` may run until + ``max_iter`` if it finds a large inlier group early. :issue:`8251` by + :user:`aivision2020`. + +- Fixed a bug where :class:`naive_bayes.MultinomialNB` and + :class:`naive_bayes.BernoulliNB` failed when ``alpha=0``. :issue:`5814` by + :user:`Yichuan Liu ` and :user:`Herilalaina Rakotoarison + `. + +- Fixed a bug where :class:`linear_model.LassoLars` does not give + the same result as the LassoLars implementation available + in R (lars library). 
+  :issue:`7849` by :user:`Jair Montoya Martinez `.
+
+- Fixed a bug in :class:`linear_model.RandomizedLasso`,
+  :class:`linear_model.Lars`, :class:`linear_model.LassoLars`,
+  :class:`linear_model.LarsCV` and :class:`linear_model.LassoLarsCV`,
+  where the parameter ``precompute`` was not used consistently across
+  classes, and some values proposed in the docstring could raise errors.
+  :issue:`5359` by `Tom Dupre la Tour`_.
+
+- Fixed inconsistent results between :class:`linear_model.RidgeCV` and
+  :class:`linear_model.Ridge` when using ``normalize=True``. :issue:`9302`
+  by `Alexandre Gramfort`_.
+
+- Fixed a bug where :func:`linear_model.LassoLars.fit` sometimes
+  left ``coef_`` as a list, rather than an ndarray.
+  :issue:`8160` by :user:`CJ Carey `.
+
+- Fixed :func:`linear_model.BayesianRidge.fit` to return the ridge
+  parameters ``alpha_`` and ``lambda_`` consistent with the calculated
+  coefficients ``coef_`` and ``intercept_``.
+  :issue:`8224` by :user:`Peter Gedeck `.
+
+- Fixed a bug in :class:`svm.OneClassSVM` where it returned floats instead of
+  integer classes. :issue:`8676` by :user:`Vathsala Achar `.
+
+- Fixed AIC/BIC criterion computation in :class:`linear_model.LassoLarsIC`.
+  :issue:`9022` by `Alexandre Gramfort`_ and :user:`Mehmet Basbug `.
+
+- Fixed a memory leak in our LibLinear implementation. :issue:`9024` by
+  :user:`Sergei Lebedev `.
+
+- Fixed a bug where stratified CV splitters did not work with
+  :class:`linear_model.LassoCV`. :issue:`8973` by
+  :user:`Paulo Haddad `.
+
+- Fixed a bug in :class:`gaussian_process.GaussianProcessRegressor`
+  where predicting the standard deviation or covariance without a prior
+  fit failed with an uninformative error by default.
+  :issue:`6573` by :user:`Quazi Marufur Rahman ` and
+  `Manoj Kumar`_.
+
+Other predictors
+
+- Fixed :class:`semi_supervised.BaseLabelPropagation` to correctly implement
+  ``LabelPropagation`` and ``LabelSpreading`` as done in the referenced
+  papers. :issue:`9239`
+  by :user:`Andre Ambrosio Boechat `, :user:`Utkarsh Upadhyay
+  `, and `Joel Nothman`_.
+
+Decomposition, manifold learning and clustering
+
+- Fixed the implementation of :class:`manifold.TSNE`:
+
+  - The ``early_exaggeration`` parameter had no effect and is now used for
+    the first 250 optimization iterations.
+
+  - Fixed the ``AssertionError: Tree consistency failed`` exception
+    reported in :issue:`8992`.
+
+  - Improved the learning schedule to match the one from the reference
+    implementation `lvdmaaten/bhtsne `_.
+
+  by :user:`Thomas Moreau ` and `Olivier Grisel`_.
+
+- Fixed a bug in :class:`decomposition.LatentDirichletAllocation`
+  where the ``perplexity`` method was returning incorrect results because
+  the ``transform`` method returns normalized document topic distributions
+  as of version 0.18. :issue:`7954` by :user:`Gary Foreman `.
+
+- Fixed output shape and bugs with ``n_jobs > 1`` in
+  :class:`decomposition.SparseCoder` transform and
+  :func:`decomposition.sparse_encode`
+  for one-dimensional data and one component.
+  This also impacts the output shape of :class:`decomposition.DictionaryLearning`.
+  :issue:`8086` by `Andreas Müller`_.
+
+- Fixed the implementation of ``explained_variance_``
+  in :class:`decomposition.PCA`,
+  :class:`decomposition.RandomizedPCA` and
+  :class:`decomposition.IncrementalPCA`.
+  :issue:`9105` by `Hanmin Qin `_.
+
+- Fixed the implementation of ``noise_variance_`` in
+  :class:`decomposition.PCA`. :issue:`9108` by `Hanmin Qin `_.
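+
+  A minimal sketch of inspecting the corrected attribute (the value is
+  data-dependent, so it is not shown here)::
+
+      >>> import numpy as np
+      >>> from sklearn.decomposition import PCA
+      >>> X = np.random.RandomState(0).randn(100, 5)
+      >>> pca = PCA(n_components=2).fit(X)
+      >>> noise = pca.noise_variance_  # average variance of discarded components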
+
+- Fixed a bug where :class:`cluster.DBSCAN` gave an incorrect
+  result when the input was a precomputed sparse matrix with initial
+  rows all zero. :issue:`8306` by :user:`Akshay Gupta `.
+
+- Fixed a bug regarding fitting :class:`cluster.KMeans` with a sparse
+  array ``X`` and initial centroids, where ``X``'s means were unnecessarily
+  being subtracted from the centroids. :issue:`7872` by :user:`Josh Karnofsky `.
+
+- Fixes to the input validation in :class:`covariance.EllipticEnvelope`.
+  :issue:`8086` by `Andreas Müller`_.
+
+- Fixed a bug in :class:`covariance.MinCovDet` where inputting data
+  that produced a singular covariance matrix would cause the helper method
+  ``_c_step`` to throw an exception.
+  :issue:`3367` by :user:`Jeremy Steward `.
+
+- Fixed a bug in :class:`manifold.TSNE` affecting convergence of the
+  gradient descent. :issue:`8768` by :user:`David DeTomaso `.
+
+- Fixed a bug in :class:`manifold.TSNE` where it stored the incorrect
+  ``kl_divergence_``. :issue:`6507` by :user:`Sebastian Saeger `.
+
+- Fixed improper scaling in :class:`cross_decomposition.PLSRegression`
+  with ``scale=True``. :issue:`7819` by :user:`jayzed82 `.
+
+- The ``fit`` methods of :class:`cluster.bicluster.SpectralCoclustering` and
+  :class:`cluster.bicluster.SpectralBiclustering` now conform to the API by
+  accepting ``y`` and returning the object. :issue:`6126`,
+  :issue:`7814` by :user:`Laurent Direr ` and :user:`Maniteja
+  Nandana `.
+
+- Fixed a bug where :mod:`mixture` ``sample`` methods did not return as many
+  samples as requested. :issue:`7702` by :user:`Levi John Wolf `.
+
+- Fixed the shrinkage implementation in :class:`neighbors.NearestCentroid`.
+  :issue:`9219` by `Hanmin Qin `_.
+
+Preprocessing and feature selection
+
+- For sparse matrices, :func:`preprocessing.normalize` with
+  ``return_norm=True`` now raises a ``NotImplementedError`` for the ``'l1'``
+  and ``'l2'`` norms; for the ``'max'`` norm the returned norms are now the
+  same as for dense matrices. :issue:`7771` by `Ang Lu `_.
+
+- Fixed a bug where :class:`feature_selection.SelectFdr` did not
+  exactly implement the Benjamini-Hochberg procedure. It formerly may have
+  selected fewer features than it should.
+  :issue:`7490` by :user:`Peng Meng `.
+
+- Fixed a bug where :class:`linear_model.RandomizedLasso` and
+  :class:`linear_model.RandomizedLogisticRegression` broke for
+  sparse input. :issue:`8259` by :user:`Aman Dalmia `.
+
+- Fixed a bug where :class:`feature_extraction.FeatureHasher`
+  mandatorily applied a sparse random projection to the hashed features,
+  preventing the use of
+  :class:`feature_extraction.text.HashingVectorizer` in a
+  pipeline with :class:`feature_extraction.text.TfidfTransformer`.
+  :issue:`7565` by :user:`Roman Yurchak `.
+
+- Fixed a bug where :func:`feature_selection.mutual_info_regression` did not
+  correctly use ``n_neighbors``. :issue:`8181` by :user:`Guillaume Lemaitre
+  `.
+
+Model evaluation and meta-estimators
+
+- Fixed a bug where :func:`model_selection.BaseSearchCV.inverse_transform`
+  returned ``self.best_estimator_.transform()`` instead of
+  ``self.best_estimator_.inverse_transform()``.
+  :issue:`8344` by :user:`Akshay Gupta ` and :user:`Rasmus Eriksson `.
+
+- Added ``classes_`` attribute to :class:`model_selection.GridSearchCV`,
+  :class:`model_selection.RandomizedSearchCV`, :class:`grid_search.GridSearchCV`,
+  and :class:`grid_search.RandomizedSearchCV` that matches the ``classes_``
+  attribute of ``best_estimator_``.
+  :issue:`7661` and :issue:`8295`
+  by :user:`Alyssa Batula `, :user:`Dylan Werner-Meier `,
+  and :user:`Stephen Hoover `.
+
+- Fixed a bug where :func:`model_selection.validation_curve`
+  reused the same estimator for each parameter value.
+  :issue:`7365` by :user:`Aleksandr Sandrovskii `.
+
+- :func:`model_selection.permutation_test_score` now works with Pandas
+  types. :issue:`5697` by :user:`Stijn Tonk `.
+
+- Several fixes to input validation in
+  :class:`multiclass.OutputCodeClassifier`.
+  :issue:`8086` by `Andreas Müller`_.
+
+- :class:`multiclass.OneVsOneClassifier`'s ``partial_fit`` now ensures all
+  classes are provided up-front. :issue:`6250` by
+  :user:`Asish Panda `.
+
+- Fixed :func:`multioutput.MultiOutputClassifier.predict_proba` to return a
+  list of 2d arrays, rather than a 3d array. In the case where different
+  target columns had different numbers of classes, a ``ValueError`` would be
+  raised on trying to stack matrices with different dimensions.
+  :issue:`8093` by :user:`Peter Bull `.
+
+- Cross validation now works with Pandas datatypes that have a
+  read-only index. :issue:`9507` by `Loic Esteve`_.
+
+Metrics
+
+- :func:`metrics.average_precision_score` no longer linearly
+  interpolates between operating points, and instead weighs precisions
+  by the change in recall since the last operating point, as per the
+  `Wikipedia entry `_.
+  (`#7356 `_). By
+  :user:`Nick Dingwall ` and `Gael Varoquaux`_.
+
+- Fixed a bug in :func:`metrics.classification._check_targets`
+  which would return ``'binary'`` if ``y_true`` and ``y_pred`` were
+  both ``'binary'`` but the union of ``y_true`` and ``y_pred`` was
+  ``'multiclass'``. :issue:`8377` by `Loic Esteve`_.
+
+- Fixed an integer overflow bug in :func:`metrics.confusion_matrix` and
+  hence :func:`metrics.cohen_kappa_score`. :issue:`8354`, :issue:`7929`
+  by `Joel Nothman`_ and :user:`Jon Crall `.
+
+- Fixed passing of the ``gamma`` parameter to the ``chi2`` kernel in
+  :func:`metrics.pairwise.pairwise_kernels`. :issue:`5211` by
+  :user:`Nick Rhinehart `,
+  :user:`Saurabh Bansod ` and `Andreas Müller`_.
+
+Miscellaneous
+
+- Fixed a bug where :func:`datasets.make_classification` failed
+  when generating more than 30 features. :issue:`8159` by
+  :user:`Herilalaina Rakotoarison `.
+
+- Fixed a bug where :func:`datasets.make_moons` gave an
+  incorrect result when ``n_samples`` was odd.
+  :issue:`8198` by :user:`Josh Levy `.
+
+- Some ``fetch_`` functions in :mod:`datasets` were ignoring the
+  ``download_if_missing`` keyword. :issue:`7944` by :user:`Ralf Gommers `.
+
+- Fixed estimators to accept a ``sample_weight`` parameter of type
+  ``pandas.Series`` in their ``fit`` function. :issue:`7825` by
+  `Kathleen Chen`_.
+
+- Fixed a bug in cases where ``numpy.cumsum`` may be numerically unstable,
+  raising an exception if instability is identified. :issue:`7376` and
+  :issue:`7331` by `Joel Nothman`_ and :user:`yangarbiter`.
+
+- Fixed a bug where :meth:`base.BaseEstimator.__getstate__`
+  obstructed pickling customizations of child-classes, when used in a
+  multiple inheritance context.
+  :issue:`8316` by :user:`Holger Peters `.
+
+- Updated Sphinx-Gallery from 0.1.4 to 0.1.7 for resolving links in
+  documentation builds with Sphinx>1.5. :issue:`8010`, :issue:`7986` by
+  :user:`Oscar Najera `.
+
+- Added ``data_home`` parameter to :func:`sklearn.datasets.fetch_kddcup99`.
+  :issue:`9289` by `Loic Esteve`_.
+
+- Fixed dataset loaders to use the Python 3 version of ``makedirs`` so that
+  they also work in Python 2. :issue:`9284` by :user:`Sebastin Santy `.
+
+- Several minor issues were fixed with thanks to the alerts of
+  `lgtm.com <http://lgtm.com>`_. :issue:`9278` by :user:`Jean Helie `,
+  among others.
+
+API changes summary
+-------------------
+
+Trees and ensembles
+
+- Gradient boosting base models are no longer estimators. By `Andreas Müller`_.
+
+- All tree based estimators now accept a ``min_impurity_decrease``
+  parameter in lieu of ``min_impurity_split``, which is now deprecated.
+  A node is now split only if the split decreases the weighted impurity
+  by at least ``min_impurity_decrease``. :issue:`8449` by `Raghav RV`_.
+
+Linear, kernelized and related models
+
+- The ``n_iter`` parameter is deprecated in :class:`linear_model.SGDClassifier`,
+  :class:`linear_model.SGDRegressor`,
+  :class:`linear_model.PassiveAggressiveClassifier`,
+  :class:`linear_model.PassiveAggressiveRegressor` and
+  :class:`linear_model.Perceptron`. By `Tom Dupre la Tour`_.
+
+Other predictors
+
+- :class:`neighbors.LSHForest` has been deprecated and will be
+  removed in 0.21 due to poor performance.
+  :issue:`9078` by :user:`Laurent Direr `.
+
+- :class:`neighbors.NearestCentroid` no longer purports to support
+  ``metric='precomputed'``, which now raises an error. :issue:`8515` by
+  :user:`Sergul Aydore `.
+
+- The ``alpha`` parameter of :class:`semi_supervised.LabelPropagation` now
+  has no effect and is deprecated; it will be removed in 0.21. :issue:`9239`
+  by :user:`Andre Ambrosio Boechat `, :user:`Utkarsh Upadhyay
+  `, and `Joel Nothman`_.
+
+Decomposition, manifold learning and clustering
+
+- Deprecate the ``doc_topic_distr`` argument of the ``perplexity`` method
+  in :class:`decomposition.LatentDirichletAllocation` because the
+  user no longer has access to the unnormalized document topic distribution
+  needed for the perplexity calculation. :issue:`7954` by
+  :user:`Gary Foreman `.
+
+- The ``n_topics`` parameter of :class:`decomposition.LatentDirichletAllocation`
+  has been renamed to ``n_components`` and will be removed in version 0.21.
+  :issue:`8922` by :user:`Attractadore`.
+
+- :meth:`decomposition.SparsePCA.transform`'s ``ridge_alpha`` parameter is
+  deprecated in favor of the class parameter.
+  :issue:`8137` by :user:`Naoya Kanai `.
+
+- :class:`cluster.DBSCAN` now has a ``metric_params`` parameter.
+  :issue:`8139` by :user:`Naoya Kanai `.
+
+Preprocessing and feature selection
+
+- :class:`feature_selection.SelectFromModel` now has a ``partial_fit``
+  method only if the underlying estimator does. By `Andreas Müller`_.
+
+- :class:`feature_selection.SelectFromModel` now validates the ``threshold``
+  parameter and sets the ``threshold_`` attribute during the call to
+  ``fit``, and no longer during the call to ``transform``. By `Andreas
+  Müller`_.
+
+- The ``non_negative`` parameter in :class:`feature_extraction.FeatureHasher`
+  has been deprecated, and replaced with a more principled alternative,
+  ``alternate_sign``.
+  :issue:`7565` by :user:`Roman Yurchak `.
+
+- :class:`linear_model.RandomizedLogisticRegression`
+  and :class:`linear_model.RandomizedLasso` have been deprecated and will
+  be removed in version 0.21.
+  :issue:`8995` by :user:`Ramana.S `.
+
+Model evaluation and meta-estimators
+
+- Deprecate the ``fit_params`` constructor input to
+  :class:`model_selection.GridSearchCV` and
+  :class:`model_selection.RandomizedSearchCV` in favor
+  of passing keyword parameters to the ``fit`` methods
+  of those classes.
+  Data-dependent parameters needed for model
+  training should be passed as keyword arguments to ``fit``,
+  and conforming to this convention will allow the hyperparameter
+  selection classes to be used with tools such as
+  :func:`model_selection.cross_val_predict`.
+  :issue:`2879` by :user:`Stephen Hoover `.
+
+- In version 0.21, the default behavior of splitters that use the
+  ``test_size`` and ``train_size`` parameters will change, such that
+  specifying ``train_size`` alone will cause ``test_size`` to be the
+  remainder. :issue:`7459` by :user:`Nelson Liu `.
+
+- :class:`multiclass.OneVsRestClassifier` now has ``partial_fit``,
+  ``decision_function`` and ``predict_proba`` methods only when the
+  underlying estimator does. :issue:`7812` by `Andreas Müller`_ and
+  :user:`Mikhail Korobov `.
+
+- :class:`multiclass.OneVsRestClassifier` now has a ``partial_fit`` method
+  only if the underlying estimator does. By `Andreas Müller`_.
+
+- The ``decision_function`` output shape for binary classification in
+  :class:`multiclass.OneVsRestClassifier` and
+  :class:`multiclass.OneVsOneClassifier` is now ``(n_samples,)`` to conform
+  to scikit-learn conventions. :issue:`9100` by `Andreas Müller`_.
+
+- The :func:`multioutput.MultiOutputClassifier.predict_proba`
+  function used to return a 3d array (``n_samples``, ``n_classes``,
+  ``n_outputs``). In the case where different target columns had different
+  numbers of classes, a ``ValueError`` would be raised on trying to stack
+  matrices with different dimensions. This function now returns a list of
+  arrays where the length of the list is ``n_outputs``, and each array is
+  (``n_samples``, ``n_classes``) for that particular output.
+  :issue:`8093` by :user:`Peter Bull `.
+
+- The ``named_steps`` attribute of :class:`pipeline.Pipeline` is now a
+  :class:`utils.Bunch` rather than a plain ``dict``, to enable tab
+  completion in interactive environments. Where a step name conflicts with
+  an existing ``dict`` method, the ``dict`` behavior takes precedence.
+  :issue:`8481` by :user:`Herilalaina Rakotoarison `.
+
+Miscellaneous
+
+- Deprecate the ``y`` parameter in ``transform`` and ``inverse_transform``.
+  These methods should not accept a ``y`` parameter, as they are used at
+  prediction time.
+  :issue:`8174` by :user:`Tahar Zanouda `, `Alexandre Gramfort`_
+  and `Raghav RV`_.
+
+- SciPy >= 0.13.3 and NumPy >= 1.8.2 are now the minimum supported versions
+  for scikit-learn. The following backported functions in
+  :mod:`utils` have been removed or deprecated accordingly.
+  :issue:`8854` and :issue:`8874` by :user:`Naoya Kanai `.
+
+- The ``store_covariances`` and ``covariances_`` parameters of
+  :class:`discriminant_analysis.QuadraticDiscriminantAnalysis`
+  have been renamed to ``store_covariance`` and ``covariance_`` to be
+  consistent with the corresponding parameter names of
+  :class:`discriminant_analysis.LinearDiscriminantAnalysis`. They will be
+  removed in version 0.21.
:issue:`7998` by :user:`Jiacheng ` + + Removed in 0.19: + + - ``utils.fixes.argpartition`` + - ``utils.fixes.array_equal`` + - ``utils.fixes.astype`` + - ``utils.fixes.bincount`` + - ``utils.fixes.expit`` + - ``utils.fixes.frombuffer_empty`` + - ``utils.fixes.in1d`` + - ``utils.fixes.norm`` + - ``utils.fixes.rankdata`` + - ``utils.fixes.safe_copy`` + + Deprecated in 0.19, to be removed in 0.21: + + - ``utils.arpack.eigs`` + - ``utils.arpack.eigsh`` + - ``utils.arpack.svds`` + - ``utils.extmath.fast_dot`` + - ``utils.extmath.logsumexp`` + - ``utils.extmath.norm`` + - ``utils.extmath.pinvh`` + - ``utils.graph.graph_laplacian`` + - ``utils.random.choice`` + - ``utils.sparsetools.connected_components`` + - ``utils.stats.rankdata`` + +- Estimators with both methods ``decision_function`` and ``predict_proba`` + are now required to have a monotonic relation between them. The + method ``check_decision_proba_consistency`` has been added in + **utils.estimator_checks** to check their consistency. + :issue:`7578` by :user:`Shubham Bhardwaj ` + +- All checks in ``utils.estimator_checks``, in particular + :func:`utils.estimator_checks.check_estimator` now accept estimator + instances. Most other checks do not accept + estimator classes any more. :issue:`9019` by `Andreas Müller`_. + +- Ensure that estimators' attributes ending with ``_`` are not set + in the constructor but only in the ``fit`` method. Most notably, + ensemble estimators (deriving from :class:`ensemble.BaseEnsemble`) + now only have ``self.estimators_`` available after ``fit``. + :issue:`7464` by `Lars Buitinck`_ and `Loic Esteve`_. + + +Code and Documentation Contributors +----------------------------------- + +Thanks to everyone who has contributed to the maintenance and improvement of the +project since version 0.18, including: + +Joel Nothman, Loic Esteve, Andreas Mueller, Guillaume Lemaitre, Olivier Grisel, +Hanmin Qin, Raghav RV, Alexandre Gramfort, themrmax, Aman Dalmia, Gael +Varoquaux, Naoya Kanai, Tom Dupré la Tour, Rishikesh, Nelson Liu, Taehoon Lee, +Nelle Varoquaux, Aashil, Mikhail Korobov, Sebastin Santy, Joan Massich, Roman +Yurchak, RAKOTOARISON Herilalaina, Thierry Guillemot, Alexandre Abadie, Carol +Willing, Balakumaran Manoharan, Josh Karnofsky, Vlad Niculae, Utkarsh Upadhyay, +Dmitry Petrov, Minghui Liu, Srivatsan, Vincent Pham, Albert Thomas, Jake +VanderPlas, Attractadore, JC Liu, alexandercbooth, chkoar, Óscar Nájera, +Aarshay Jain, Kyle Gilliam, Ramana Subramanyam, CJ Carey, Clement Joudet, David +Robles, He Chen, Joris Van den Bossche, Karan Desai, Katie Luangkote, Leland +McInnes, Maniteja Nandana, Michele Lacchia, Sergei Lebedev, Shubham Bhardwaj, +akshay0724, omtcyfz, rickiepark, waterponey, Vathsala Achar, jbDelafosse, Ralf +Gommers, Ekaterina Krivich, Vivek Kumar, Ishank Gulati, Dave Elliott, ldirer, +Reiichiro Nakano, Levi John Wolf, Mathieu Blondel, Sid Kapur, Dougal J. +Sutherland, midinas, mikebenfield, Sourav Singh, Aseem Bansal, Ibraim Ganiev, +Stephen Hoover, AishwaryaRK, Steven C. Howell, Gary Foreman, Neeraj Gangwar, +Tahar, Jon Crall, dokato, Kathy Chen, ferria, Thomas Moreau, Charlie Brummitt, +Nicolas Goix, Adam Kleczewski, Sam Shleifer, Nikita Singh, Basil Beirouti, +Giorgio Patrini, Manoj Kumar, Rafael Possas, James Bourbeau, James A. 
Bednar, +Janine Harper, Jaye, Jean Helie, Jeremy Steward, Artsiom, John Wei, Jonathan +LIgo, Jonathan Rahn, seanpwilliams, Arthur Mensch, Josh Levy, Julian Kuhlmann, +Julien Aubert, Jörn Hees, Kai, shivamgargsya, Kat Hempstalk, Kaushik +Lakshmikanth, Kennedy, Kenneth Lyons, Kenneth Myers, Kevin Yap, Kirill Bobyrev, +Konstantin Podshumok, Arthur Imbert, Lee Murray, toastedcornflakes, Lera, Li +Li, Arthur Douillard, Mainak Jas, tobycheese, Manraj Singh, Manvendra Singh, +Marc Meketon, MarcoFalke, Matthew Brett, Matthias Gilch, Mehul Ahuja, Melanie +Goetz, Meng, Peng, Michael Dezube, Michal Baumgartner, vibrantabhi19, Artem +Golubin, Milen Paskov, Antonin Carette, Morikko, MrMjauh, NALEPA Emmanuel, +Namiya, Antoine Wendlinger, Narine Kokhlikyan, NarineK, Nate Guerin, Angus +Williams, Ang Lu, Nicole Vavrova, Nitish Pandey, Okhlopkov Daniil Olegovich, +Andy Craze, Om Prakash, Parminder Singh, Patrick Carlson, Patrick Pei, Paul +Ganssle, Paulo Haddad, Paweł Lorek, Peng Yu, Pete Bachant, Peter Bull, Peter +Csizsek, Peter Wang, Pieter Arthur de Jong, Ping-Yao, Chang, Preston Parry, +Puneet Mathur, Quentin Hibon, Andrew Smith, Andrew Jackson, 1kastner, Rameshwar +Bhaskaran, Rebecca Bilbro, Remi Rampin, Andrea Esuli, Rob Hall, Robert +Bradshaw, Romain Brault, Aman Pratik, Ruifeng Zheng, Russell Smith, Sachin +Agarwal, Sailesh Choyal, Samson Tan, Samuël Weber, Sarah Brown, Sebastian +Pölsterl, Sebastian Raschka, Sebastian Saeger, Alyssa Batula, Abhyuday Pratap +Singh, Sergey Feldman, Sergul Aydore, Sharan Yalburgi, willduan, Siddharth +Gupta, Sri Krishna, Almer, Stijn Tonk, Allen Riddell, Theofilos Papapanagiotou, +Alison, Alexis Mignon, Tommy Boucher, Tommy Löfstedt, Toshihiro Kamishima, +Tyler Folkman, Tyler Lanigan, Alexander Junge, Varun Shenoy, Victor Poughon, +Vilhelm von Ehrenheim, Aleksandr Sandrovskii, Alan Yee, Vlasios Vasileiou, +Warut Vijitbenjaronk, Yang Zhang, Yaroslav Halchenko, Yichuan Liu, Yuichi +Fujikawa, affanv14, aivision2020, xor, andreh7, brady salz, campustrampus, +Agamemnon Krasoulis, ditenberg, elena-sharova, filipj8, fukatani, gedeck, +guiniol, guoci, hakaa1, hongkahjun, i-am-xhy, jakirkham, jaroslaw-weber, +jayzed82, jeroko, jmontoyam, jonathan.striebel, josephsalmon, jschendel, +leereeves, martin-hahn, mathurinm, mehak-sachdeva, mlewis1729, mlliou112, +mthorrell, ndingwall, nuffe, yangarbiter, plagree, pldtc325, Breno Freitas, +Brett Olsen, Brian A. Alfano, Brian Burns, polmauri, Brandon Carter, Charlton +Austin, Chayant T15h, Chinmaya Pancholi, Christian Danielsen, Chung Yen, +Chyi-Kwei Yau, pravarmahajan, DOHMATOB Elvis, Daniel LeJeune, Daniel Hnyk, +Darius Morawiec, David DeTomaso, David Gasquez, David Haberthür, David +Heryanto, David Kirkby, David Nicholson, rashchedrin, Deborah Gertrude Digges, +Denis Engemann, Devansh D, Dickson, Bob Baxley, Don86, E. Lynch-Klarup, Ed +Rogers, Elizabeth Ferriss, Ellen-Co2, Fabian Egli, Fang-Chieh Chou, Bing Tian +Dai, Greg Stupp, Grzegorz Szpak, Bertrand Thirion, Hadrien Bertrand, Harizo +Rajaona, zxcvbnius, Henry Lin, Holger Peters, Icyblade Dai, Igor +Andriushchenko, Ilya, Isaac Laughlin, Iván Vallés, Aurélien Bellet, JPFrancoia, +Jacob Schreiber, Asish Mahapatra + +.. _changes_0_18_2: + +Version 0.18.2 +============== + +**June 20, 2017** + +.. topic:: Last release with Python 2.6 support + + Scikit-learn 0.18 is the last major release of scikit-learn to support Python 2.6. + Later versions of scikit-learn will require Python 2.7 or above. 
+
+
+Changelog
+---------
+
+- Fixes for compatibility with NumPy 1.13.0: :issue:`7946` :issue:`8355` by
+  `Loic Esteve`_.
+
+- Minor compatibility changes in the examples :issue:`9010` :issue:`8040`
+  :issue:`9149`.
+
+Code Contributors
+-----------------
+Aman Dalmia, Loic Esteve, Nate Guerin, Sergei Lebedev
+
+
+.. _changes_0_18_1:
+
+Version 0.18.1
+==============
+
+**November 11, 2016**
+
+Changelog
+---------
+
+Enhancements
+............
+
+- Improved ``sample_without_replacement`` speed by utilizing
+  ``numpy.random.permutation`` for most cases. As a result,
+  samples may differ in this release for a fixed random state.
+  Affected estimators:
+
+  - :class:`ensemble.BaggingClassifier`
+  - :class:`ensemble.BaggingRegressor`
+  - :class:`linear_model.RANSACRegressor`
+  - :class:`model_selection.RandomizedSearchCV`
+  - :class:`random_projection.SparseRandomProjection`
+
+  This also affects the :func:`datasets.make_classification`
+  function.
+
+Bug fixes
+.........
+
+- Fixed an issue where the ``min_grad_norm`` and ``n_iter_without_progress``
+  parameters were not being utilised by :class:`manifold.TSNE`.
+  :issue:`6497` by :user:`Sebastian Säger `.
+
+- Fixed a bug in SVM decision values when ``decision_function_shape``
+  is ``ovr`` in :class:`svm.SVC`.
+  :class:`svm.SVC`'s ``decision_function`` was incorrect from versions
+  0.17.0 through 0.18.0.
+  :issue:`7724` by `Bing Tian Dai`_.
+
+- The attribute ``explained_variance_ratio_`` of
+  :class:`discriminant_analysis.LinearDiscriminantAnalysis` calculated
+  with the SVD and Eigen solvers is now of the same length. :issue:`7632`
+  by :user:`JPFrancoia `.
+
+- Fixed an issue in :ref:`univariate_feature_selection` where score
+  functions were not accepting multi-label targets. :issue:`7676`
+  by :user:`Mohammed Affan `.
+
+- Fixed setting parameters when calling ``fit`` multiple times on
+  :class:`feature_selection.SelectFromModel`. :issue:`7756` by `Andreas Müller`_.
+
+- Fixed an issue in the ``partial_fit`` method of
+  :class:`multiclass.OneVsRestClassifier` when the number of classes used in
+  ``partial_fit`` was less than the total number of classes in the
+  data. :issue:`7786` by `Srivatsan Ramesh`_.
+
+- Fixed an issue in :class:`calibration.CalibratedClassifierCV` where
+  the probabilities of each class for a sample did not sum to 1, and
+  ``CalibratedClassifierCV`` now handles the case where the training set
+  has fewer classes than the full data. :issue:`7799` by
+  `Srivatsan Ramesh`_.
+
+- Fixed a bug where :class:`sklearn.feature_selection.SelectFdr` did not
+  exactly implement the Benjamini-Hochberg procedure. It formerly may have
+  selected fewer features than it should.
+  :issue:`7490` by :user:`Peng Meng `.
+
+- :class:`sklearn.manifold.LocallyLinearEmbedding` now correctly handles
+  integer inputs. :issue:`6282` by `Jake Vanderplas`_.
+
+- The ``min_weight_fraction_leaf`` parameter of tree-based classifiers and
+  regressors now assumes uniform sample weights by default if the
+  ``sample_weight`` argument is not passed to the ``fit`` function.
+  Previously, the parameter was silently ignored. :issue:`7301`
+  by :user:`Nelson Liu `.
+
+- Fixed a numerical issue with :class:`linear_model.RidgeCV` on centered data
+  when ``n_features > n_samples``. :issue:`6178` by `Bertrand Thirion`_.
+
+- Tree splitting criterion classes' cloning/pickling is now memory safe.
+  :issue:`7680` by :user:`Ibraim Ganiev `.
+
+- Fixed a bug where :class:`decomposition.NMF` set its ``n_iter_``
+  attribute in ``transform()``. :issue:`7553` by :user:`Ekaterina
+  Krivich `.
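+
+  A minimal sketch of the intended behaviour (``n_iter_`` is set by ``fit``
+  and should no longer be overwritten by ``transform``)::
+
+      >>> import numpy as np
+      >>> from sklearn.decomposition import NMF
+      >>> X = np.random.RandomState(0).rand(20, 5)  # NMF needs non-negative input
+      >>> nmf = NMF(n_components=2, random_state=0).fit(X)
+      >>> n_iter = nmf.n_iter_
+      >>> W = nmf.transform(X)  # nmf.n_iter_ is left untouched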
+ +- :class:`sklearn.linear_model.LogisticRegressionCV` now correctly handles + string labels. :issue:`5874` by `Raghav RV`_. + +- Fixed a bug where :func:`sklearn.model_selection.train_test_split` raised + an error when ``stratify`` is a list of string labels. :issue:`7593` by + `Raghav RV`_. + +- Fixed a bug where :class:`sklearn.model_selection.GridSearchCV` and + :class:`sklearn.model_selection.RandomizedSearchCV` were not pickleable + because of a pickling bug in ``np.ma.MaskedArray``. :issue:`7594` by + `Raghav RV`_. + +- All cross-validation utilities in :mod:`sklearn.model_selection` now + permit one time cross-validation splitters for the ``cv`` parameter. Also + non-deterministic cross-validation splitters (where multiple calls to + ``split`` produce dissimilar splits) can be used as ``cv`` parameter. + The :class:`sklearn.model_selection.GridSearchCV` will cross-validate each + parameter setting on the split produced by the first ``split`` call + to the cross-validation splitter. :issue:`7660` by `Raghav RV`_. + +- Fix bug where :meth:`preprocessing.MultiLabelBinarizer.fit_transform` + returned an invalid CSR matrix. + :issue:`7750` by :user:`CJ Carey `. + +- Fixed a bug where :func:`metrics.pairwise.cosine_distances` could return a + small negative distance. :issue:`7732` by :user:`Artsion `. + +API changes summary +------------------- + +Trees and forests + +- The ``min_weight_fraction_leaf`` parameter of tree-based classifiers and + regressors now assumes uniform sample weights by default if the + ``sample_weight`` argument is not passed to the ``fit`` function. + Previously, the parameter was silently ignored. :issue:`7301` by :user:`Nelson + Liu `. + +- Tree splitting criterion classes' cloning/pickling is now memory safe. + :issue:`7680` by :user:`Ibraim Ganiev `. + + +Linear, kernelized and related models + +- Length of ``explained_variance_ratio`` of + :class:`discriminant_analysis.LinearDiscriminantAnalysis` + changed for both Eigen and SVD solvers. The attribute has now a length + of min(n_components, n_classes - 1). :issue:`7632` + by :user:`JPFrancoia ` + +- Numerical issue with :class:`linear_model.RidgeCV` on centered data when + ``n_features > n_samples``. :issue:`6178` by `Bertrand Thirion`_ + +.. _changes_0_18: + +Version 0.18 +============ + +**September 28, 2016** + +.. topic:: Last release with Python 2.6 support + + Scikit-learn 0.18 will be the last version of scikit-learn to support Python 2.6. + Later versions of scikit-learn will require Python 2.7 or above. + +.. _model_selection_changes: + +Model Selection Enhancements and API Changes +-------------------------------------------- + +- **The model_selection module** + + The new module :mod:`sklearn.model_selection`, which groups together the + functionalities of formerly :mod:`sklearn.cross_validation`, + :mod:`sklearn.grid_search` and :mod:`sklearn.learning_curve`, introduces new + possibilities such as nested cross-validation and better manipulation of + parameter searches with Pandas. + + Many things will stay the same but there are some key differences. Read + below to know more about the changes. + +- **Data-independent CV splitters enabling nested cross-validation** + + The new cross-validation splitters, defined in the + :mod:`sklearn.model_selection`, are no longer initialized with any + data-dependent parameters such as ``y``. Instead they expose a + :func:`split` method that takes in the data and yields a generator for the + different splits. 
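+
+  For example, a minimal sketch of the new splitter API::
+
+      >>> import numpy as np
+      >>> from sklearn.model_selection import KFold
+      >>> X, y = np.arange(20).reshape(10, 2), np.arange(10)
+      >>> cv = KFold(n_splits=5)  # no data-dependent arguments here
+      >>> for train_index, test_index in cv.split(X, y):
+      ...     pass  # train/test indices for each of the 5 splits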
+
+  This change makes it possible to use the cross-validation splitters to
+  perform nested cross-validation, facilitated by
+  :class:`model_selection.GridSearchCV` and
+  :class:`model_selection.RandomizedSearchCV` utilities.
+
+- **The enhanced cv_results_ attribute**
+
+  The new ``cv_results_`` attribute (of :class:`model_selection.GridSearchCV`
+  and :class:`model_selection.RandomizedSearchCV`) introduced in lieu of the
+  ``grid_scores_`` attribute is a dict of 1D arrays with elements in each
+  array corresponding to the parameter settings (i.e. search candidates).
+
+  The ``cv_results_`` dict can be easily imported into ``pandas`` as a
+  ``DataFrame`` for exploring the search results.
+
+  The ``cv_results_`` arrays include scores for each cross-validation split
+  (with keys such as ``'split0_test_score'``), as well as their mean
+  (``'mean_test_score'``) and standard deviation (``'std_test_score'``).
+
+  The ranks for the search candidates (based on their mean
+  cross-validation score) are available at ``cv_results_['rank_test_score']``.
+
+  The values for each parameter are stored separately as numpy
+  masked object arrays. The value for a given search candidate is masked if
+  the corresponding parameter is not applicable. Additionally a list of all
+  the parameter dicts is stored at ``cv_results_['params']``.
+
+- **Parameters n_folds and n_iter renamed to n_splits**
+
+  Some parameter names have changed:
+  The ``n_folds`` parameter in new :class:`model_selection.KFold`,
+  :class:`model_selection.GroupKFold` (see below for the name change),
+  and :class:`model_selection.StratifiedKFold` is now renamed to
+  ``n_splits``. The ``n_iter`` parameter in
+  :class:`model_selection.ShuffleSplit`, the new class
+  :class:`model_selection.GroupShuffleSplit` and
+  :class:`model_selection.StratifiedShuffleSplit` is now renamed to
+  ``n_splits``.
+
+- **Rename of splitter classes which accept group labels along with data**
+
+  The cross-validation splitters ``LabelKFold``,
+  ``LabelShuffleSplit``, ``LeaveOneLabelOut`` and ``LeavePLabelOut`` have
+  been renamed to :class:`model_selection.GroupKFold`,
+  :class:`model_selection.GroupShuffleSplit`,
+  :class:`model_selection.LeaveOneGroupOut` and
+  :class:`model_selection.LeavePGroupsOut` respectively.
+
+  Note the change from singular to plural form in
+  :class:`model_selection.LeavePGroupsOut`.
+
+- **Fit parameter labels renamed to groups**
+
+  The ``labels`` parameter in the :func:`split` method of the newly renamed
+  splitters :class:`model_selection.GroupKFold`,
+  :class:`model_selection.LeaveOneGroupOut`,
+  :class:`model_selection.LeavePGroupsOut` and
+  :class:`model_selection.GroupShuffleSplit` is renamed to ``groups``
+  following the new nomenclature of their class names.
+
+- **Parameter n_labels renamed to n_groups**
+
+  The parameter ``n_labels`` in the newly renamed
+  :class:`model_selection.LeavePGroupsOut` is changed to ``n_groups``.
+
+- **Training scores and Timing information**
+
+  ``cv_results_`` also includes the training scores for each
+  cross-validation split (with keys such as ``'split0_train_score'``), as
+  well as their mean (``'mean_train_score'``) and standard deviation
+  (``'std_train_score'``). To avoid the cost of evaluating training scores,
+  set ``return_train_score=False``.
+
+  Additionally the mean and standard deviation of the times taken to split,
+  train and score the model across all the cross-validation splits are
+  available at the keys ``'mean_time'`` and ``'std_time'`` respectively.
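+
+  A minimal sketch of exploring ``cv_results_`` with pandas (``grid_search``
+  is assumed to be an already fitted search object)::
+
+      >>> import pandas as pd
+      >>> results = pd.DataFrame(grid_search.cv_results_)
+      >>> cols = ['params', 'mean_test_score', 'rank_test_score']
+      >>> summary = results[cols]  # one row per search candidate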
+
+Changelog
+---------
+
+New features
+............
+
+Classifiers and Regressors
+
+- The Gaussian Process module has been reimplemented and now offers classification
+  and regression estimators through :class:`gaussian_process.GaussianProcessClassifier`
+  and :class:`gaussian_process.GaussianProcessRegressor`. Among other things, the new
+  implementation supports kernel engineering, gradient-based hyperparameter optimization and
+  sampling of functions from the GP prior and GP posterior. Extensive documentation and
+  examples are provided. By `Jan Hendrik Metzen`_.
+
+- Added new supervised learning algorithm: :ref:`Multi-layer Perceptron `.
+  :issue:`3204` by :user:`Issam H. Laradji `.
+
+- Added :class:`linear_model.HuberRegressor`, a linear model robust to outliers.
+  :issue:`5291` by `Manoj Kumar`_.
+
+- Added the :class:`multioutput.MultiOutputRegressor` meta-estimator. It
+  converts single output regressors to multi-output regressors by fitting
+  one regressor per output. By :user:`Tim Head `.
+
+Other estimators
+
+- New :class:`mixture.GaussianMixture` and :class:`mixture.BayesianGaussianMixture`
+  replace the former mixture models, employing faster inference
+  for sounder results. :issue:`7295` by :user:`Wei Xue ` and
+  :user:`Thierry Guillemot `.
+
+- Class :class:`decomposition.RandomizedPCA` is now factored into :class:`decomposition.PCA`
+  and is available by calling it with the parameter ``svd_solver='randomized'``.
+  The default number of ``n_iter`` for ``'randomized'`` has changed to 4. The old
+  behavior of PCA is recovered by ``svd_solver='full'``. An additional solver
+  calls ``arpack`` and performs a truncated (non-randomized) SVD. By default,
+  the best solver is selected depending on the size of the input and the
+  number of components requested. :issue:`5299` by :user:`Giorgio Patrini `.
+
+- Added two functions for mutual information estimation:
+  :func:`feature_selection.mutual_info_classif` and
+  :func:`feature_selection.mutual_info_regression`. These functions can be
+  used in :class:`feature_selection.SelectKBest` and
+  :class:`feature_selection.SelectPercentile` as score functions.
+  By :user:`Andrea Bravi ` and :user:`Nikolay Mayorov `.
+
+- Added the :class:`ensemble.IsolationForest` class for anomaly detection based on
+  random forests. By `Nicolas Goix`_.
+
+- Added ``algorithm="elkan"`` to :class:`cluster.KMeans` implementing
+  Elkan's fast K-Means algorithm. By `Andreas Müller`_.
+
+Model selection and evaluation
+
+- Added :func:`metrics.cluster.fowlkes_mallows_score`, the Fowlkes-Mallows
+  Index, which measures the similarity of two clusterings of a set of points.
+  By :user:`Arnaud Fouchet ` and :user:`Thierry Guillemot `.
+
+- Added :func:`metrics.calinski_harabaz_score`, which computes the Calinski
+  and Harabaz score to evaluate the resulting clustering of a set of points.
+  By :user:`Arnaud Fouchet ` and :user:`Thierry Guillemot `.
+
+- Added the new cross-validation splitter
+  :class:`model_selection.TimeSeriesSplit` to handle time series data.
+  :issue:`6586` by :user:`YenChen Lin `.
+
+- The cross-validation iterators are replaced by cross-validation splitters
+  available from :mod:`sklearn.model_selection`, allowing for nested
+  cross-validation. See :ref:`model_selection_changes` for more information.
+  :issue:`4294` by `Raghav RV`_.
+
+Enhancements
+............
+
+Trees and ensembles
+
+- Added a new splitting criterion for :class:`tree.DecisionTreeRegressor`,
+  the mean absolute error.
+  This criterion can also be used in
+  :class:`ensemble.ExtraTreesRegressor`,
+  :class:`ensemble.RandomForestRegressor`, and the gradient boosting
+  estimators. :issue:`6667` by :user:`Nelson Liu `.
+
+- Added a weighted impurity-based early stopping criterion for decision tree
+  growth. :issue:`6954` by :user:`Nelson Liu `.
+
+- The random forest, extra trees and decision tree estimators now have a
+  method ``decision_path`` which returns the decision path of samples in
+  the tree. By `Arnaud Joly`_.
+
+- A new example has been added unveiling the decision tree structure.
+  By `Arnaud Joly`_.
+
+- Random forest, extra trees, decision trees and gradient boosting estimators
+  now accept the parameters ``min_samples_split`` and ``min_samples_leaf``
+  provided as a percentage of the training samples. By :user:`yelite ` and `Arnaud Joly`_.
+
+- Gradient boosting estimators accept the parameter ``criterion`` to specify
+  the splitting criterion used when building decision trees.
+  :issue:`6667` by :user:`Nelson Liu `.
+
+- The memory footprint is reduced (sometimes greatly) for
+  :class:`ensemble.bagging.BaseBagging` and classes that inherit from it,
+  i.e., :class:`ensemble.BaggingClassifier`,
+  :class:`ensemble.BaggingRegressor`, and :class:`ensemble.IsolationForest`,
+  by dynamically generating the attribute ``estimators_samples_`` only when
+  it is needed. By :user:`David Staub `.
+
+- Added ``n_jobs`` and ``sample_weight`` parameters for
+  :class:`ensemble.VotingClassifier` to fit underlying estimators in parallel.
+  :issue:`5805` by :user:`Ibraim Ganiev `.
+
+Linear, kernelized and related models
+
+- In :class:`linear_model.LogisticRegression`, the SAG solver is now
+  available in the multinomial case. :issue:`5251` by `Tom Dupre la Tour`_.
+
+- :class:`linear_model.RANSACRegressor`, :class:`svm.LinearSVC` and
+  :class:`svm.LinearSVR` now support ``sample_weight``.
+  By :user:`Imaculate `.
+
+- Added the parameter ``loss`` to :class:`linear_model.RANSACRegressor` to
+  measure the error on the samples for every trial. By `Manoj Kumar`_.
+
+- Prediction of out-of-sample events with Isotonic Regression
+  (:class:`isotonic.IsotonicRegression`) is now much faster (over 1000x in
+  tests with synthetic data). By :user:`Jonathan Arfa `.
+
+- Isotonic regression (:class:`isotonic.IsotonicRegression`) now uses a
+  better algorithm to avoid ``O(n^2)`` behavior in pathological cases, and is
+  also generally faster (:issue:`6691`). By `Antony Lee`_.
+
+- :class:`naive_bayes.GaussianNB` now accepts data-independent class-priors
+  through the parameter ``priors``. By :user:`Guillaume Lemaitre `.
+
+- :class:`linear_model.ElasticNet` and :class:`linear_model.Lasso`
+  now work with ``np.float32`` input data without converting it
+  into ``np.float64``. This reduces memory
+  consumption. :issue:`6913` by :user:`YenChen Lin `.
+
+- :class:`semi_supervised.LabelPropagation` and :class:`semi_supervised.LabelSpreading`
+  now accept arbitrary kernel functions in addition to strings ``knn`` and ``rbf``.
+  :issue:`5762` by :user:`Utkarsh Upadhyay `.
+
+Decomposition, manifold learning and clustering
+
+- Added an ``inverse_transform`` function to :class:`decomposition.NMF` to
+  compute the data matrix of original shape. By :user:`Anish Shah `.
+
+- :class:`cluster.KMeans` and :class:`cluster.MiniBatchKMeans` now work
+  with ``np.float32`` and ``np.float64`` input data without converting it.
+  This reduces memory consumption when using ``np.float32``.
+  :issue:`6846` by :user:`Sebastian Säger ` and
+  :user:`YenChen Lin `.
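+
+  A minimal sketch of the dtype-preserving behaviour::
+
+      >>> import numpy as np
+      >>> from sklearn.cluster import KMeans
+      >>> X32 = np.random.RandomState(0).rand(100, 3).astype(np.float32)
+      >>> km = KMeans(n_clusters=2, random_state=0).fit(X32)
+      >>> centers_dtype = km.cluster_centers_.dtype  # float32, not upcast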
+ +Preprocessing and feature selection + +- :class:`preprocessing.RobustScaler` now accepts ``quantile_range`` parameter. + :issue:`5929` by :user:`Konstantin Podshumok `. + +- :class:`feature_extraction.FeatureHasher` now accepts string values. + :issue:`6173` by :user:`Ryad Zenine ` and + :user:`Devashish Deshpande `. + +- Keyword arguments can now be supplied to ``func`` in + :class:`preprocessing.FunctionTransformer` by means of the ``kw_args`` + parameter. By `Brian McFee`_. + +- :class:`feature_selection.SelectKBest` and :class:`feature_selection.SelectPercentile` + now accept score functions that take X, y as input and return only the scores. + By :user:`Nikolay Mayorov `. + +Model evaluation and meta-estimators + +- :class:`multiclass.OneVsOneClassifier` and :class:`multiclass.OneVsRestClassifier` + now support ``partial_fit``. By :user:`Asish Panda ` and + :user:`Philipp Dowling `. + +- Added support for substituting or disabling :class:`pipeline.Pipeline` + and :class:`pipeline.FeatureUnion` components using the ``set_params`` + interface that powers :mod:`sklearn.grid_search`. + See :ref:`sphx_glr_auto_examples_plot_compare_reduction.py` + By `Joel Nothman`_ and :user:`Robert McGibbon `. + +- The new ``cv_results_`` attribute of :class:`model_selection.GridSearchCV` + (and :class:`model_selection.RandomizedSearchCV`) can be easily imported + into pandas as a ``DataFrame``. Ref :ref:`model_selection_changes` for + more information. :issue:`6697` by `Raghav RV`_. + +- Generalization of :func:`model_selection.cross_val_predict`. + One can pass method names such as `predict_proba` to be used in the cross + validation framework instead of the default `predict`. + By :user:`Ori Ziv ` and :user:`Sears Merritt `. + +- The training scores and time taken for training followed by scoring for + each search candidate are now available at the ``cv_results_`` dict. + See :ref:`model_selection_changes` for more information. + :issue:`7325` by :user:`Eugene Chen ` and `Raghav RV`_. + +Metrics + +- Added ``labels`` flag to :class:`metrics.log_loss` to explicitly provide + the labels when the number of classes in ``y_true`` and ``y_pred`` differ. + :issue:`7239` by :user:`Hong Guangguo ` with help from + :user:`Mads Jensen ` and :user:`Nelson Liu `. + +- Support sparse contingency matrices in cluster evaluation + (:mod:`metrics.cluster.supervised`) to scale to a large number of + clusters. + :issue:`7419` by :user:`Gregory Stupp ` and `Joel Nothman`_. + +- Add ``sample_weight`` parameter to :func:`metrics.matthews_corrcoef`. + By :user:`Jatin Shah ` and `Raghav RV`_. + +- Speed up :func:`metrics.silhouette_score` by using vectorized operations. + By `Manoj Kumar`_. + +- Add ``sample_weight`` parameter to :func:`metrics.confusion_matrix`. + By :user:`Bernardo Stein `. + +Miscellaneous + +- Added ``n_jobs`` parameter to :class:`feature_selection.RFECV` to compute + the score on the test folds in parallel. By `Manoj Kumar`_ + +- Codebase does not contain C/C++ cython generated files: they are + generated during build. Distribution packages will still contain generated + C/C++ files. By :user:`Arthur Mensch `. + +- Reduce the memory usage for 32-bit float input arrays of + :func:`utils.sparse_func.mean_variance_axis` and + :func:`utils.sparse_func.incr_mean_variance_axis` by supporting cython + fused types. By :user:`YenChen Lin `. + +- The :func:`ignore_warnings` now accept a category argument to ignore only + the warnings of a specified type. By :user:`Thierry Guillemot `. 
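+
+  A minimal sketch of the new usage (assuming the helper lives in
+  ``sklearn.utils.testing``, as in this release)::
+
+      >>> from sklearn.utils.testing import ignore_warnings
+      >>> @ignore_warnings(category=DeprecationWarning)
+      ... def fit_with_deprecated_api():
+      ...     pass  # DeprecationWarnings raised here are suppressed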
+
+- Added the parameter ``return_X_y``, returning a ``(data, target)`` tuple,
+  to :func:`load_iris` (:issue:`7049`),
+  :func:`load_breast_cancer` (:issue:`7152`),
+  :func:`load_digits`,
+  :func:`load_diabetes`,
+  :func:`load_linnerud` and
+  :func:`load_boston` (:issue:`7154`)
+  by :user:`Manvendra Singh`.
+
+- Simplification of the ``clone`` function; support for estimators
+  that modify parameters in ``__init__`` is deprecated. :issue:`5540` by
+  `Andreas Müller`_.
+
+- When unpickling a scikit-learn estimator in a different version than the one
+  the estimator was trained with, a ``UserWarning`` is raised, see :ref:`the documentation
+  on model persistence ` for more details. (:issue:`7248`)
+  By `Andreas Müller`_.
+
+Bug fixes
+.........
+
+Trees and ensembles
+
+- Random forest, extra trees, decision trees and gradient boosting
+  no longer accept ``min_samples_split=1``, as at least 2 samples
+  are required to split a decision tree node. By `Arnaud Joly`_.
+
+- :class:`ensemble.VotingClassifier` now raises ``NotFittedError`` if ``predict``,
+  ``transform`` or ``predict_proba`` are called on the non-fitted estimator.
+  By `Sebastian Raschka`_.
+
+- Fixed a bug where :class:`ensemble.AdaBoostClassifier` and
+  :class:`ensemble.AdaBoostRegressor` would perform poorly if the
+  ``random_state`` was fixed
+  (:issue:`7411`). By `Joel Nothman`_.
+
+- Fixed a bug in ensembles with randomization where the ensemble would not
+  set ``random_state`` on base estimators in a pipeline or similar nesting
+  (:issue:`7411`). Note that results for :class:`ensemble.BaggingClassifier`,
+  :class:`ensemble.BaggingRegressor`, :class:`ensemble.AdaBoostClassifier`
+  and :class:`ensemble.AdaBoostRegressor` will now differ from previous
+  versions. By `Joel Nothman`_.
+
+Linear, kernelized and related models
+
+- Fixed incorrect gradient computation for ``loss='squared_epsilon_insensitive'`` in
+  :class:`linear_model.SGDClassifier` and :class:`linear_model.SGDRegressor`
+  (:issue:`6764`). By :user:`Wenhua Yang `.
+
+- Fixed a bug in :class:`linear_model.LogisticRegressionCV` where
+  ``solver='liblinear'`` did not accept ``class_weight='balanced'``
+  (:issue:`6817`). By `Tom Dupre la Tour`_.
+
+- Fixed a bug in :class:`neighbors.RadiusNeighborsClassifier` where an error
+  occurred when there were outliers being labelled and a weight function
+  specified (:issue:`6902`). By
+  `LeonieBorne `_.
+
+- Fixed the :class:`linear_model.ElasticNet` sparse decision function to
+  match the output of the dense version in the multioutput case.
+
+Decomposition, manifold learning and clustering
+
+- :class:`decomposition.RandomizedPCA`'s default number of ``iterated_power``
+  iterations is 4 instead of 3. :issue:`5141` by :user:`Giorgio Patrini `.
+
+- :func:`utils.extmath.randomized_svd` performs 4 power iterations by
+  default, instead of 0. In practice this is enough for obtaining a good
+  approximation of the true eigenvalues/vectors in the presence of noise.
+  When ``n_components`` is small (``< .1 * min(X.shape)``), ``n_iter`` is
+  set to 7, unless the user specifies a higher number. This improves
+  precision with few components. :issue:`5299` by :user:`Giorgio Patrini`.
+
+- The whiten/non-whiten inconsistency between components of
+  :class:`decomposition.PCA` and :class:`decomposition.RandomizedPCA` (now
+  factored into PCA, see the New features) is fixed. ``components_`` are
+  stored with no whitening.
+  :issue:`5299` by :user:`Giorgio Patrini `.
+
+- Fixed a bug in :func:`manifold.spectral_embedding` where the diagonal of the
+  unnormalized Laplacian matrix was incorrectly set to 1. :issue:`4995` by
+  :user:`Peter Fischer `.
+
+- Fixed incorrect initialization of :func:`utils.arpack.eigsh` on all
+  occurrences. Affects :class:`cluster.bicluster.SpectralBiclustering`,
+  :class:`decomposition.KernelPCA`, :class:`manifold.LocallyLinearEmbedding`,
+  and :class:`manifold.SpectralEmbedding` (:issue:`5012`). By
+  :user:`Peter Fischer `.
+
+- Attribute ``explained_variance_ratio_`` calculated with the SVD solver
+  of :class:`discriminant_analysis.LinearDiscriminantAnalysis` now returns
+  correct results. By :user:`JPFrancoia `.
+
+Preprocessing and feature selection
+
+- :func:`preprocessing.data._transform_selected` now always passes a copy
+  of ``X`` to the transform function when ``copy=True`` (:issue:`7194`). By
+  `Caio Oliveira `_.
+
+Model evaluation and meta-estimators
+
+- :class:`model_selection.StratifiedKFold` now raises an error if any class
+  has fewer than ``n_folds`` samples.
+  :issue:`6182` by :user:`Devashish Deshpande `.
+
+- Fixed a bug in :class:`model_selection.StratifiedShuffleSplit`
+  where train and test samples could overlap in some edge cases,
+  see :issue:`6121` for
+  more details. By `Loic Esteve`_.
+
+- Fix in :class:`sklearn.model_selection.StratifiedShuffleSplit` to
+  return splits of size ``train_size`` and ``test_size`` in all cases
+  (:issue:`6472`). By `Andreas Müller`_.
+
+- Cross-validation of :class:`OneVsOneClassifier` and
+  :class:`OneVsRestClassifier` now works with precomputed kernels.
+  :issue:`7350` by :user:`Russell Smith `.
+
+- Fixed incomplete ``predict_proba`` method delegation from
+  :class:`model_selection.GridSearchCV` to
+  :class:`linear_model.SGDClassifier` (:issue:`7159`)
+  by `Yichuan Liu `_.
+
+Metrics
+
+- Fixed a bug in :func:`metrics.silhouette_score` in which clusters of
+  size 1 were incorrectly scored. They should get a score of 0.
+  By `Joel Nothman`_.
+
+- Fixed a bug in :func:`metrics.silhouette_samples` so that it now works with
+  arbitrary labels, not just those ranging from 0 to ``n_clusters - 1``.
+
+- Fixed a bug where expected and adjusted mutual information were incorrect if
+  cluster contingency cells exceeded ``2**16``. By `Joel Nothman`_.
+
+- :func:`metrics.pairwise.pairwise_distances` now converts arrays to
+  boolean arrays when required in ``scipy.spatial.distance``.
+  :issue:`5460` by `Tom Dupre la Tour`_.
+
+- Fixed sparse input support in :func:`metrics.silhouette_score` as well as
+  in the example ``examples/text/document_clustering.py``.
+  By :user:`YenChen Lin `.
+
+- :func:`metrics.roc_curve` and :func:`metrics.precision_recall_curve` no
+  longer round ``y_score`` values when creating ROC curves; this was causing
+  problems for users with very small differences in scores (:issue:`7353`).
+
+Miscellaneous
+
+- :func:`model_selection.tests._search._check_param_grid` now works correctly
+  with all types that extend/implement ``Sequence`` (except strings),
+  including ``range`` (Python 3.x) and ``xrange`` (Python 2.x).
+  :issue:`7323` by Viacheslav Kovalevskyi.
+
+- :func:`utils.extmath.randomized_range_finder` is more numerically stable
+  when many power iterations are requested, since it applies LU normalization
+  by default. If ``n_iter < 2`` numerical issues are unlikely, thus no
+  normalization is applied. Other normalization options are available:
+  ``'none'``, ``'LU'`` and ``'QR'``.
+  :issue:`5141` by :user:`Giorgio Patrini `.
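+
+  A minimal sketch at the level of :func:`utils.extmath.randomized_svd`,
+  which relies on this range finder (the normalization option is the one
+  discussed above)::
+
+      >>> import numpy as np
+      >>> from sklearn.utils.extmath import randomized_svd
+      >>> M = np.random.RandomState(0).rand(50, 20)
+      >>> U, s, Vt = randomized_svd(M, n_components=5, n_iter=10,
+      ...                           power_iteration_normalizer='LU')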
+
+- Fixed a bug where some formats of ``scipy.sparse`` matrix, and estimators
+  with them as parameters, could not be passed to :func:`base.clone`.
+  By `Loic Esteve`_.
+
+- :func:`datasets.load_svmlight_file` is now able to read long int QID values.
+  :issue:`7101` by :user:`Ibraim Ganiev `.
+
+
+API changes summary
+-------------------
+
+Linear, kernelized and related models
+
+- ``residual_metric`` has been deprecated in :class:`linear_model.RANSACRegressor`.
+  Use ``loss`` instead. By `Manoj Kumar`_.
+
+- Access to public attributes ``.X_`` and ``.y_`` has been deprecated in
+  :class:`isotonic.IsotonicRegression`. By :user:`Jonathan Arfa `.
+
+Decomposition, manifold learning and clustering
+
+- The old :class:`mixture.DPGMM` is deprecated in favor of the new
+  :class:`mixture.BayesianGaussianMixture` (with the parameter
+  ``weight_concentration_prior_type='dirichlet_process'``).
+  The new class solves the computational
+  problems of the old class and computes the Gaussian mixture with a
+  Dirichlet process prior faster than before.
+  :issue:`7295` by :user:`Wei Xue ` and :user:`Thierry Guillemot `.
+
+- The old :class:`mixture.VBGMM` is deprecated in favor of the new
+  :class:`mixture.BayesianGaussianMixture` (with the parameter
+  ``weight_concentration_prior_type='dirichlet_distribution'``).
+  The new class solves the computational
+  problems of the old class and computes the Variational Bayesian Gaussian
+  mixture faster than before.
+  :issue:`6651` by :user:`Wei Xue ` and :user:`Thierry Guillemot `.
+
+- The old :class:`mixture.GMM` is deprecated in favor of the new
+  :class:`mixture.GaussianMixture`. The new class computes the Gaussian
+  mixture faster than before, and some of the computational problems of the
+  old class have been solved.
+  :issue:`6666` by :user:`Wei Xue ` and :user:`Thierry Guillemot `.
+
+Model evaluation and meta-estimators
+
+- The :mod:`sklearn.cross_validation`, :mod:`sklearn.grid_search` and
+  :mod:`sklearn.learning_curve` modules have been deprecated and the classes
+  and functions have been reorganized into the :mod:`sklearn.model_selection`
+  module. Ref :ref:`model_selection_changes` for more information.
+  :issue:`4294` by `Raghav RV`_.
+
+- The ``grid_scores_`` attribute of :class:`model_selection.GridSearchCV`
+  and :class:`model_selection.RandomizedSearchCV` is deprecated in favor of
+  the attribute ``cv_results_``.
+  Ref :ref:`model_selection_changes` for more information.
+  :issue:`6697` by `Raghav RV`_.
+
+- The parameters ``n_iter`` or ``n_folds`` in old CV splitters are replaced
+  by the new parameter ``n_splits`` since it can provide a consistent
+  and unambiguous interface to represent the number of train-test splits.
+  :issue:`7187` by :user:`YenChen Lin `.
+
+- The ``classes`` parameter was renamed to ``labels`` in
+  :func:`metrics.hamming_loss`. :issue:`7260` by :user:`Sebastián Vanrell `.
+
+- The splitter classes ``LabelKFold``, ``LabelShuffleSplit``,
+  ``LeaveOneLabelOut`` and ``LeavePLabelOut`` are renamed to
+  :class:`model_selection.GroupKFold`,
+  :class:`model_selection.GroupShuffleSplit`,
+  :class:`model_selection.LeaveOneGroupOut`
+  and :class:`model_selection.LeavePGroupsOut` respectively.
+  Also the parameter ``labels`` in the :func:`split` method of the newly
+  renamed splitters :class:`model_selection.LeaveOneGroupOut` and
+  :class:`model_selection.LeavePGroupsOut` is renamed to
+  ``groups``. Additionally in :class:`model_selection.LeavePGroupsOut`,
+  the parameter ``n_labels`` is renamed to ``n_groups``.
+  :issue:`6660` by `Raghav RV`_.
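+
+  A minimal sketch of the renamed API (the arrays are illustrative)::
+
+      >>> import numpy as np
+      >>> from sklearn.model_selection import LeavePGroupsOut
+      >>> X, y = np.ones((6, 2)), np.arange(6)
+      >>> groups = np.array([1, 1, 2, 2, 3, 3])
+      >>> lpgo = LeavePGroupsOut(n_groups=2)  # formerly n_labels
+      >>> splits = list(lpgo.split(X, y, groups=groups))  # formerly labels=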
+ +- Error and loss names for ``scoring`` parameters are now prefixed by + ``'neg_'``, such as ``neg_mean_squared_error``. The unprefixed versions + are deprecated and will be removed in version 0.20. + :issue:`7261` by :user:`Tim Head `. + +Code Contributors +----------------- +Aditya Joshi, Alejandro, Alexander Fabisch, Alexander Loginov, Alexander +Minyushkin, Alexander Rudy, Alexandre Abadie, Alexandre Abraham, Alexandre +Gramfort, Alexandre Saint, alexfields, Alvaro Ulloa, alyssaq, Amlan Kar, +Andreas Mueller, andrew giessel, Andrew Jackson, Andrew McCulloh, Andrew +Murray, Anish Shah, Arafat, Archit Sharma, Ariel Rokem, Arnaud Joly, Arnaud +Rachez, Arthur Mensch, Ash Hoover, asnt, b0noI, Behzad Tabibian, Bernardo, +Bernhard Kratzwald, Bhargav Mangipudi, blakeflei, Boyuan Deng, Brandon Carter, +Brett Naul, Brian McFee, Caio Oliveira, Camilo Lamus, Carol Willing, Cass, +CeShine Lee, Charles Truong, Chyi-Kwei Yau, CJ Carey, codevig, Colin Ni, Dan +Shiebler, Daniel, Daniel Hnyk, David Ellis, David Nicholson, David Staub, David +Thaler, David Warshaw, Davide Lasagna, Deborah, definitelyuncertain, Didi +Bar-Zev, djipey, dsquareindia, edwinENSAE, Elias Kuthe, Elvis DOHMATOB, Ethan +White, Fabian Pedregosa, Fabio Ticconi, fisache, Florian Wilhelm, Francis, +Francis O'Donovan, Gael Varoquaux, Ganiev Ibraim, ghg, Gilles Louppe, Giorgio +Patrini, Giovanni Cherubin, Giovanni Lanzani, Glenn Qian, Gordon +Mohr, govin-vatsan, Graham Clenaghan, Greg Reda, Greg Stupp, Guillaume +Lemaitre, Gustav Mörtberg, halwai, Harizo Rajaona, Harry Mavroforakis, +hashcode55, hdmetor, Henry Lin, Hobson Lane, Hugo Bowne-Anderson, +Igor Andriushchenko, Imaculate, Inki Hwang, Isaac Sijaranamual, +Ishank Gulati, Issam Laradji, Iver Jordal, jackmartin, Jacob Schreiber, Jake +Vanderplas, James Fiedler, James Routley, Jan Zikes, Janna Brettingen, jarfa, Jason +Laska, jblackburne, jeff levesque, Jeffrey Blackburne, Jeffrey04, Jeremy Hintz, +jeremynixon, Jeroen, Jessica Yung, Jill-Jênn Vie, Jimmy Jia, Jiyuan Qian, Joel +Nothman, johannah, John, John Boersma, John Kirkham, John Moeller, +jonathan.striebel, joncrall, Jordi, Joseph Munoz, Joshua Cook, JPFrancoia, +jrfiedler, JulianKahnert, juliathebrave, kaichogami, KamalakerDadi, Kenneth +Lyons, Kevin Wang, kingjr, kjell, Konstantin Podshumok, Kornel Kielczewski, +Krishna Kalyan, krishnakalyan3, Kvle Putnam, Kyle Jackson, Lars Buitinck, +ldavid, LeiG, LeightonZhang, Leland McInnes, Liang-Chi Hsieh, Lilian Besson, +lizsz, Loic Esteve, Louis Tiao, Léonie Borne, Mads Jensen, Maniteja Nandana, +Manoj Kumar, Manvendra Singh, Marco, Mario Krell, Mark Bao, Mark Szepieniec, +Martin Madsen, MartinBpr, MaryanMorel, Massil, Matheus, Mathieu Blondel, +Mathieu Dubois, Matteo, Matthias Ekman, Max Moroz, Michael Scherer, michiaki +ariga, Mikhail Korobov, Moussa Taifi, mrandrewandrade, Mridul Seth, nadya-p, +Naoya Kanai, Nate George, Nelle Varoquaux, Nelson Liu, Nick James, +NickleDave, Nico, Nicolas Goix, Nikolay Mayorov, ningchi, nlathia, +okbalefthanded, Okhlopkov, Olivier Grisel, Panos Louridas, Paul Strickland, +Perrine Letellier, pestrickland, Peter Fischer, Pieter, Ping-Yao, Chang, +practicalswift, Preston Parry, Qimu Zheng, Rachit Kansal, Raghav RV, +Ralf Gommers, Ramana.S, Rammig, Randy Olson, Rob Alexander, Robert Lutz, +Robin Schucker, Rohan Jain, Ruifeng Zheng, Ryan Yu, Rémy Léone, saihttam, +Saiwing Yeung, Sam Shleifer, Samuel St-Jean, Sartaj Singh, Sasank Chilamkurthy, +saurabh.bansod, Scott Andrews, Scott Lowe, seales, Sebastian Raschka, Sebastian +Saeger, Sebastián Vanrell, 
Sergei Lebedev, shagun Sodhani, shanmuga cv,
Shashank Shekhar, shawpan, shengxiduan, Shota, shuckle16, Skipper Seabold,
sklearn-ci, SmedbergM, srvanrell, Sébastien Lerique, Taranjeet, themrmax,
Thierry, Thierry Guillemot, Thomas, Thomas Hallock, Thomas Moreau, Tim Head,
tKammy, toastedcornflakes, Tom, TomDLT, Toshihiro Kamishima, tracer0tong, Trent
Hauck, trevorstephens, Tue Vo, Varun, Varun Jewalikar, Viacheslav, Vighnesh
Birodkar, Vikram, Villu Ruusmann, Vinayak Mehta, walter, waterponey, Wenhua
Yang, Wenjian Huang, Will Welch, wyseguy7, xyguo, yanlend, Yaroslav Halchenko,
yelite, Yen, YenChenLin, Yichuan Liu, Yoav Ram, Yoshiki, Zheng RuiFeng, zivori, Óscar Nájera

.. currentmodule:: sklearn

.. _changes_0_17_1:

Version 0.17.1
==============

**February 18, 2016**

Changelog
---------

Bug fixes
.........


- Upgraded vendored joblib to version 0.9.4, which fixes an important bug in
  ``joblib.Parallel`` that could silently yield wrong results when working
  on datasets larger than 1MB:
  https://github.com/joblib/joblib/blob/0.9.4/CHANGES.rst

- Fixed reading of Bunch pickles generated with scikit-learn
  version <= 0.16. This can affect users who have already
  downloaded a dataset with scikit-learn 0.16 and are loading it
  with scikit-learn 0.17. See :issue:`6196` for
  how this affected :func:`datasets.fetch_20newsgroups`. By `Loic
  Esteve`_.

- Fixed a bug that prevented using ROC AUC score to perform grid search on
  several CPUs / cores on large arrays. See :issue:`6147`.
  By `Olivier Grisel`_.

- Fixed a bug that prevented the ``presort`` parameter from being set properly
  in :class:`ensemble.GradientBoostingRegressor`. See :issue:`5857`.
  By Andrew McCulloh.

- Fixed a joblib error when evaluating the perplexity of a
  :class:`decomposition.LatentDirichletAllocation` model. See :issue:`6258`.
  By Chyi-Kwei Yau.


.. _changes_0_17:

Version 0.17
============

**November 5, 2015**

Changelog
---------

New features
............

- All the Scaler classes except :class:`preprocessing.RobustScaler` can be
  fitted online by calling ``partial_fit``. By :user:`Giorgio Patrini `.

- The new class :class:`ensemble.VotingClassifier` implements a
  "majority rule" / "soft voting" ensemble classifier to combine
  estimators for classification. By `Sebastian Raschka`_.

- The new class :class:`preprocessing.RobustScaler` provides an
  alternative to :class:`preprocessing.StandardScaler` for feature-wise
  centering and range normalization that is robust to outliers.
  By :user:`Thomas Unterthiner `.

- The new class :class:`preprocessing.MaxAbsScaler` provides an
  alternative to :class:`preprocessing.MinMaxScaler` for feature-wise
  range normalization when the data is already centered or sparse.
  By :user:`Thomas Unterthiner `.

- The new class :class:`preprocessing.FunctionTransformer` turns a Python
  function into a ``Pipeline``-compatible transformer object.
  By Joe Jevnik.

- The new classes :class:`cross_validation.LabelKFold` and
  :class:`cross_validation.LabelShuffleSplit` generate train-test folds,
  respectively similar to :class:`cross_validation.KFold` and
  :class:`cross_validation.ShuffleSplit`, except that the folds are
  conditioned on a label array. By `Brian McFee`_, :user:`Jean
  Kossaifi ` and `Gilles Louppe`_.

- :class:`decomposition.LatentDirichletAllocation` implements the Latent
  Dirichlet Allocation topic model with online variational
  inference.
  By :user:`Chyi-Kwei Yau `, with code based on an implementation
  by Matt Hoffman. (:issue:`3659`)

- The new solver ``sag`` implements a Stochastic Average Gradient descent
  and is available in both :class:`linear_model.LogisticRegression` and
  :class:`linear_model.Ridge`. This solver is very efficient for large
  datasets. By :user:`Danny Sullivan ` and `Tom Dupre la Tour`_.
  (:issue:`4738`)

- The new solver ``cd`` implements a Coordinate Descent in
  :class:`decomposition.NMF`. The previous solver, based on Projected
  Gradient, is still available by setting the new parameter ``solver`` to
  ``pg``, but is deprecated and will be removed in 0.19, along with
  :class:`decomposition.ProjectedGradientNMF` and parameters ``sparseness``,
  ``eta``, ``beta`` and ``nls_max_iter``. New parameters ``alpha`` and
  ``l1_ratio`` control L1 and L2 regularization, and ``shuffle`` adds a
  shuffling step in the ``cd`` solver.
  By `Tom Dupre la Tour`_ and `Mathieu Blondel`_.

Enhancements
............
- :class:`manifold.TSNE` now supports approximate optimization via the
  Barnes-Hut method, leading to much faster fitting. By Christopher Erick Moody.
  (:issue:`4025`)

- :class:`cluster.MeanShift` now supports parallel execution,
  as implemented in the ``mean_shift`` function. By :user:`Martino
  Sorbaro `.

- :class:`naive_bayes.GaussianNB` now supports fitting with ``sample_weight``.
  By `Jan Hendrik Metzen`_.

- :class:`dummy.DummyClassifier` now supports a prior fitting strategy.
  By `Arnaud Joly`_.

- Added a ``fit_predict`` method for :class:`mixture.GMM` and subclasses.
  By :user:`Cory Lorenz `.

- Added the :func:`metrics.label_ranking_loss` metric.
  By `Arnaud Joly`_.

- Added the :func:`metrics.cohen_kappa_score` metric.

- Added a ``warm_start`` constructor parameter to the bagging ensemble
  models to increase the size of the ensemble. By :user:`Tim Head `.

- Added option to use multi-output regression metrics without averaging.
  By Konstantin Shmelkov and :user:`Michael Eickenberg`.

- Added ``stratify`` option to :func:`cross_validation.train_test_split`
  for stratified splitting. By Miroslav Batchkarov.

- The :func:`tree.export_graphviz` function now supports aesthetic
  improvements for :class:`tree.DecisionTreeClassifier` and
  :class:`tree.DecisionTreeRegressor`, including options for coloring nodes
  by their majority class or impurity, showing variable names, and using
  node proportions instead of raw sample counts. By `Trevor Stephens`_.

- Improved speed of the ``newton-cg`` solver in
  :class:`linear_model.LogisticRegression`, by avoiding loss computation.
  By `Mathieu Blondel`_ and `Tom Dupre la Tour`_.

- The ``class_weight="auto"`` heuristic in classifiers supporting
  ``class_weight`` was deprecated and replaced by the ``class_weight="balanced"``
  option, which has a simpler formula and interpretation.
  By `Hanna Wallach`_ and `Andreas Müller`_.

- Add ``class_weight`` parameter to automatically weight samples by class
  frequency for :class:`linear_model.PassiveAggressiveClassifier`. By
  `Trevor Stephens`_.

- Added backlinks from the API reference pages to the user guide. By
  `Andreas Müller`_.

- The ``labels`` parameter to :func:`sklearn.metrics.f1_score`,
  :func:`sklearn.metrics.fbeta_score`,
  :func:`sklearn.metrics.recall_score` and
  :func:`sklearn.metrics.precision_score` has been extended.
  It is now possible to ignore one or more labels, such as where
  a multiclass problem has a majority class to ignore.
  By `Joel Nothman`_.

- Add ``sample_weight`` support to :class:`linear_model.RidgeClassifier`.
  By `Trevor Stephens`_.

- Provide an option for sparse output from
  :func:`sklearn.metrics.pairwise.cosine_similarity`. By
  :user:`Jaidev Deshpande `.

- Add :func:`minmax_scale` to provide a function interface for
  :class:`MinMaxScaler`. By :user:`Thomas Unterthiner `.

- ``dump_svmlight_file`` now handles multi-label datasets.
  By Chih-Wei Chang.

- RCV1 dataset loader (:func:`sklearn.datasets.fetch_rcv1`).
  By `Tom Dupre la Tour`_.

- The "Wisconsin Breast Cancer" classical two-class classification dataset
  is now included in scikit-learn, available with
  :func:`sklearn.datasets.load_breast_cancer`.

- Upgraded to joblib 0.9.3 to benefit from the new automatic batching of
  short tasks. This makes it possible for scikit-learn to benefit from
  parallelism when many very short tasks are executed in parallel, for
  instance by the :class:`grid_search.GridSearchCV` meta-estimator
  with ``n_jobs > 1`` used with a large grid of parameters on a small
  dataset. By `Vlad Niculae`_, `Olivier Grisel`_ and `Loic Esteve`_.

- For more details about changes in joblib 0.9.3 see the release notes:
  https://github.com/joblib/joblib/blob/master/CHANGES.rst#release-093

- Improved speed (3 times per iteration) of
  :class:`decomposition.DictionaryLearning` with the coordinate descent
  method from :class:`linear_model.Lasso`. By :user:`Arthur Mensch `.

- Parallel processing (threaded) for queries of nearest neighbors
  (using the ball-tree) by Nikolay Mayorov.

- Allow :func:`datasets.make_multilabel_classification` to output
  a sparse ``y``. By Kashif Rasul.

- :class:`cluster.DBSCAN` now accepts a sparse matrix of precomputed
  distances, allowing memory-efficient distance precomputation. By
  `Joel Nothman`_.

- :class:`tree.DecisionTreeClassifier` now exposes an ``apply`` method
  for retrieving the leaf indices that samples are predicted as. By
  :user:`Daniel Galvez ` and `Gilles Louppe`_.

- Speed up decision tree regressors, random forest regressors, extra trees
  regressors and gradient boosting estimators by computing a proxy
  of the impurity improvement during the tree growth. The proxy quantity is
  such that the split that maximizes this value also maximizes the impurity
  improvement. By `Arnaud Joly`_, :user:`Jacob Schreiber `
  and `Gilles Louppe`_.

- Speed up tree based methods by reducing the number of computations needed
  when computing the impurity measure taking into account linear
  relationship of the computed statistics. The effect is particularly
  visible with extra trees and on datasets with categorical or sparse
  features. By `Arnaud Joly`_.

- :class:`ensemble.GradientBoostingRegressor` and
  :class:`ensemble.GradientBoostingClassifier` now expose an ``apply``
  method for retrieving the leaf indices each sample ends up in under
  each tree. By :user:`Jacob Schreiber `.

- Add ``sample_weight`` support to :class:`linear_model.LinearRegression`.
  By Sonny Hu. (:issue:`4881`)

- Add ``n_iter_without_progress`` to :class:`manifold.TSNE` to control
  the stopping criterion. By Santi Villalba. (:issue:`5186`)

- Added optional parameter ``random_state`` in :class:`linear_model.Ridge`
  to set the seed of the pseudo random generator used in the ``sag`` solver.
  By `Tom Dupre la Tour`_.

- Added optional parameter ``warm_start`` in
  :class:`linear_model.LogisticRegression`.
  If set to True, the solvers
  ``lbfgs``, ``newton-cg`` and ``sag`` will be initialized with the
  coefficients computed in the previous fit. By `Tom Dupre la Tour`_.

- Added ``sample_weight`` support to :class:`linear_model.LogisticRegression` for
  the ``lbfgs``, ``newton-cg``, and ``sag`` solvers. By `Valentin Stolbunov`_.
  Support added to the ``liblinear`` solver. By `Manoj Kumar`_.

- Added optional parameter ``presort`` to :class:`ensemble.GradientBoostingRegressor`
  and :class:`ensemble.GradientBoostingClassifier`, keeping default behavior
  the same. This allows gradient boosters to turn off presorting when building
  deep trees or using sparse data. By :user:`Jacob Schreiber `.

- Altered :func:`metrics.roc_curve` to drop unnecessary thresholds by
  default. By :user:`Graham Clenaghan `.

- Added :class:`feature_selection.SelectFromModel` meta-transformer which can
  be used along with estimators that have a ``coef_`` or ``feature_importances_``
  attribute to select important features of the input data. By
  :user:`Maheshakya Wijewardena `, `Joel Nothman`_ and `Manoj Kumar`_.

- Added :func:`metrics.pairwise.laplacian_kernel`. By Clyde Fare.

- :class:`covariance.GraphLasso` allows separate control of the convergence criterion
  for the Elastic-Net subproblem via the ``enet_tol`` parameter.

- Improved verbosity in :class:`decomposition.DictionaryLearning`.

- :class:`ensemble.RandomForestClassifier` and
  :class:`ensemble.RandomForestRegressor` no longer explicitly store the
  samples used in bagging, resulting in a much reduced memory footprint for
  storing random forest models.

- Added ``positive`` option to :class:`linear_model.Lars` and
  :func:`linear_model.lars_path` to force coefficients to be positive.
  (:issue:`5131`)

- Added the ``X_norm_squared`` parameter to :func:`metrics.pairwise.euclidean_distances`
  to provide precomputed squared norms for ``X``.

- Added the ``fit_predict`` method to :class:`pipeline.Pipeline`.

- Added the :func:`preprocessing.min_max_scale` function.

Bug fixes
.........

- Fixed non-determinism in :class:`dummy.DummyClassifier` with sparse
  multi-label output. By `Andreas Müller`_.

- Fixed the output shape of :class:`linear_model.RANSACRegressor` to
  ``(n_samples, )``. By `Andreas Müller`_.

- Fixed bug in :class:`decomposition.DictionaryLearning` when ``n_jobs < 0``. By
  `Andreas Müller`_.

- Fixed bug where :class:`grid_search.RandomizedSearchCV` could consume a
  lot of memory for large discrete grids. By `Joel Nothman`_.

- Fixed bug in :class:`linear_model.LogisticRegressionCV` where ``penalty`` was ignored
  in the final fit. By `Manoj Kumar`_.

- Fixed bug in :class:`ensemble.forest.ForestClassifier` when computing
  ``oob_score`` and ``X`` is a ``sparse.csc_matrix``. By :user:`Ankur Ankan `.

- All regressors now consistently handle and warn when given ``y`` that is of
  shape ``(n_samples, 1)``. By `Andreas Müller`_ and Henry Lin.
  (:issue:`5431`)

- Fix in :class:`cluster.KMeans` cluster reassignment for sparse input by
  `Lars Buitinck`_.

- Fixed a bug in :class:`lda.LDA` that could cause asymmetric covariance
  matrices when using shrinkage. By `Martin Billinger`_.

- Fixed :func:`cross_validation.cross_val_predict` for estimators with
  sparse predictions. By Buddha Prakash.

- Fixed the ``predict_proba`` method of :class:`linear_model.LogisticRegression`
  to use soft-max instead of one-vs-rest normalization. By `Manoj Kumar`_.
  (:issue:`5182`)

- Fixed the :func:`partial_fit` method of :class:`linear_model.SGDClassifier`
  when called with ``average=True``. By :user:`Andrew Lamb `.
  (:issue:`5282`)

- Dataset fetchers use different filenames under Python 2 and Python 3 to
  avoid pickling compatibility issues. By `Olivier Grisel`_.
  (:issue:`5355`)

- Fixed a bug in :class:`naive_bayes.GaussianNB` which caused classification
  results to depend on scale. By `Jake Vanderplas`_.

- Temporarily fixed :class:`linear_model.Ridge`, which was incorrect
  when fitting the intercept in the case of sparse data. The fix
  automatically changes the solver to 'sag' in this case.
  :issue:`5360` by `Tom Dupre la Tour`_.

- Fixed a performance bug in :class:`decomposition.RandomizedPCA` on data
  with a large number of features and fewer samples. (:issue:`4478`)
  By `Andreas Müller`_, `Loic Esteve`_ and :user:`Giorgio Patrini `.

- Fixed bug in :class:`cross_decomposition.PLS` that yielded unstable and
  platform dependent output, and failed on ``fit_transform``.
  By :user:`Arthur Mensch `.

- Fixes to the ``Bunch`` class used to store datasets.

- Fixed :func:`ensemble.plot_partial_dependence` ignoring the
  ``percentiles`` parameter.

- Providing a ``set`` as vocabulary in ``CountVectorizer`` no longer
  leads to inconsistent results when pickling.

- Fixed the conditions on when a precomputed Gram matrix needs to
  be recomputed in :class:`linear_model.LinearRegression`,
  :class:`linear_model.OrthogonalMatchingPursuit`,
  :class:`linear_model.Lasso` and :class:`linear_model.ElasticNet`.

- Fixed inconsistent memory layout in the coordinate descent solver
  that affected :class:`decomposition.DictionaryLearning` and
  :class:`covariance.GraphLasso`. (:issue:`5337`)
  By `Olivier Grisel`_.

- :class:`manifold.LocallyLinearEmbedding` no longer ignores the ``reg``
  parameter.

- Nearest Neighbor estimators with custom distance metrics can now be pickled.
  (:issue:`4362`)

- Fixed a bug in :class:`pipeline.FeatureUnion` where ``transformer_weights``
  were not properly handled when performing grid-searches.

- Fixed a bug in :class:`linear_model.LogisticRegression` and
  :class:`linear_model.LogisticRegressionCV` when using
  ``class_weight='balanced'`` or ``class_weight='auto'``.
  By `Tom Dupre la Tour`_.

- Fixed :issue:`5495`, a bug when doing
  ``OVR(SVC(decision_function_shape="ovr"))``. Fixed by
  :user:`Elvis Dohmatob `.


API changes summary
-------------------
- Attributes ``data_min``, ``data_max`` and ``data_range`` in
  :class:`preprocessing.MinMaxScaler` are deprecated and won't be available
  from 0.19. Instead, the class now exposes ``data_min_``, ``data_max_``
  and ``data_range_``. By :user:`Giorgio Patrini `.

- All Scaler classes now have a ``scale_`` attribute, the feature-wise
  rescaling applied by their ``transform`` methods. The old attribute ``std_``
  in :class:`preprocessing.StandardScaler` is deprecated and superseded
  by ``scale_``; it won't be available in 0.19. By :user:`Giorgio Patrini `.

- :class:`svm.SVC` and :class:`svm.NuSVC` now have a ``decision_function_shape``
  parameter to make their decision function of shape ``(n_samples, n_classes)``
  by setting ``decision_function_shape='ovr'``. This will be the default behavior
  starting in 0.19. By `Andreas Müller`_.

- Passing 1D data arrays as input to estimators is now deprecated as it
  caused confusion in how the array elements should be interpreted
  as features or as samples.
  All data arrays are now expected
  to be explicitly shaped ``(n_samples, n_features)``.
  By :user:`Vighnesh Birodkar `.

- :class:`lda.LDA` and :class:`qda.QDA` have been moved to
  :class:`discriminant_analysis.LinearDiscriminantAnalysis` and
  :class:`discriminant_analysis.QuadraticDiscriminantAnalysis`.

- The ``store_covariance`` and ``tol`` parameters have been moved from
  the fit method to the constructor in
  :class:`discriminant_analysis.LinearDiscriminantAnalysis` and the
  ``store_covariances`` and ``tol`` parameters have been moved from the
  fit method to the constructor in
  :class:`discriminant_analysis.QuadraticDiscriminantAnalysis`.

- Models inheriting from ``_LearntSelectorMixin`` will no longer support the
  transform methods (i.e., RandomForests, GradientBoosting, LogisticRegression,
  DecisionTrees, SVMs and SGD related models). Wrap these models with the
  meta-transformer :class:`feature_selection.SelectFromModel` to remove
  features (according to ``coefs_`` or ``feature_importances_``)
  which are below a certain threshold value instead.

- :class:`cluster.KMeans` re-runs cluster-assignments in case of non-convergence,
  to ensure consistency of ``predict(X)`` and ``labels_``. By
  :user:`Vighnesh Birodkar `.

- Classifier and Regressor models are now tagged as such using the
  ``_estimator_type`` attribute.

- Cross-validation iterators always provide indices into training and test set,
  not boolean masks.

- The ``decision_function`` on all regressors was deprecated and will be
  removed in 0.19. Use ``predict`` instead.

- :func:`datasets.load_lfw_pairs` is deprecated and will be removed in 0.19.
  Use :func:`datasets.fetch_lfw_pairs` instead.

- The deprecated ``hmm`` module was removed.

- The deprecated ``Bootstrap`` cross-validation iterator was removed.

- The deprecated ``Ward`` and ``WardAgglomeration`` classes have been removed.
  Use :class:`cluster.AgglomerativeClustering` instead.

- :func:`cross_validation.check_cv` is now a public function.

- The property ``residues_`` of :class:`linear_model.LinearRegression` is deprecated
  and will be removed in 0.19.

- The deprecated ``n_jobs`` parameter of :class:`linear_model.LinearRegression` has been moved
  to the constructor.

- Removed deprecated ``class_weight`` parameter from :class:`linear_model.SGDClassifier`'s ``fit``
  method. Use the construction parameter instead.

- The deprecated support for the sequence of sequences (or list of lists) multilabel
  format was removed. To convert to and from the supported binary
  indicator matrix format, use
  :class:`MultiLabelBinarizer `.

- The behavior of calling the ``inverse_transform`` method of ``pipeline.Pipeline`` will
  change in 0.19. It will no longer reshape one-dimensional input to two-dimensional input.

- The deprecated attributes ``indicator_matrix_``, ``multilabel_`` and ``classes_`` of
  :class:`preprocessing.LabelBinarizer` were removed.

- Using ``gamma=0`` in :class:`svm.SVC` and :class:`svm.SVR` to automatically set the
  gamma to ``1. / n_features`` is deprecated and will be removed in 0.19.
  Use ``gamma="auto"`` instead.
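
  A hedged sketch of the migration::

      from sklearn.svm import SVC

      # Deprecated spelling, to be removed in 0.19:
      # clf = SVC(gamma=0)

      # Explicit replacement; gamma becomes 1. / n_features at fit time:
      clf = SVC(gamma="auto")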

Code Contributors
-----------------
Aaron Schumacher, Adithya Ganesh, akitty, Alexandre Gramfort, Alexey Grigorev,
Ali Baharev, Allen Riddell, Ando Saabas, Andreas Mueller, Andrew Lamb, Anish
Shah, Ankur Ankan, Anthony Erlinger, Ari Rouvinen, Arnaud Joly, Arnaud Rachez,
Arthur Mensch, banilo, Barmaley.exe, benjaminirving, Boyuan Deng, Brett Naul,
Brian McFee, Buddha Prakash, Chi Zhang, Chih-Wei Chang, Christof Angermueller,
Christoph Gohlke, Christophe Bourguignat, Christopher Erick Moody, Chyi-Kwei
Yau, Cindy Sridharan, CJ Carey, Clyde-fare, Cory Lorenz, Dan Blanchard, Daniel
Galvez, Daniel Kronovet, Danny Sullivan, Data1010, David, David D Lowe, David
Dotson, djipey, Dmitry Spikhalskiy, Donne Martin, Dougal J. Sutherland, Dougal
Sutherland, edson duarte, Eduardo Caro, Eric Larson, Eric Martin, Erich
Schubert, Fernando Carrillo, Frank C. Eckert, Frank Zalkow, Gael Varoquaux,
Ganiev Ibraim, Gilles Louppe, Giorgio Patrini, giorgiop, Graham Clenaghan,
Gryllos Prokopis, gwulfs, Henry Lin, Hsuan-Tien Lin, Immanuel Bayer, Ishank
Gulati, Jack Martin, Jacob Schreiber, Jaidev Deshpande, Jake Vanderplas, Jan
Hendrik Metzen, Jean Kossaifi, Jeffrey04, Jeremy, jfraj, Jiali Mei,
Joe Jevnik, Joel Nothman, John Kirkham, John Wittenauer, Joseph, Joshua Loyal,
Jungkook Park, KamalakerDadi, Kashif Rasul, Keith Goodman, Kian Ho, Konstantin
Shmelkov, Kyler Brown, Lars Buitinck, Lilian Besson, Loic Esteve, Louis Tiao,
maheshakya, Maheshakya Wijewardena, Manoj Kumar, MarkTab marktab.net, Martin
Ku, Martin Spacek, MartinBpr, martinosorb, MaryanMorel, Masafumi Oyamada,
Mathieu Blondel, Matt Krump, Matti Lyra, Maxim Kolganov, mbillinger, mhg,
Michael Heilman, Michael Patterson, Miroslav Batchkarov, Nelle Varoquaux,
Nicolas, Nikolay Mayorov, Olivier Grisel, Omer Katz, Óscar Nájera, Pauli
Virtanen, Peter Fischer, Peter Prettenhofer, Phil Roth, pianomania, Preston
Parry, Raghav RV, Rob Zinkov, Robert Layton, Rohan Ramanath, Saket Choudhary,
Sam Zhang, santi, saurabh.bansod, scls19fr, Sebastian Raschka, Sebastian
Saeger, Shivan Sornarajah, SimonPL, sinhrks, Skipper Seabold, Sonny Hu, sseg,
Stephen Hoover, Steven De Gryze, Steven Seguin, Theodore Vasiloudis, Thomas
Unterthiner, Tiago Freitas Pereira, Tian Wang, Tim Head, Timothy Hopper,
tokoroten, Tom Dupré la Tour, Trevor Stephens, Valentin Stolbunov, Vighnesh
Birodkar, Vinayak Mehta, Vincent, Vincent Michel, vstolbunov, wangz10, Wei Xue,
Yucheng Low, Yury Zhauniarovich, Zac Stewart, zhai_pro, Zichen Wang

.. _changes_0_16_1:

Version 0.16.1
===============

**April 14, 2015**

Changelog
---------

Bug fixes
.........

- Allow input data larger than ``block_size`` in
  :class:`covariance.LedoitWolf` by `Andreas Müller`_.

- Fix a bug in :class:`isotonic.IsotonicRegression` deduplication that
  caused unstable result in :class:`calibration.CalibratedClassifierCV` by
  `Jan Hendrik Metzen`_.

- Fix sorting of labels in :func:`preprocessing.label_binarize` by Michael Heilman.

- Fix several stability and convergence issues in
  :class:`cross_decomposition.CCA` and
  :class:`cross_decomposition.PLSCanonical` by `Andreas Müller`_.

- Fix a bug in :class:`cluster.KMeans` when ``precompute_distances=False``
  on fortran-ordered data.

- Fix a speed regression in :class:`ensemble.RandomForestClassifier`'s ``predict``
  and ``predict_proba`` by `Andreas Müller`_.

- Fix a regression where ``utils.shuffle`` converted lists and dataframes to
  arrays, by `Olivier Grisel`_.

.. 
_changes_0_16:

Version 0.16
============

**March 26, 2015**

Highlights
-----------

- Speed improvements (notably in :class:`cluster.DBSCAN`), reduced memory
  requirements, bug-fixes and better default settings.

- Multinomial Logistic regression and a path algorithm in
  :class:`linear_model.LogisticRegressionCV`.

- Out-of-core learning of PCA via :class:`decomposition.IncrementalPCA`.

- Probability calibration of classifiers using
  :class:`calibration.CalibratedClassifierCV`.

- :class:`cluster.Birch` clustering method for large-scale datasets.

- Scalable approximate nearest neighbors search with Locality-sensitive
  hashing forests in :class:`neighbors.LSHForest`.

- Improved error messages and better validation when using malformed input data.

- More robust integration with pandas dataframes.

Changelog
---------

New features
............

- The new :class:`neighbors.LSHForest` implements locality-sensitive hashing
  for approximate nearest neighbors search. By :user:`Maheshakya Wijewardena`.

- Added :class:`svm.LinearSVR`. This class uses the liblinear implementation
  of Support Vector Regression which is much faster for large
  sample sizes than :class:`svm.SVR` with linear kernel. By
  `Fabian Pedregosa`_ and Qiang Luo.

- Incremental fit for :class:`GaussianNB `.

- Added ``sample_weight`` support to :class:`dummy.DummyClassifier` and
  :class:`dummy.DummyRegressor`. By `Arnaud Joly`_.

- Added the :func:`metrics.label_ranking_average_precision_score` metric.
  By `Arnaud Joly`_.

- Add the :func:`metrics.coverage_error` metric. By `Arnaud Joly`_.

- Added :class:`linear_model.LogisticRegressionCV`. By
  `Manoj Kumar`_, `Fabian Pedregosa`_, `Gael Varoquaux`_
  and `Alexandre Gramfort`_.

- Added ``warm_start`` constructor parameter to make it possible for any
  trained forest model to grow additional trees incrementally. By
  :user:`Laurent Direr`.

- Added ``sample_weight`` support to :class:`ensemble.GradientBoostingClassifier` and
  :class:`ensemble.GradientBoostingRegressor`. By `Peter Prettenhofer`_.

- Added :class:`decomposition.IncrementalPCA`, an implementation of the PCA
  algorithm that supports out-of-core learning with a ``partial_fit``
  method. By `Kyle Kastner`_.

- Averaged SGD for :class:`SGDClassifier `
  and :class:`SGDRegressor `. By
  :user:`Danny Sullivan `.

- Added :func:`cross_val_predict `
  function which computes cross-validated estimates. By `Luis Pedro Coelho`_.

- Added :class:`linear_model.TheilSenRegressor`, a robust
  generalized-median-based estimator. By :user:`Florian Wilhelm `.

- Added :func:`metrics.median_absolute_error`, a robust metric.
  By `Gael Varoquaux`_ and :user:`Florian Wilhelm `.

- Add :class:`cluster.Birch`, an online clustering algorithm. By
  `Manoj Kumar`_, `Alexandre Gramfort`_ and `Joel Nothman`_.

- Added shrinkage support to :class:`discriminant_analysis.LinearDiscriminantAnalysis`
  using two new solvers. By :user:`Clemens Brunner ` and `Martin Billinger`_.

- Added :class:`kernel_ridge.KernelRidge`, an implementation of
  kernelized ridge regression.
  By `Mathieu Blondel`_ and `Jan Hendrik Metzen`_.

- All solvers in :class:`linear_model.Ridge` now support ``sample_weight``.
  By `Mathieu Blondel`_.

- Added :class:`cross_validation.PredefinedSplit` cross-validation
  for fixed user-provided cross-validation folds.
  By :user:`Thomas Unterthiner `.
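
  For instance (a hedged sketch; ``-1`` in ``test_fold`` keeps a sample in
  the training set of every split)::

      import numpy as np
      from sklearn.cross_validation import PredefinedSplit

      # samples 0 and 3 are never used for testing; samples 1 and 2 form
      # one test fold and samples 4 and 5 the other
      ps = PredefinedSplit(test_fold=np.array([-1, 0, 0, -1, 1, 1]))
      for train_index, test_index in ps:
          print(train_index, test_index)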

- Added :class:`calibration.CalibratedClassifierCV`, an approach for
  calibrating the predicted probabilities of a classifier.
  By `Alexandre Gramfort`_, `Jan Hendrik Metzen`_, `Mathieu Blondel`_
  and :user:`Balazs Kegl `.


Enhancements
............

- Add option ``return_distance`` in :func:`hierarchical.ward_tree`
  to return distances between nodes for both structured and unstructured
  versions of the algorithm. By `Matteo Visconti di Oleggio Castello`_.
  The same option was added in :func:`hierarchical.linkage_tree`.
  By `Manoj Kumar`_.

- Add support for sample weights in scorer objects. Metrics with sample
  weight support will automatically benefit from it. By `Noel Dawe`_ and
  `Vlad Niculae`_.

- Added ``newton-cg`` and ``lbfgs`` solver support in
  :class:`linear_model.LogisticRegression`. By `Manoj Kumar`_.

- Add ``selection="random"`` parameter to implement stochastic coordinate
  descent for :class:`linear_model.Lasso`, :class:`linear_model.ElasticNet`
  and related. By `Manoj Kumar`_.

- Add ``sample_weight`` parameter to
  :func:`metrics.jaccard_similarity_score` and :func:`metrics.log_loss`.
  By :user:`Jatin Shah `.

- Support sparse multilabel indicator representation in
  :class:`preprocessing.LabelBinarizer` and
  :class:`multiclass.OneVsRestClassifier` (by :user:`Hamzeh Alsalhi ` with thanks
  to Rohit Sivaprasad), as well as evaluation metrics (by
  `Joel Nothman`_).

- Add support for multiclass in ``metrics.hinge_loss``. Added ``labels=None``
  as optional parameter. By Saurabh Jha.

- Add ``sample_weight`` parameter to ``metrics.hinge_loss``.
  By Saurabh Jha.

- Add ``multi_class="multinomial"`` option in
  :class:`linear_model.LogisticRegression` to implement a Logistic
  Regression solver that minimizes the cross-entropy or multinomial loss
  instead of the default One-vs-Rest setting. Supports ``lbfgs`` and
  ``newton-cg`` solvers. By `Lars Buitinck`_ and `Manoj Kumar`_. Solver option
  ``newton-cg`` by Simon Wu.

- ``DictVectorizer`` can now perform ``fit_transform`` on an iterable in a
  single pass, when giving the option ``sort=False``. By :user:`Dan
  Blanchard `.

- :class:`GridSearchCV` and :class:`RandomizedSearchCV` can now be
  configured to work with estimators that may fail and raise errors on
  individual folds. This option is controlled by the ``error_score``
  parameter. This does not affect errors raised on re-fit. By
  :user:`Michal Romaniuk `.

- Add ``digits`` parameter to ``metrics.classification_report`` to allow
  report to show different precision of floating point numbers. By
  :user:`Ian Gilmore `.

- Add a quantile prediction strategy to the :class:`dummy.DummyRegressor`.
  By :user:`Aaron Staple `.

- Add ``handle_unknown`` option to :class:`preprocessing.OneHotEncoder` to
  handle unknown categorical features more gracefully during transform.
  By `Manoj Kumar`_.

- Added support for sparse input data to decision trees and their ensembles.
  By `Fares Hedyati`_ and `Arnaud Joly`_.

- Optimized :class:`cluster.AffinityPropagation` by reducing the number of
  memory allocations of large temporary data-structures. By `Antony Lee`_.

- Parallelization of the computation of feature importances in random forest.
  By `Olivier Grisel`_ and `Arnaud Joly`_.

- Add ``n_iter_`` attribute to estimators that accept a ``max_iter`` parameter
  in their constructor. By `Manoj Kumar`_.
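
  For example (a hedged sketch; availability of ``n_iter_`` on a given
  estimator follows this entry)::

      from sklearn.datasets import make_classification
      from sklearn.linear_model import LogisticRegression

      X, y = make_classification(random_state=0)
      clf = LogisticRegression(max_iter=100).fit(X, y)
      print(clf.n_iter_)  # iterations actually run by the solver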

- Added decision function for :class:`multiclass.OneVsOneClassifier`.
  By `Raghav RV`_ and :user:`Kyle Beauchamp `.

- :func:`neighbors.kneighbors_graph` and :func:`radius_neighbors_graph`
  support non-Euclidean metrics. By `Manoj Kumar`_.

- Parameter ``connectivity`` in :class:`cluster.AgglomerativeClustering`
  and family now accepts callables that return a connectivity matrix.
  By `Manoj Kumar`_.

- Sparse support for :func:`paired_distances`. By `Joel Nothman`_.

- :class:`cluster.DBSCAN` now supports sparse input and sample weights and
  has been optimized: the inner loop has been rewritten in Cython and
  radius neighbors queries are now computed in batch. By `Joel Nothman`_
  and `Lars Buitinck`_.

- Add ``class_weight`` parameter to automatically weight samples by class
  frequency for :class:`ensemble.RandomForestClassifier`,
  :class:`tree.DecisionTreeClassifier`, :class:`ensemble.ExtraTreesClassifier`
  and :class:`tree.ExtraTreeClassifier`. By `Trevor Stephens`_.

- :class:`grid_search.RandomizedSearchCV` now does sampling without
  replacement if all parameters are given as lists. By `Andreas Müller`_.

- Parallelized calculation of :func:`pairwise_distances` is now supported
  for scipy metrics and custom callables. By `Joel Nothman`_.

- Allow the fitting and scoring of all clustering algorithms in
  :class:`pipeline.Pipeline`. By `Andreas Müller`_.

- More robust seeding and improved error messages in :class:`cluster.MeanShift`
  by `Andreas Müller`_.

- Make the stopping criterion for :class:`mixture.GMM`,
  :class:`mixture.DPGMM` and :class:`mixture.VBGMM` less dependent on the
  number of samples by thresholding the average log-likelihood change
  instead of its sum over all samples. By `Hervé Bredin`_.

- The outcome of :func:`manifold.spectral_embedding` was made deterministic
  by flipping the sign of eigenvectors. By :user:`Hasil Sharma `.

- Significant performance and memory usage improvements in
  :class:`preprocessing.PolynomialFeatures`. By `Eric Martin`_.

- Numerical stability improvements for :class:`preprocessing.StandardScaler`
  and :func:`preprocessing.scale`. By `Nicolas Goix`_.

- :class:`svm.SVC` fitted on sparse input now implements ``decision_function``.
  By `Rob Zinkov`_ and `Andreas Müller`_.

- :func:`cross_validation.train_test_split` now preserves the input type,
  instead of converting to numpy arrays.


Documentation improvements
..........................

- Added example of using :class:`FeatureUnion` for heterogeneous input.
  By :user:`Matt Terry `.

- Documentation on scorers was improved, to highlight the handling of loss
  functions. By :user:`Matt Pico `.

- A discrepancy between liblinear output and scikit-learn's wrappers
  is now noted. By `Manoj Kumar`_.

- Improved documentation generation: examples referring to a class or
  function are now shown in a gallery on the class/function's API reference
  page. By `Joel Nothman`_.

- More explicit documentation of sample generators and of data
  transformation. By `Joel Nothman`_.

- :class:`sklearn.neighbors.BallTree` and :class:`sklearn.neighbors.KDTree`
  used to point to empty pages stating that they are aliases of BinaryTree.
  This has been fixed to show the correct class docs. By `Manoj Kumar`_.

- Added silhouette plots for analysis of KMeans clustering using
  :func:`metrics.silhouette_samples` and :func:`metrics.silhouette_score`.
  See :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_silhouette_analysis.py`.

Bug fixes
.........
- Metaestimators now support ducktyping for the presence of ``decision_function``,
  ``predict_proba`` and other methods. This fixes behavior of
  :class:`grid_search.GridSearchCV`,
  :class:`grid_search.RandomizedSearchCV`, :class:`pipeline.Pipeline`,
  :class:`feature_selection.RFE`, :class:`feature_selection.RFECV` when nested.
  By `Joel Nothman`_.

- The ``scoring`` attribute of grid-search and cross-validation methods is no longer
  ignored when a :class:`grid_search.GridSearchCV` is given as a base estimator or
  the base estimator does not implement ``predict``.

- The function :func:`hierarchical.ward_tree` now returns the children in
  the same order for both the structured and unstructured versions. By
  `Matteo Visconti di Oleggio Castello`_.

- :class:`feature_selection.RFECV` now correctly handles cases when
  ``step`` is not equal to 1. By :user:`Nikolay Mayorov `.

- The :class:`decomposition.PCA` now undoes whitening in its
  ``inverse_transform``. Also, its ``components_`` now always have unit
  length. By :user:`Michael Eickenberg `.

- Fix incomplete download of the dataset when
  :func:`datasets.download_20newsgroups` is called. By `Manoj Kumar`_.

- Various fixes to the Gaussian processes subpackage by Vincent Dubourg
  and Jan Hendrik Metzen.

- Calling ``partial_fit`` with ``class_weight='auto'`` raises an
  appropriate error message and suggests a workaround.
  By :user:`Danny Sullivan `.

- :class:`RBFSampler ` with ``gamma=g``
  formerly approximated :func:`rbf_kernel `
  with ``gamma=g/2.``; the definition of ``gamma`` is now consistent,
  which may substantially change your results if you use a fixed value.
  (If you cross-validated over ``gamma``, it probably doesn't matter
  too much.) By :user:`Dougal Sutherland `.

- Pipeline objects delegate the ``classes_`` attribute to the underlying
  estimator. This allows, for instance, making a bagging ensemble of a
  pipeline object. By `Arnaud Joly`_.

- :class:`neighbors.NearestCentroid` now uses the median as the centroid
  when metric is set to ``manhattan``. It was using the mean before.
  By `Manoj Kumar`_.

- Fix numerical stability issues in :class:`linear_model.SGDClassifier`
  and :class:`linear_model.SGDRegressor` by clipping large gradients and
  ensuring that weight decay rescaling is always positive (for large
  l2 regularization and large learning rate values).
  By `Olivier Grisel`_.

- When ``compute_full_tree`` is set to "auto", the full tree was built when
  ``n_clusters`` was high and stopped early when ``n_clusters`` was low,
  while the behavior should be vice-versa in
  :class:`cluster.AgglomerativeClustering` (and friends).
  This has been fixed by `Manoj Kumar`_.

- Fix lazy centering of data in :func:`linear_model.enet_path` and
  :func:`linear_model.lasso_path`. It was centered around one. It has
  been changed to be centered around the origin. By `Manoj Kumar`_.

- Fix handling of precomputed affinity matrices in
  :class:`cluster.AgglomerativeClustering` when using connectivity
  constraints. By :user:`Cathy Deng `.

- Correct ``partial_fit`` handling of ``class_prior`` for
  :class:`sklearn.naive_bayes.MultinomialNB` and
  :class:`sklearn.naive_bayes.BernoulliNB`. By `Trevor Stephens`_.

- Fixed a crash in :func:`metrics.precision_recall_fscore_support`
  when using unsorted ``labels`` in the multi-label setting.
  By `Andreas Müller`_.
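
  For instance (a hedged sketch of the previously crashing call; the data
  is illustrative)::

      import numpy as np
      from sklearn.metrics import precision_recall_fscore_support

      y_true = np.array([[1, 0, 1], [0, 1, 0]])
      y_pred = np.array([[1, 0, 0], [0, 1, 1]])
      # unsorted ``labels`` in the multi-label setting used to crash
      scores = precision_recall_fscore_support(
          y_true, y_pred, labels=[2, 0, 1], average="macro")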

- Avoid skipping the first nearest neighbor in the methods ``radius_neighbors``,
  ``kneighbors``, ``kneighbors_graph`` and ``radius_neighbors_graph`` in
  :class:`sklearn.neighbors.NearestNeighbors` and family, when the query
  data is not the same as fit data. By `Manoj Kumar`_.

- Fix log-density calculation in the :class:`mixture.GMM` with
  tied covariance. By `Will Dawson`_.

- Fixed a scaling error in :class:`feature_selection.SelectFdr`
  where a factor ``n_features`` was missing. By `Andrew Tulloch`_.

- Fix zero division in :class:`neighbors.KNeighborsRegressor` and related
  classes when using distance weighting and having identical data points.
  By Garrett-R.

- Fixed round-off errors with non-positive-definite covariance matrices
  in GMM. By :user:`Alexis Mignon `.

- Fixed an error in the computation of conditional probabilities in
  :class:`naive_bayes.BernoulliNB`. By `Hanna Wallach`_.

- Make the method ``radius_neighbors`` of
  :class:`neighbors.NearestNeighbors` return the samples lying on the
  boundary for ``algorithm='brute'``. By `Yan Yi`_.

- Flip sign of ``dual_coef_`` of :class:`svm.SVC`
  to make it consistent with the documentation and
  ``decision_function``. By Artem Sobolev.

- Fixed handling of ties in :class:`isotonic.IsotonicRegression`.
  We now use the weighted average of targets (secondary method). By
  `Andreas Müller`_ and Michael Bommarito.

API changes summary
-------------------

- :class:`GridSearchCV ` and
  :func:`cross_val_score ` and other
  meta-estimators no longer convert pandas DataFrames into arrays,
  allowing DataFrame-specific operations in custom estimators.

- :func:`multiclass.fit_ovr`, :func:`multiclass.predict_ovr`,
  :func:`multiclass.predict_proba_ovr`,
  :func:`multiclass.fit_ovo`, :func:`multiclass.predict_ovo`,
  :func:`multiclass.fit_ecoc` and :func:`multiclass.predict_ecoc`
  are deprecated. Use the underlying estimators instead.

- Nearest neighbors estimators used to take arbitrary keyword arguments
  and pass these to their distance metric. This will no longer be supported
  in scikit-learn 0.18; use the ``metric_params`` argument instead.

- The ``n_jobs`` parameter of the fit method was moved to the constructor of
  the :class:`linear_model.LinearRegression` class.

- The ``predict_proba`` method of :class:`multiclass.OneVsRestClassifier`
  now returns two probabilities per sample in the binary case; this
  is consistent with other estimators and with the method's documentation,
  but previous versions accidentally returned only the positive
  probability. Fixed by Will Lamond and `Lars Buitinck`_.

- Change default value of ``precompute`` in :class:`ElasticNet` and :class:`Lasso`
  to False. Setting precompute to "auto" was found to be slower when
  ``n_samples > n_features`` since the computation of the Gram matrix is
  computationally expensive and outweighs the benefit of fitting the Gram
  for just one alpha.
  ``precompute="auto"`` is now deprecated and will be removed in 0.18.
  By `Manoj Kumar`_.

- Expose ``positive`` option in :func:`linear_model.enet_path` and
  :func:`linear_model.lasso_path` which constrains coefficients to be
  positive. By `Manoj Kumar`_.

- Users should now supply an explicit ``average`` parameter to
  :func:`sklearn.metrics.f1_score`, :func:`sklearn.metrics.fbeta_score`,
  :func:`sklearn.metrics.recall_score` and
  :func:`sklearn.metrics.precision_score` when performing multiclass
  or multilabel (i.e. not binary) classification. By `Joel Nothman`_.
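
  For example (a hedged sketch)::

      from sklearn.metrics import f1_score

      y_true = [0, 1, 2, 2]
      y_pred = [0, 2, 2, 1]
      # multiclass targets now require an explicit ``average``
      print(f1_score(y_true, y_pred, average="macro"))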

- The ``scoring`` parameter for cross validation now accepts ``'f1_micro'``,
  ``'f1_macro'`` or ``'f1_weighted'``. ``'f1'`` is now for binary classification
  only. Similar changes apply to ``'precision'`` and ``'recall'``.
  By `Joel Nothman`_.

- The ``fit_intercept``, ``normalize`` and ``return_models`` parameters in
  :func:`linear_model.enet_path` and :func:`linear_model.lasso_path` have
  been removed. They had been deprecated since 0.14.

- From now onwards, all estimators will uniformly raise ``NotFittedError``
  (:class:`utils.validation.NotFittedError`), when any of the ``predict``-like
  methods are called before the model is fit. By `Raghav RV`_.

- Input data validation was refactored for more consistent input
  validation. The ``check_arrays`` function was replaced by ``check_array``
  and ``check_X_y``. By `Andreas Müller`_.

- Allow ``X=None`` in the methods ``radius_neighbors``, ``kneighbors``,
  ``kneighbors_graph`` and ``radius_neighbors_graph`` in
  :class:`sklearn.neighbors.NearestNeighbors` and family. If set to None,
  then for every sample this avoids setting the sample itself as the
  first nearest neighbor. By `Manoj Kumar`_.

- Add parameter ``include_self`` in :func:`neighbors.kneighbors_graph`
  and :func:`neighbors.radius_neighbors_graph` which has to be explicitly
  set by the user. If set to True, then the sample itself is considered
  as the first nearest neighbor.

- The ``thresh`` parameter is deprecated in favor of the new ``tol`` parameter
  in :class:`GMM`, :class:`DPGMM` and :class:`VBGMM`. See the `Enhancements`
  section for details. By `Hervé Bredin`_.

- Estimators will treat input with dtype object as numeric when possible.
  By `Andreas Müller`_.

- Estimators now raise ``ValueError`` consistently when fitted on empty
  data (less than 1 sample or less than 1 feature for 2D input).
  By `Olivier Grisel`_.


- The ``shuffle`` option of :class:`linear_model.SGDClassifier`,
  :class:`linear_model.SGDRegressor`, :class:`linear_model.Perceptron`,
  :class:`linear_model.PassiveAggressiveClassifier` and
  :class:`linear_model.PassiveAggressiveRegressor` now defaults to ``True``.

- :class:`cluster.DBSCAN` now uses a deterministic initialization. The
  ``random_state`` parameter is deprecated. By :user:`Erich Schubert `.

Code Contributors
-----------------
A. Flaxman, Aaron Schumacher, Aaron Staple, abhishek thakur, Akshay, akshayah3,
Aldrian Obaja, Alexander Fabisch, Alexandre Gramfort, Alexis Mignon, Anders
Aagaard, Andreas Mueller, Andreas van Cranenburgh, Andrew Tulloch, Andrew
Walker, Antony Lee, Arnaud Joly, banilo, Barmaley.exe, Ben Davies, Benedikt
Koehler, bhsu, Boris Feld, Borja Ayerdi, Boyuan Deng, Brent Pedersen, Brian
Wignall, Brooke Osborn, Calvin Giles, Cathy Deng, Celeo, cgohlke, chebee7i,
Christian Stade-Schuldt, Christof Angermueller, Chyi-Kwei Yau, CJ Carey,
Clemens Brunner, Daiki Aminaka, Dan Blanchard, danfrankj, Danny Sullivan, David
Fletcher, Dmitrijs Milajevs, Dougal J.
Sutherland, Erich Schubert, Fabian
Pedregosa, Florian Wilhelm, floydsoft, Félix-Antoine Fortin, Gael Varoquaux,
Garrett-R, Gilles Louppe, gpassino, gwulfs, Hampus Bengtsson, Hamzeh Alsalhi,
Hanna Wallach, Harry Mavroforakis, Hasil Sharma, Helder, Herve Bredin,
Hsiang-Fu Yu, Hugues SALAMIN, Ian Gilmore, Ilambharathi Kanniah, Imran Haque,
isms, Jake VanderPlas, Jan Dlabal, Jan Hendrik Metzen, Jatin Shah, Javier López
Peña, jdcaballero, Jean Kossaifi, Jeff Hammerbacher, Joel Nothman, Jonathan
Helmus, Joseph, Kaicheng Zhang, Kevin Markham, Kyle Beauchamp, Kyle Kastner,
Lagacherie Matthieu, Lars Buitinck, Laurent Direr, leepei, Loic Esteve, Luis
Pedro Coelho, Lukas Michelbacher, maheshakya, Manoj Kumar, Manuel, Mario
Michael Krell, Martin, Martin Billinger, Martin Ku, Mateusz Susik, Mathieu
Blondel, Matt Pico, Matt Terry, Matteo Visconti dOC, Matti Lyra, Max Linke,
Mehdi Cherti, Michael Bommarito, Michael Eickenberg, Michal Romaniuk, MLG,
mr.Shu, Nelle Varoquaux, Nicola Montecchio, Nicolas, Nikolay Mayorov, Noel
Dawe, Okal Billy, Olivier Grisel, Óscar Nájera, Paolo Puggioni, Peter
Prettenhofer, Pratap Vardhan, pvnguyen, queqichao, Rafael Carrascosa, Raghav R
V, Rahiel Kasim, Randall Mason, Rob Zinkov, Robert Bradshaw, Saket Choudhary,
Sam Nicholls, Samuel Charron, Saurabh Jha, sethdandridge, sinhrks, snuderl,
Stefan Otte, Stefan van der Walt, Steve Tjoa, swu, Sylvain Zimmer, tejesh95,
terrycojones, Thomas Delteil, Thomas Unterthiner, Tomas Kazmar, trevorstephens,
tttthomasssss, Tzu-Ming Kuo, ugurcaliskan, ugurthemaster, Vinayak Mehta,
Vincent Dubourg, Vjacheslav Murashkin, Vlad Niculae, wadawson, Wei Xue, Will
Lamond, Wu Jiang, x0l, Xinfan Meng, Yan Yi, Yu-Chin

.. _changes_0_15_2:

Version 0.15.2
==============

**September 4, 2014**

Bug fixes
---------

- Fixed handling of the ``p`` parameter of the Minkowski distance that was
  previously ignored in nearest neighbors models. By :user:`Nikolay
  Mayorov `.

- Fixed duplicated alphas in :class:`linear_model.LassoLars` with early
  stopping on 32 bit Python. By `Olivier Grisel`_ and `Fabian Pedregosa`_.

- Fixed the build under Windows when scikit-learn is built with MSVC while
  NumPy is built with MinGW. By `Olivier Grisel`_ and :user:`Federico
  Vaggi `.

- Fixed an array index overflow bug in the coordinate descent solver. By
  `Gael Varoquaux`_.

- Better handling of numpy 1.9 deprecation warnings. By `Gael Varoquaux`_.

- Removed unnecessary data copy in :class:`cluster.KMeans`.
  By `Gael Varoquaux`_.

- Explicitly close open files to avoid ``ResourceWarnings`` under Python 3.
  By Calvin Giles.

- The ``transform`` of :class:`discriminant_analysis.LinearDiscriminantAnalysis`
  now projects the input on the most discriminant directions. By Martin Billinger.

- Fixed potential overflow in ``_tree.safe_realloc`` by `Lars Buitinck`_.

- Performance optimization in :class:`isotonic.IsotonicRegression`.
  By Robert Bradshaw.

- ``nose`` is no longer a runtime dependency to import ``sklearn``, only for
  running the tests. By `Joel Nothman`_.

- Many documentation and website fixes by `Joel Nothman`_, `Lars Buitinck`_,
  :user:`Matt Pico `, and others.

.. _changes_0_15_1:

Version 0.15.1
==============

**August 1, 2014**

Bug fixes
---------

- Made :func:`cross_validation.cross_val_score` use
  :class:`cross_validation.KFold` instead of
  :class:`cross_validation.StratifiedKFold` on multi-output classification
  problems. By :user:`Nikolay Mayorov `.
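
  A hedged sketch of an affected call (estimator and data are illustrative,
  not from the changelog)::

      import numpy as np
      from sklearn.cross_validation import cross_val_score
      from sklearn.tree import DecisionTreeClassifier

      rng = np.random.RandomState(0)
      X = rng.rand(12, 3)
      Y = rng.randint(0, 2, size=(12, 2))  # multi-output classification targets
      # 2-D targets cannot be stratified, so KFold is used instead
      scores = cross_val_score(DecisionTreeClassifier(random_state=0), X, Y, cv=3)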

- Support unseen labels in :class:`preprocessing.LabelBinarizer` to restore
  the default behavior of 0.14.1 for backward compatibility. By
  :user:`Hamzeh Alsalhi `.

- Fixed the :class:`cluster.KMeans` stopping criterion that prevented early
  convergence detection. By Edward Raff and `Gael Varoquaux`_.

- Fixed the behavior of :class:`multiclass.OneVsOneClassifier`
  in case of ties at the per-class vote level by computing the correct
  per-class sum of prediction scores. By `Andreas Müller`_.

- Made :func:`cross_validation.cross_val_score` and
  :class:`grid_search.GridSearchCV` accept Python lists as input data.
  This is especially useful for cross-validation and model selection of
  text processing pipelines. By `Andreas Müller`_.

- Fixed data input checks of most estimators to accept input data that
  implements the NumPy ``__array__`` protocol. This is the case for
  ``pandas.Series`` and ``pandas.DataFrame`` in recent versions of
  pandas. By `Gael Varoquaux`_.

- Fixed a regression for :class:`linear_model.SGDClassifier` with
  ``class_weight="auto"`` on data with non-contiguous labels. By
  `Olivier Grisel`_.


.. _changes_0_15:

Version 0.15
============

**July 15, 2014**

Highlights
-----------

- Many speed and memory improvements all across the code

- Huge speed and memory improvements to random forests (and extra
  trees) that also benefit better from parallel computing.

- Incremental fit to :class:`BernoulliRBM `

- Added :class:`cluster.AgglomerativeClustering` for hierarchical
  agglomerative clustering with average linkage, complete linkage and
  ward strategies.

- Added :class:`linear_model.RANSACRegressor` for robust regression
  models.

- Added dimensionality reduction with :class:`manifold.TSNE` which can be
  used to visualize high-dimensional data.


Changelog
---------

New features
............

- Added :class:`ensemble.BaggingClassifier` and
  :class:`ensemble.BaggingRegressor` meta-estimators for ensembling
  any kind of base estimator. See the :ref:`Bagging ` section of
  the user guide for details and examples. By `Gilles Louppe`_.

- New unsupervised feature selection algorithm
  :class:`feature_selection.VarianceThreshold`, by `Lars Buitinck`_.

- Added :class:`linear_model.RANSACRegressor` meta-estimator for the robust
  fitting of regression models. By :user:`Johannes Schönberger `.

- Added :class:`cluster.AgglomerativeClustering` for hierarchical
  agglomerative clustering with average linkage, complete linkage and
  ward strategies, by `Nelle Varoquaux`_ and `Gael Varoquaux`_.

- Shorthand constructors :func:`pipeline.make_pipeline` and
  :func:`pipeline.make_union` were added by `Lars Buitinck`_.

- Shuffle option for :class:`cross_validation.StratifiedKFold`.
  By :user:`Jeffrey Blackburne `.

- Incremental learning (``partial_fit``) for Gaussian Naive Bayes by
  Imran Haque.

- Added ``partial_fit`` to :class:`BernoulliRBM
  `. By :user:`Danny Sullivan `.

- Added :func:`learning_curve ` utility to
  chart performance with respect to training size. See
  :ref:`sphx_glr_auto_examples_model_selection_plot_learning_curve.py`. By Alexander Fabisch.

- Add ``positive`` option in :class:`LassoCV ` and
  :class:`ElasticNetCV `.
  By Brian Wignall and `Alexandre Gramfort`_.

- Added :class:`linear_model.MultiTaskElasticNetCV` and
  :class:`linear_model.MultiTaskLassoCV`. By `Manoj Kumar`_.

- Added :class:`manifold.TSNE`. By Alexander Fabisch.

Enhancements
............

- Add sparse input support to :class:`ensemble.AdaBoostClassifier` and
  :class:`ensemble.AdaBoostRegressor` meta-estimators.
  By :user:`Hamzeh Alsalhi `.

- Memory improvements of decision trees, by `Arnaud Joly`_.

- Decision trees can now be built in best-first manner by using ``max_leaf_nodes``
  as the stopping criterion. Refactored the tree code to use either a
  stack or a priority queue for tree building.
  By `Peter Prettenhofer`_ and `Gilles Louppe`_.

- Decision trees can now be fitted on fortran- and c-style arrays, and
  non-contiguous arrays without the need to make a copy.
  If the input array has a different dtype than ``np.float32``, a fortran-
  style copy will be made since fortran-style memory layout has speed
  advantages. By `Peter Prettenhofer`_ and `Gilles Louppe`_.

- Speed improvement of regression trees by optimizing the computation
  of the mean square error criterion. This led to speed improvements in
  the tree, forest and gradient boosting tree modules. By `Arnaud Joly`_.

- The ``img_to_graph`` and ``grid_to_graph`` functions in
  :mod:`sklearn.feature_extraction.image` now return ``np.ndarray``
  instead of ``np.matrix`` when ``return_as=np.ndarray``. See the
  Notes section for more information on compatibility.

- Changed the internal storage of decision trees to use a struct array.
  This fixed some small bugs, while improving code and providing a small
  speed gain. By `Joel Nothman`_.

- Reduce memory usage and overhead when fitting and predicting with forests
  of randomized trees in parallel with ``n_jobs != 1`` by leveraging the new
  threading backend of joblib 0.8 and releasing the GIL in the tree fitting
  Cython code. By `Olivier Grisel`_ and `Gilles Louppe`_.

- Speed improvement of the :mod:`sklearn.ensemble.gradient_boosting` module.
  By `Gilles Louppe`_ and `Peter Prettenhofer`_.

- Various enhancements to the :mod:`sklearn.ensemble.gradient_boosting`
  module: a ``warm_start`` argument to fit additional trees,
  a ``max_leaf_nodes`` argument to fit GBM style trees,
  a ``monitor`` fit argument to inspect the estimator during training, and
  refactoring of the verbose code. By `Peter Prettenhofer`_.

- Faster :class:`sklearn.ensemble.ExtraTrees` by caching feature values.
  By `Arnaud Joly`_.

- Faster depth-based tree building algorithms such as decision tree,
  random forest, extra trees or gradient tree boosting (with depth based
  growing strategy) by avoiding trying to split on constant features found
  in the sample subset. By `Arnaud Joly`_.

- Add ``min_weight_fraction_leaf`` pre-pruning parameter to tree-based
  methods: the minimum weighted fraction of the input samples required to be
  at a leaf node. By `Noel Dawe`_.

- Added :func:`metrics.pairwise_distances_argmin_min`, by Philippe Gervais.

- Added predict method to :class:`cluster.AffinityPropagation` and
  :class:`cluster.MeanShift`, by `Mathieu Blondel`_.

- Vector and matrix multiplications have been optimised throughout the
  library by `Denis Engemann`_, and `Alexandre Gramfort`_.
  In particular, they should take less memory with older NumPy versions
  (prior to 1.7.2).

- Precision-recall and ROC examples now use train_test_split, and have more
  explanation of why these metrics are useful. By `Kyle Kastner`_.

- The training algorithm for :class:`decomposition.NMF` is faster for
  sparse matrices and has much lower memory complexity, meaning it will
  scale up gracefully to large datasets. By `Lars Buitinck`_.
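
  A minimal sketch (shapes and parameters are illustrative)::

      import scipy.sparse as sp
      from sklearn.decomposition import NMF

      X = sp.rand(100, 50, density=0.05, random_state=0)  # sparse, non-negative
      W = NMF(n_components=5, random_state=0).fit_transform(X)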

- Added an ``svd_method`` option with default value ``"randomized"`` to
  :class:`decomposition.FactorAnalysis` to save memory and
  significantly speed up computation by `Denis Engemann`_, and
  `Alexandre Gramfort`_.

- Changed :class:`cross_validation.StratifiedKFold` to try and
  preserve as much of the original ordering of samples as possible so as
  not to hide overfitting on datasets with a non-negligible level of
  sample dependency.
  By `Daniel Nouri`_ and `Olivier Grisel`_.

- Add multi-output support to :class:`gaussian_process.GaussianProcess`
  by John Novak.

- Support for precomputed distance matrices in nearest neighbor estimators
  by `Robert Layton`_ and `Joel Nothman`_.

- Norm computations optimized for NumPy 1.6 and later versions by
  `Lars Buitinck`_. In particular, the k-means algorithm no longer
  needs a temporary data structure the size of its input.

- :class:`dummy.DummyClassifier` can now be used to predict a constant
  output value. By `Manoj Kumar`_.

- :class:`dummy.DummyRegressor` now has a ``strategy`` parameter which allows
  predicting the mean, the median of the training set, or a constant
  output value. By :user:`Maheshakya Wijewardena `.

- Multi-label classification output in multilabel indicator format
  is now supported by :func:`metrics.roc_auc_score` and
  :func:`metrics.average_precision_score` by `Arnaud Joly`_.

- Significant performance improvements (more than 100x speedup for
  large problems) in :class:`isotonic.IsotonicRegression` by
  `Andrew Tulloch`_.

- Speed and memory usage improvements to the SGD algorithm for linear
  models: it now uses threads, not separate processes, when ``n_jobs>1``.
  By `Lars Buitinck`_.

- Grid search and cross validation allow NaNs in the input arrays so that
  preprocessors such as :class:`preprocessing.Imputer
  ` can be trained within the cross validation loop,
  avoiding potentially skewed results.

- Ridge regression can now deal with sample weights in feature space
  (previously only in sample space). By :user:`Michael Eickenberg `.
  Both solutions are provided by the Cholesky solver.

- Several classification and regression metrics now support weighted
  samples with the new ``sample_weight`` argument:
  :func:`metrics.accuracy_score`,
  :func:`metrics.zero_one_loss`,
  :func:`metrics.precision_score`,
  :func:`metrics.average_precision_score`,
  :func:`metrics.f1_score`,
  :func:`metrics.fbeta_score`,
  :func:`metrics.recall_score`,
  :func:`metrics.roc_auc_score`,
  :func:`metrics.explained_variance_score`,
  :func:`metrics.mean_squared_error`,
  :func:`metrics.mean_absolute_error`,
  :func:`metrics.r2_score`.
  By `Noel Dawe`_.

- Speed up of the sample generator
  :func:`datasets.make_multilabel_classification`. By `Joel Nothman`_.

Documentation improvements
...........................

- The :ref:`Working With Text Data ` tutorial
  has now been worked into the main documentation's tutorial section.
  Includes exercises and skeletons for tutorial presentation.
  Original tutorial created by several authors including
  `Olivier Grisel`_, Lars Buitinck and many others.
  Tutorial integration into the scikit-learn documentation
  by `Jaques Grobler`_.

- Added :ref:`Computational Performance `
  documentation. Discussion and examples of prediction latency / throughput
  and different factors that have influence over speed. Additional tips for
  building faster models and choosing a relevant compromise between speed
  and predictive power.
  By :user:`Eustache Diemert `.
+ +Bug fixes +......... + +- Fixed bug in :class:`decomposition.MiniBatchDictionaryLearning` : + ``partial_fit`` was not working properly. + +- Fixed bug in :class:`linear_model.stochastic_gradient` : + ``l1_ratio`` was used as ``(1.0 - l1_ratio)`` . + +- Fixed bug in :class:`multiclass.OneVsOneClassifier` with string + labels + +- Fixed a bug in :class:`LassoCV ` and + :class:`ElasticNetCV `: they would not + pre-compute the Gram matrix with ``precompute=True`` or + ``precompute="auto"`` and ``n_samples > n_features``. By `Manoj Kumar`_. + +- Fixed incorrect estimation of the degrees of freedom in + :func:`feature_selection.f_regression` when variates are not centered. + By :user:`Virgile Fritsch `. + +- Fixed a race condition in parallel processing with + ``pre_dispatch != "all"`` (for instance, in ``cross_val_score``). + By `Olivier Grisel`_. + +- Raise error in :class:`cluster.FeatureAgglomeration` and + :class:`cluster.WardAgglomeration` when no samples are given, + rather than returning meaningless clustering. + +- Fixed bug in :class:`gradient_boosting.GradientBoostingRegressor` with + ``loss='huber'``: ``gamma`` might have not been initialized. + +- Fixed feature importances as computed with a forest of randomized trees + when fit with ``sample_weight != None`` and/or with ``bootstrap=True``. + By `Gilles Louppe`_. + +API changes summary +------------------- + +- :mod:`sklearn.hmm` is deprecated. Its removal is planned + for the 0.17 release. + +- Use of :class:`covariance.EllipticEnvelop` has now been removed after + deprecation. + Please use :class:`covariance.EllipticEnvelope` instead. + +- :class:`cluster.Ward` is deprecated. Use + :class:`cluster.AgglomerativeClustering` instead. + +- :class:`cluster.WardClustering` is deprecated. Use +- :class:`cluster.AgglomerativeClustering` instead. + +- :class:`cross_validation.Bootstrap` is deprecated. + :class:`cross_validation.KFold` or + :class:`cross_validation.ShuffleSplit` are recommended instead. + +- Direct support for the sequence of sequences (or list of lists) multilabel + format is deprecated. To convert to and from the supported binary + indicator matrix format, use + :class:`MultiLabelBinarizer `. + By `Joel Nothman`_. + +- Add score method to :class:`PCA ` following the model of + probabilistic PCA and deprecate + :class:`ProbabilisticPCA ` model whose + score implementation is not correct. The computation now also exploits the + matrix inversion lemma for faster computation. By `Alexandre Gramfort`_. + +- The score method of :class:`FactorAnalysis ` + now returns the average log-likelihood of the samples. Use score_samples + to get log-likelihood of each sample. By `Alexandre Gramfort`_. + +- Generating boolean masks (the setting ``indices=False``) + from cross-validation generators is deprecated. + Support for masks will be removed in 0.17. + The generators have produced arrays of indices by default since 0.10. + By `Joel Nothman`_. + +- 1-d arrays containing strings with ``dtype=object`` (as used in Pandas) + are now considered valid classification targets. This fixes a regression + from version 0.13 in some classifiers. By `Joel Nothman`_. + +- Fix wrong ``explained_variance_ratio_`` attribute in + :class:`RandomizedPCA `. + By `Alexandre Gramfort`_. + +- Fit alphas for each ``l1_ratio`` instead of ``mean_l1_ratio`` in + :class:`linear_model.ElasticNetCV` and :class:`linear_model.LassoCV`. 
+ This changes the shape of ``alphas_`` from ``(n_alphas,)`` to + ``(n_l1_ratio, n_alphas)`` if the ``l1_ratio`` provided is a 1-D array like + object of length greater than one. + By `Manoj Kumar`_. + +- Fix :class:`linear_model.ElasticNetCV` and :class:`linear_model.LassoCV` + when fitting intercept and input data is sparse. The automatic grid + of alphas was not computed correctly and the scaling with normalize + was wrong. By `Manoj Kumar`_. + +- Fix wrong maximal number of features drawn (``max_features``) at each split + for decision trees, random forests and gradient tree boosting. + Previously, the count for the number of drawn features started only after + one non constant features in the split. This bug fix will affect + computational and generalization performance of those algorithms in the + presence of constant features. To get back previous generalization + performance, you should modify the value of ``max_features``. + By `Arnaud Joly`_. + +- Fix wrong maximal number of features drawn (``max_features``) at each split + for :class:`ensemble.ExtraTreesClassifier` and + :class:`ensemble.ExtraTreesRegressor`. Previously, only non constant + features in the split was counted as drawn. Now constant features are + counted as drawn. Furthermore at least one feature must be non constant + in order to make a valid split. This bug fix will affect + computational and generalization performance of extra trees in the + presence of constant features. To get back previous generalization + performance, you should modify the value of ``max_features``. + By `Arnaud Joly`_. + +- Fix :func:`utils.compute_class_weight` when ``class_weight=="auto"``. + Previously it was broken for input of non-integer ``dtype`` and the + weighted array that was returned was wrong. By `Manoj Kumar`_. + +- Fix :class:`cross_validation.Bootstrap` to return ``ValueError`` + when ``n_train + n_test > n``. By :user:`Ronald Phlypo `. + + +People +------ + +List of contributors for release 0.15 by number of commits. 
+ +* 312 Olivier Grisel +* 275 Lars Buitinck +* 221 Gael Varoquaux +* 148 Arnaud Joly +* 134 Johannes Schönberger +* 119 Gilles Louppe +* 113 Joel Nothman +* 111 Alexandre Gramfort +* 95 Jaques Grobler +* 89 Denis Engemann +* 83 Peter Prettenhofer +* 83 Alexander Fabisch +* 62 Mathieu Blondel +* 60 Eustache Diemert +* 60 Nelle Varoquaux +* 49 Michael Bommarito +* 45 Manoj-Kumar-S +* 28 Kyle Kastner +* 26 Andreas Mueller +* 22 Noel Dawe +* 21 Maheshakya Wijewardena +* 21 Brooke Osborn +* 21 Hamzeh Alsalhi +* 21 Jake VanderPlas +* 21 Philippe Gervais +* 19 Bala Subrahmanyam Varanasi +* 12 Ronald Phlypo +* 10 Mikhail Korobov +* 8 Thomas Unterthiner +* 8 Jeffrey Blackburne +* 8 eltermann +* 8 bwignall +* 7 Ankit Agrawal +* 7 CJ Carey +* 6 Daniel Nouri +* 6 Chen Liu +* 6 Michael Eickenberg +* 6 ugurthemaster +* 5 Aaron Schumacher +* 5 Baptiste Lagarde +* 5 Rajat Khanduja +* 5 Robert McGibbon +* 5 Sergio Pascual +* 4 Alexis Metaireau +* 4 Ignacio Rossi +* 4 Virgile Fritsch +* 4 Sebastian Säger +* 4 Ilambharathi Kanniah +* 4 sdenton4 +* 4 Robert Layton +* 4 Alyssa +* 4 Amos Waterland +* 3 Andrew Tulloch +* 3 murad +* 3 Steven Maude +* 3 Karol Pysniak +* 3 Jacques Kvam +* 3 cgohlke +* 3 cjlin +* 3 Michael Becker +* 3 hamzeh +* 3 Eric Jacobsen +* 3 john collins +* 3 kaushik94 +* 3 Erwin Marsi +* 2 csytracy +* 2 LK +* 2 Vlad Niculae +* 2 Laurent Direr +* 2 Erik Shilts +* 2 Raul Garreta +* 2 Yoshiki Vázquez Baeza +* 2 Yung Siang Liau +* 2 abhishek thakur +* 2 James Yu +* 2 Rohit Sivaprasad +* 2 Roland Szabo +* 2 amormachine +* 2 Alexis Mignon +* 2 Oscar Carlsson +* 2 Nantas Nardelli +* 2 jess010 +* 2 kowalski87 +* 2 Andrew Clegg +* 2 Federico Vaggi +* 2 Simon Frid +* 2 Félix-Antoine Fortin +* 1 Ralf Gommers +* 1 t-aft +* 1 Ronan Amicel +* 1 Rupesh Kumar Srivastava +* 1 Ryan Wang +* 1 Samuel Charron +* 1 Samuel St-Jean +* 1 Fabian Pedregosa +* 1 Skipper Seabold +* 1 Stefan Walk +* 1 Stefan van der Walt +* 1 Stephan Hoyer +* 1 Allen Riddell +* 1 Valentin Haenel +* 1 Vijay Ramesh +* 1 Will Myers +* 1 Yaroslav Halchenko +* 1 Yoni Ben-Meshulam +* 1 Yury V. Zaytsev +* 1 adrinjalali +* 1 ai8rahim +* 1 alemagnani +* 1 alex +* 1 benjamin wilson +* 1 chalmerlowe +* 1 dzikie drożdże +* 1 jamestwebber +* 1 matrixorz +* 1 popo +* 1 samuela +* 1 François Boulogne +* 1 Alexander Measure +* 1 Ethan White +* 1 Guilherme Trein +* 1 Hendrik Heuer +* 1 IvicaJovic +* 1 Jan Hendrik Metzen +* 1 Jean Michel Rouly +* 1 Eduardo Ariño de la Rubia +* 1 Jelle Zijlstra +* 1 Eddy L O Jansson +* 1 Denis +* 1 John +* 1 John Schmidt +* 1 Jorge Cañardo Alastuey +* 1 Joseph Perla +* 1 Joshua Vredevoogd +* 1 José Ricardo +* 1 Julien Miotte +* 1 Kemal Eren +* 1 Kenta Sato +* 1 David Cournapeau +* 1 Kyle Kelley +* 1 Daniele Medri +* 1 Laurent Luce +* 1 Laurent Pierron +* 1 Luis Pedro Coelho +* 1 DanielWeitzenfeld +* 1 Craig Thompson +* 1 Chyi-Kwei Yau +* 1 Matthew Brett +* 1 Matthias Feurer +* 1 Max Linke +* 1 Chris Filo Gorgolewski +* 1 Charles Earl +* 1 Michael Hanke +* 1 Michele Orrù +* 1 Bryan Lunt +* 1 Brian Kearns +* 1 Paul Butler +* 1 Paweł Mandera +* 1 Peter +* 1 Andrew Ash +* 1 Pietro Zambelli +* 1 staubda + + +.. _changes_0_14: + +Version 0.14 +=============== + +**August 7, 2013** + +Changelog +--------- + +- Missing values with sparse and dense matrices can be imputed with the + transformer :class:`preprocessing.Imputer` by `Nicolas Trésegnie`_. + +- The core implementation of decisions trees has been rewritten from + scratch, allowing for faster tree induction and lower memory + consumption in all tree-based estimators. 
By `Gilles Louppe`_. + +- Added :class:`ensemble.AdaBoostClassifier` and + :class:`ensemble.AdaBoostRegressor`, by `Noel Dawe`_ and + `Gilles Louppe`_. See the :ref:`AdaBoost ` section of the user + guide for details and examples. + +- Added :class:`grid_search.RandomizedSearchCV` and + :class:`grid_search.ParameterSampler` for randomized hyperparameter + optimization. By `Andreas Müller`_. + +- Added :ref:`biclustering ` algorithms + (:class:`sklearn.cluster.bicluster.SpectralCoclustering` and + :class:`sklearn.cluster.bicluster.SpectralBiclustering`), data + generation methods (:func:`sklearn.datasets.make_biclusters` and + :func:`sklearn.datasets.make_checkerboard`), and scoring metrics + (:func:`sklearn.metrics.consensus_score`). By `Kemal Eren`_. + +- Added :ref:`Restricted Boltzmann Machines` + (:class:`neural_network.BernoulliRBM`). By `Yann Dauphin`_. + +- Python 3 support by :user:`Justin Vincent `, `Lars Buitinck`_, + :user:`Subhodeep Moitra ` and `Olivier Grisel`_. All tests now pass under + Python 3.3. + +- Ability to pass one penalty (alpha value) per target in + :class:`linear_model.Ridge`, by @eickenberg and `Mathieu Blondel`_. + +- Fixed :mod:`sklearn.linear_model.stochastic_gradient.py` L2 regularization + issue (minor practical significance). + By :user:`Norbert Crombach ` and `Mathieu Blondel`_ . + +- Added an interactive version of `Andreas Müller`_'s + `Machine Learning Cheat Sheet (for scikit-learn) + `_ + to the documentation. See :ref:`Choosing the right estimator `. + By `Jaques Grobler`_. + +- :class:`grid_search.GridSearchCV` and + :func:`cross_validation.cross_val_score` now support the use of advanced + scoring function such as area under the ROC curve and f-beta scores. + See :ref:`scoring_parameter` for details. By `Andreas Müller`_ + and `Lars Buitinck`_. + Passing a function from :mod:`sklearn.metrics` as ``score_func`` is + deprecated. + +- Multi-label classification output is now supported by + :func:`metrics.accuracy_score`, :func:`metrics.zero_one_loss`, + :func:`metrics.f1_score`, :func:`metrics.fbeta_score`, + :func:`metrics.classification_report`, + :func:`metrics.precision_score` and :func:`metrics.recall_score` + by `Arnaud Joly`_. + +- Two new metrics :func:`metrics.hamming_loss` and + :func:`metrics.jaccard_similarity_score` + are added with multi-label support by `Arnaud Joly`_. + +- Speed and memory usage improvements in + :class:`feature_extraction.text.CountVectorizer` and + :class:`feature_extraction.text.TfidfVectorizer`, + by Jochen Wersdörfer and Roman Sinayev. + +- The ``min_df`` parameter in + :class:`feature_extraction.text.CountVectorizer` and + :class:`feature_extraction.text.TfidfVectorizer`, which used to be 2, + has been reset to 1 to avoid unpleasant surprises (empty vocabularies) + for novice users who try it out on tiny document collections. + A value of at least 2 is still recommended for practical use. + +- :class:`svm.LinearSVC`, :class:`linear_model.SGDClassifier` and + :class:`linear_model.SGDRegressor` now have a ``sparsify`` method that + converts their ``coef_`` into a sparse matrix, meaning stored models + trained using these estimators can be made much more compact. + +- :class:`linear_model.SGDClassifier` now produces multiclass probability + estimates when trained under log loss or modified Huber loss. + +- Hyperlinks to documentation in example code on the website by + :user:`Martin Luessi `. 
+
- Fixed bug in :class:`preprocessing.MinMaxScaler` causing incorrect scaling
  of the features for non-default ``feature_range`` settings. By `Andreas
  Müller`_.
+
- ``max_features`` in :class:`tree.DecisionTreeClassifier`,
  :class:`tree.DecisionTreeRegressor` and all derived ensemble estimators
  now supports percentage values. By `Gilles Louppe`_.
+
- Performance improvements in :class:`isotonic.IsotonicRegression` by
  `Nelle Varoquaux`_.
+
- :func:`metrics.accuracy_score` has an option ``normalize`` to return
  the fraction or the number of correctly classified samples
  by `Arnaud Joly`_.
+
- Added :func:`metrics.log_loss` that computes log loss, aka cross-entropy
  loss. By Jochen Wersdörfer and `Lars Buitinck`_.
+
- A bug that caused :class:`ensemble.AdaBoostClassifier` to output
  incorrect probabilities has been fixed.
+
- Feature selectors now share a mixin providing consistent ``transform``,
  ``inverse_transform`` and ``get_support`` methods. By `Joel Nothman`_.
+
- A fitted :class:`grid_search.GridSearchCV` or
  :class:`grid_search.RandomizedSearchCV` can now generally be pickled.
  By `Joel Nothman`_.
+
- Refactored and vectorized implementation of :func:`metrics.roc_curve`
  and :func:`metrics.precision_recall_curve`. By `Joel Nothman`_.
+
- The new estimator :class:`sklearn.decomposition.TruncatedSVD`
  performs dimensionality reduction using SVD on sparse matrices,
  and can be used for latent semantic analysis (LSA).
  By `Lars Buitinck`_.
+
- Added self-contained example of out-of-core learning on text data
  :ref:`sphx_glr_auto_examples_applications_plot_out_of_core_classification.py`.
  By :user:`Eustache Diemert <oddskool>`.
+
- The default number of components for
  :class:`sklearn.decomposition.RandomizedPCA` is now correctly documented
  to be ``n_features``. This was the default behavior, so programs using it
  will continue to work as they did.
+
- :class:`sklearn.cluster.KMeans` now fits several orders of magnitude
  faster on sparse data (the speedup depends on the sparsity). By
  `Lars Buitinck`_.
+
- Reduced memory footprint of FastICA by `Denis Engemann`_ and
  `Alexandre Gramfort`_.
+
- Verbose output in :mod:`sklearn.ensemble.gradient_boosting` now uses
  a column format and prints progress with decreasing frequency.
  It also shows the remaining time. By `Peter Prettenhofer`_.
+
- :mod:`sklearn.ensemble.gradient_boosting` provides out-of-bag improvement
  :attr:`~sklearn.ensemble.GradientBoostingRegressor.oob_improvement_`
  rather than the OOB score for model selection. An example that shows
  how to use OOB estimates to select the number of trees was added.
  By `Peter Prettenhofer`_.
+
- Most metrics now support string labels for multiclass classification
  by `Arnaud Joly`_ and `Lars Buitinck`_.
+
- New OrthogonalMatchingPursuitCV class by `Alexandre Gramfort`_
  and `Vlad Niculae`_.
+
- Fixed a bug in :class:`sklearn.covariance.GraphLassoCV`: the
  'alphas' parameter now works as expected when given a list of
  values. By Philippe Gervais.
+
- Fixed an important bug in :class:`sklearn.covariance.GraphLassoCV`
  that prevented all folds provided by a CV object from being used (only
  the first 3 were used). When providing a CV object, execution
  time may thus increase significantly compared to the previous
  version (the results are now correct). By Philippe Gervais.
+
- :func:`cross_validation.cross_val_score` and the :mod:`grid_search`
  module are now tested with multi-output data by `Arnaud Joly`_.
+
- :func:`datasets.make_multilabel_classification` can now return
  the output in label indicator multilabel format by `Arnaud Joly`_.
+
- K-nearest neighbors estimators, :class:`neighbors.KNeighborsClassifier`
  and :class:`neighbors.KNeighborsRegressor`,
  and radius neighbors estimators, :class:`neighbors.RadiusNeighborsClassifier`
  and :class:`neighbors.RadiusNeighborsRegressor`, support multioutput data
  by `Arnaud Joly`_.
+
- Random state in LibSVM-based estimators (:class:`svm.SVC`, :class:`svm.NuSVC`,
  :class:`svm.OneClassSVM`, :class:`svm.SVR`, :class:`svm.NuSVR`) can now be
  controlled. This is useful to ensure consistency in the probability
  estimates for the classifiers trained with ``probability=True``. By
  `Vlad Niculae`_.
+
- Out-of-core learning support for discrete naive Bayes classifiers
  :class:`sklearn.naive_bayes.MultinomialNB` and
  :class:`sklearn.naive_bayes.BernoulliNB` by adding the ``partial_fit``
  method by `Olivier Grisel`_.
+
- New website design and navigation by `Gilles Louppe`_, `Nelle Varoquaux`_,
  Vincent Michel and `Andreas Müller`_.
+
- Improved documentation on :ref:`multi-class, multi-label and multi-output
  classification <multiclass>` by `Yannick Schwartz`_ and `Arnaud Joly`_.
+
- Better input and error handling in the :mod:`metrics` module by
  `Arnaud Joly`_ and `Joel Nothman`_.
+
- Speed optimization of the :mod:`hmm` module by :user:`Mikhail Korobov <kmike>`.
+
- Significant speed improvements for :class:`sklearn.cluster.DBSCAN`
  by `cleverless <https://github.com/cleverless>`_.
+

API changes summary
-------------------
+
- The :func:`auc_score` was renamed :func:`roc_auc_score`.
+
- Testing scikit-learn with ``sklearn.test()`` is deprecated. Use
  ``nosetests sklearn`` from the command line.
+
- Feature importances in :class:`tree.DecisionTreeClassifier`,
  :class:`tree.DecisionTreeRegressor` and all derived ensemble estimators
  are now computed on the fly when accessing the ``feature_importances_``
  attribute. Setting ``compute_importances=True`` is no longer required.
  By `Gilles Louppe`_.
+
- :func:`linear_model.lasso_path` and
  :func:`linear_model.enet_path` can return their results in the same
  format as that of :func:`linear_model.lars_path`. This is done by
  setting the ``return_models`` parameter to ``False``. By
  `Jaques Grobler`_ and `Alexandre Gramfort`_.
+
- :class:`grid_search.IterGrid` was renamed to
  :class:`grid_search.ParameterGrid`.
+
- Fixed bug in :class:`KFold` causing imperfect class balance in some
  cases. By `Alexandre Gramfort`_ and Tadej Janež.
+
- :class:`sklearn.neighbors.BallTree` has been refactored, and a
  :class:`sklearn.neighbors.KDTree` has been
  added which shares the same interface. The Ball Tree now works with
  a wide variety of distance metrics. Both classes have many new
  methods, including single-tree and dual-tree queries, breadth-first
  and depth-first searching, and more advanced queries such as
  kernel density estimation and 2-point correlation functions.
  By `Jake Vanderplas`_.
+
- Support for ``scipy.spatial.cKDTree`` within neighbors queries has been
  removed, and the functionality replaced with the new :class:`KDTree`
  class.
+
- :class:`sklearn.neighbors.KernelDensity` has been added, which performs
  efficient kernel density estimation with a variety of kernels.
+
- :class:`sklearn.decomposition.KernelPCA` now always returns output with
  ``n_components`` components, unless the new parameter ``remove_zero_eig``
  is set to ``True``.
This new behavior is consistent with the way + kernel PCA was always documented; previously, the removal of components + with zero eigenvalues was tacitly performed on all data. + +- ``gcv_mode="auto"`` no longer tries to perform SVD on a densified + sparse matrix in :class:`sklearn.linear_model.RidgeCV`. + +- Sparse matrix support in :class:`sklearn.decomposition.RandomizedPCA` + is now deprecated in favor of the new ``TruncatedSVD``. + +- :class:`cross_validation.KFold` and + :class:`cross_validation.StratifiedKFold` now enforce `n_folds >= 2` + otherwise a ``ValueError`` is raised. By `Olivier Grisel`_. + +- :func:`datasets.load_files`'s ``charset`` and ``charset_errors`` + parameters were renamed ``encoding`` and ``decode_errors``. + +- Attribute ``oob_score_`` in :class:`sklearn.ensemble.GradientBoostingRegressor` + and :class:`sklearn.ensemble.GradientBoostingClassifier` + is deprecated and has been replaced by ``oob_improvement_`` . + +- Attributes in OrthogonalMatchingPursuit have been deprecated + (copy_X, Gram, ...) and precompute_gram renamed precompute + for consistency. See #2224. + +- :class:`sklearn.preprocessing.StandardScaler` now converts integer input + to float, and raises a warning. Previously it rounded for dense integer + input. + +- :class:`sklearn.multiclass.OneVsRestClassifier` now has a + ``decision_function`` method. This will return the distance of each + sample from the decision boundary for each class, as long as the + underlying estimators implement the ``decision_function`` method. + By `Kyle Kastner`_. + +- Better input validation, warning on unexpected shapes for y. + +People +------ +List of contributors for release 0.14 by number of commits. + + * 277 Gilles Louppe + * 245 Lars Buitinck + * 187 Andreas Mueller + * 124 Arnaud Joly + * 112 Jaques Grobler + * 109 Gael Varoquaux + * 107 Olivier Grisel + * 102 Noel Dawe + * 99 Kemal Eren + * 79 Joel Nothman + * 75 Jake VanderPlas + * 73 Nelle Varoquaux + * 71 Vlad Niculae + * 65 Peter Prettenhofer + * 64 Alexandre Gramfort + * 54 Mathieu Blondel + * 38 Nicolas Trésegnie + * 35 eustache + * 27 Denis Engemann + * 25 Yann N. Dauphin + * 19 Justin Vincent + * 17 Robert Layton + * 15 Doug Coleman + * 14 Michael Eickenberg + * 13 Robert Marchman + * 11 Fabian Pedregosa + * 11 Philippe Gervais + * 10 Jim Holmström + * 10 Tadej Janež + * 10 syhw + * 9 Mikhail Korobov + * 9 Steven De Gryze + * 8 sergeyf + * 7 Ben Root + * 7 Hrishikesh Huilgolkar + * 6 Kyle Kastner + * 6 Martin Luessi + * 6 Rob Speer + * 5 Federico Vaggi + * 5 Raul Garreta + * 5 Rob Zinkov + * 4 Ken Geis + * 3 A. 
Flaxman + * 3 Denton Cockburn + * 3 Dougal Sutherland + * 3 Ian Ozsvald + * 3 Johannes Schönberger + * 3 Robert McGibbon + * 3 Roman Sinayev + * 3 Szabo Roland + * 2 Diego Molla + * 2 Imran Haque + * 2 Jochen Wersdörfer + * 2 Sergey Karayev + * 2 Yannick Schwartz + * 2 jamestwebber + * 1 Abhijeet Kolhe + * 1 Alexander Fabisch + * 1 Bastiaan van den Berg + * 1 Benjamin Peterson + * 1 Daniel Velkov + * 1 Fazlul Shahriar + * 1 Felix Brockherde + * 1 Félix-Antoine Fortin + * 1 Harikrishnan S + * 1 Jack Hale + * 1 JakeMick + * 1 James McDermott + * 1 John Benediktsson + * 1 John Zwinck + * 1 Joshua Vredevoogd + * 1 Justin Pati + * 1 Kevin Hughes + * 1 Kyle Kelley + * 1 Matthias Ekman + * 1 Miroslav Shubernetskiy + * 1 Naoki Orii + * 1 Norbert Crombach + * 1 Rafael Cunha de Almeida + * 1 Rolando Espinoza La fuente + * 1 Seamus Abshere + * 1 Sergey Feldman + * 1 Sergio Medina + * 1 Stefano Lattarini + * 1 Steve Koch + * 1 Sturla Molden + * 1 Thomas Jarosch + * 1 Yaroslav Halchenko + +.. _changes_0_13_1: + +Version 0.13.1 +============== + +**February 23, 2013** + +The 0.13.1 release only fixes some bugs and does not add any new functionality. + +Changelog +--------- + +- Fixed a testing error caused by the function :func:`cross_validation.train_test_split` being + interpreted as a test by `Yaroslav Halchenko`_. + +- Fixed a bug in the reassignment of small clusters in the :class:`cluster.MiniBatchKMeans` + by `Gael Varoquaux`_. + +- Fixed default value of ``gamma`` in :class:`decomposition.KernelPCA` by `Lars Buitinck`_. + +- Updated joblib to ``0.7.0d`` by `Gael Varoquaux`_. + +- Fixed scaling of the deviance in :class:`ensemble.GradientBoostingClassifier` by `Peter Prettenhofer`_. + +- Better tie-breaking in :class:`multiclass.OneVsOneClassifier` by `Andreas Müller`_. + +- Other small improvements to tests and documentation. + +People +------ +List of contributors for release 0.13.1 by number of commits. + * 16 `Lars Buitinck`_ + * 12 `Andreas Müller`_ + * 8 `Gael Varoquaux`_ + * 5 Robert Marchman + * 3 `Peter Prettenhofer`_ + * 2 Hrishikesh Huilgolkar + * 1 Bastiaan van den Berg + * 1 Diego Molla + * 1 `Gilles Louppe`_ + * 1 `Mathieu Blondel`_ + * 1 `Nelle Varoquaux`_ + * 1 Rafael Cunha de Almeida + * 1 Rolando Espinoza La fuente + * 1 `Vlad Niculae`_ + * 1 `Yaroslav Halchenko`_ + + +.. _changes_0_13: + +Version 0.13 +============ + +**January 21, 2013** + +New Estimator Classes +--------------------- + +- :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor`, two + data-independent predictors by `Mathieu Blondel`_. Useful to sanity-check + your estimators. See :ref:`dummy_estimators` in the user guide. + Multioutput support added by `Arnaud Joly`_. + +- :class:`decomposition.FactorAnalysis`, a transformer implementing the + classical factor analysis, by `Christian Osendorfer`_ and `Alexandre + Gramfort`_. See :ref:`FA` in the user guide. + +- :class:`feature_extraction.FeatureHasher`, a transformer implementing the + "hashing trick" for fast, low-memory feature extraction from string fields + by `Lars Buitinck`_ and :class:`feature_extraction.text.HashingVectorizer` + for text documents by `Olivier Grisel`_ See :ref:`feature_hashing` and + :ref:`hashing_vectorizer` for the documentation and sample usage. + +- :class:`pipeline.FeatureUnion`, a transformer that concatenates + results of several other transformers by `Andreas Müller`_. See + :ref:`feature_union` in the user guide. 
+ +- :class:`random_projection.GaussianRandomProjection`, + :class:`random_projection.SparseRandomProjection` and the function + :func:`random_projection.johnson_lindenstrauss_min_dim`. The first two are + transformers implementing Gaussian and sparse random projection matrix + by `Olivier Grisel`_ and `Arnaud Joly`_. + See :ref:`random_projection` in the user guide. + +- :class:`kernel_approximation.Nystroem`, a transformer for approximating + arbitrary kernels by `Andreas Müller`_. See + :ref:`nystroem_kernel_approx` in the user guide. + +- :class:`preprocessing.OneHotEncoder`, a transformer that computes binary + encodings of categorical features by `Andreas Müller`_. See + :ref:`preprocessing_categorical_features` in the user guide. + +- :class:`linear_model.PassiveAggressiveClassifier` and + :class:`linear_model.PassiveAggressiveRegressor`, predictors implementing + an efficient stochastic optimization for linear models by `Rob Zinkov`_ and + `Mathieu Blondel`_. See :ref:`passive_aggressive` in the user + guide. + +- :class:`ensemble.RandomTreesEmbedding`, a transformer for creating high-dimensional + sparse representations using ensembles of totally random trees by `Andreas Müller`_. + See :ref:`random_trees_embedding` in the user guide. + +- :class:`manifold.SpectralEmbedding` and function + :func:`manifold.spectral_embedding`, implementing the "laplacian + eigenmaps" transformation for non-linear dimensionality reduction by Wei + Li. See :ref:`spectral_embedding` in the user guide. + +- :class:`isotonic.IsotonicRegression` by `Fabian Pedregosa`_, `Alexandre Gramfort`_ + and `Nelle Varoquaux`_, + + +Changelog +--------- + +- :func:`metrics.zero_one_loss` (formerly ``metrics.zero_one``) now has + option for normalized output that reports the fraction of + misclassifications, rather than the raw number of misclassifications. By + Kyle Beauchamp. + +- :class:`tree.DecisionTreeClassifier` and all derived ensemble models now + support sample weighting, by `Noel Dawe`_ and `Gilles Louppe`_. + +- Speedup improvement when using bootstrap samples in forests of randomized + trees, by `Peter Prettenhofer`_ and `Gilles Louppe`_. + +- Partial dependence plots for :ref:`gradient_boosting` in + :func:`ensemble.partial_dependence.partial_dependence` by `Peter + Prettenhofer`_. See :ref:`sphx_glr_auto_examples_ensemble_plot_partial_dependence.py` for an + example. + +- The table of contents on the website has now been made expandable by + `Jaques Grobler`_. + +- :class:`feature_selection.SelectPercentile` now breaks ties + deterministically instead of returning all equally ranked features. + +- :class:`feature_selection.SelectKBest` and + :class:`feature_selection.SelectPercentile` are more numerically stable + since they use scores, rather than p-values, to rank results. This means + that they might sometimes select different features than they did + previously. + +- Ridge regression and ridge classification fitting with ``sparse_cg`` solver + no longer has quadratic memory complexity, by `Lars Buitinck`_ and + `Fabian Pedregosa`_. + +- Ridge regression and ridge classification now support a new fast solver + called ``lsqr``, by `Mathieu Blondel`_. + +- Speed up of :func:`metrics.precision_recall_curve` by Conrad Lee. + +- Added support for reading/writing svmlight files with pairwise + preference attribute (qid in svmlight file format) in + :func:`datasets.dump_svmlight_file` and + :func:`datasets.load_svmlight_file` by `Fabian Pedregosa`_. 
+
- Faster and more robust :func:`metrics.confusion_matrix` and
  :ref:`clustering_evaluation` by Wei Li.
+
- :func:`cross_validation.cross_val_score` now works with precomputed kernels
  and affinity matrices, by `Andreas Müller`_.
+
- LARS algorithm made more numerically stable with heuristics to drop
  regressors that are too correlated as well as to stop the path when
  numerical noise becomes predominant, by `Gael Varoquaux`_.
+
- Faster implementation of :func:`metrics.precision_recall_curve` by
  Conrad Lee.
+
- New kernel :func:`metrics.chi2_kernel` by `Andreas Müller`_, often used
  in computer vision applications.
+
- Fixed a longstanding bug in :class:`naive_bayes.BernoulliNB`, by
  Shaun Jackman.
+
- Implemented ``predict_proba`` in :class:`multiclass.OneVsRestClassifier`,
  by Andrew Winterman.
+
- Improved consistency in gradient boosting: estimators
  :class:`ensemble.GradientBoostingRegressor` and
  :class:`ensemble.GradientBoostingClassifier` use the estimator
  :class:`tree.DecisionTreeRegressor` instead of the
  :class:`tree._tree.Tree` data structure by `Arnaud Joly`_.
+
- Fixed a floating point exception in the :ref:`decision trees <tree>`
  module, by Seberg.
+
- Fixed :func:`metrics.roc_curve` failing when ``y_true`` has only one class,
  by Wei Li.
+
- Added the :func:`metrics.mean_absolute_error` function which computes the
  mean absolute error. The :func:`metrics.mean_squared_error`,
  :func:`metrics.mean_absolute_error` and
  :func:`metrics.r2_score` metrics support multioutput by `Arnaud Joly`_.
+
- Fixed ``class_weight`` support in :class:`svm.LinearSVC` and
  :class:`linear_model.LogisticRegression` by `Andreas Müller`_. The meaning
  of ``class_weight`` was reversed, as erroneously higher weight meant fewer
  positives of a given class in earlier releases.
+
- Improved narrative documentation and consistency in
  :mod:`sklearn.metrics` for regression and classification metrics
  by `Arnaud Joly`_.
+
- Fixed a bug in :class:`sklearn.svm.SVC` when using CSR matrices with
  unsorted indices by Xinfan Meng and `Andreas Müller`_.
+
- :class:`MiniBatchKMeans`: Added random reassignment of cluster centers
  with few observations attached to them, by `Gael Varoquaux`_.
+

API changes summary
-------------------
- Renamed all occurrences of ``n_atoms`` to ``n_components`` for consistency.
  This applies to :class:`decomposition.DictionaryLearning`,
  :class:`decomposition.MiniBatchDictionaryLearning`,
  :func:`decomposition.dict_learning`, :func:`decomposition.dict_learning_online`.
+
- Renamed all occurrences of ``max_iters`` to ``max_iter`` for consistency.
  This applies to :class:`semi_supervised.LabelPropagation` and
  :class:`semi_supervised.label_propagation.LabelSpreading`.
+
- Renamed all occurrences of ``learn_rate`` to ``learning_rate`` for
  consistency in :class:`ensemble.BaseGradientBoosting` and
  :class:`ensemble.GradientBoostingRegressor`.
+
- The module ``sklearn.linear_model.sparse`` is gone. Sparse matrix support
  was already integrated into the "regular" linear models.
+
- :func:`sklearn.metrics.mean_square_error`, which incorrectly returned the
  accumulated error, was removed. Use ``mean_squared_error`` instead.
+
- Passing ``class_weight`` parameters to ``fit`` methods is no longer
  supported. Pass them to estimator constructors instead.
+
- GMMs no longer have ``decode`` and ``rvs`` methods. Use the ``score``,
  ``predict`` or ``sample`` methods instead.
+
- The ``solver`` fit option in Ridge regression and classification is now
  deprecated and will be removed in v0.14. Use the constructor option
  instead.
+
- :class:`feature_extraction.DictVectorizer` now returns sparse
  matrices in the CSR format, instead of COO.
+
- Renamed ``k`` in :class:`cross_validation.KFold` and
  :class:`cross_validation.StratifiedKFold` to ``n_folds``, renamed
  ``n_bootstraps`` to ``n_iter`` in ``cross_validation.Bootstrap``.
+
- Renamed all occurrences of ``n_iterations`` to ``n_iter`` for consistency.
  This applies to :class:`cross_validation.ShuffleSplit`,
  :class:`cross_validation.StratifiedShuffleSplit`,
  :func:`utils.randomized_range_finder` and :func:`utils.randomized_svd`.
+
- Replaced ``rho`` in :class:`linear_model.ElasticNet` and
  :class:`linear_model.SGDClassifier` by ``l1_ratio``. The ``rho`` parameter
  had different meanings; ``l1_ratio`` was introduced to avoid confusion.
  It has the same meaning as previously ``rho`` in
  :class:`linear_model.ElasticNet` and ``(1-rho)`` in
  :class:`linear_model.SGDClassifier`.
+
- :class:`linear_model.LassoLars` and :class:`linear_model.Lars` now
  store a list of paths in the case of multiple targets, rather than
  an array of paths.
+
- The attribute ``gmm`` of :class:`hmm.GMMHMM` was renamed to ``gmm_``
  to adhere more strictly to the API.
+
- :func:`cluster.spectral_embedding` was moved to
  :func:`manifold.spectral_embedding`.
+
- Renamed ``eig_tol`` in :func:`manifold.spectral_embedding`,
  :class:`cluster.SpectralClustering` to ``eigen_tol``, renamed ``mode``
  to ``eigen_solver``.
+
- Renamed ``mode`` in :func:`manifold.spectral_embedding` and
  :class:`cluster.SpectralClustering` to ``eigen_solver``.
+
- ``classes_`` and ``n_classes_`` attributes of
  :class:`tree.DecisionTreeClassifier` and all derived ensemble models are
  now flat in case of single output problems and nested in case of
  multi-output problems.
+
- The ``estimators_`` attribute of
  :class:`ensemble.gradient_boosting.GradientBoostingRegressor` and
  :class:`ensemble.gradient_boosting.GradientBoostingClassifier` is now an
  array of :class:`tree.DecisionTreeRegressor`.
+
- Renamed ``chunk_size`` to ``batch_size`` in
  :class:`decomposition.MiniBatchDictionaryLearning` and
  :class:`decomposition.MiniBatchSparsePCA` for consistency.
+
- :class:`svm.SVC` and :class:`svm.NuSVC` now provide a ``classes_``
  attribute and support arbitrary dtypes for labels ``y``.
  Also, the dtype returned by ``predict`` now reflects the dtype of
  ``y`` during ``fit`` (used to be ``np.float``).
+
- Changed default ``test_size`` in :func:`cross_validation.train_test_split`
  to None, added possibility to infer ``test_size`` from ``train_size`` in
  :class:`cross_validation.ShuffleSplit` and
  :class:`cross_validation.StratifiedShuffleSplit`.
+
- Renamed function :func:`sklearn.metrics.zero_one` to
  :func:`sklearn.metrics.zero_one_loss`. Be aware that the default behavior
  in :func:`sklearn.metrics.zero_one_loss` is different from
  :func:`sklearn.metrics.zero_one`: ``normalize=False`` is changed to
  ``normalize=True``.
+
- Renamed function :func:`metrics.zero_one_score` to
  :func:`metrics.accuracy_score`.
+
- :func:`datasets.make_circles` now has the same number of inner and outer points.
+
- In the Naive Bayes classifiers, the ``class_prior`` parameter was moved
  from ``fit`` to ``__init__``.
+
People
------
List of contributors for release 0.13 by number of commits.
+ + * 364 `Andreas Müller`_ + * 143 `Arnaud Joly`_ + * 137 `Peter Prettenhofer`_ + * 131 `Gael Varoquaux`_ + * 117 `Mathieu Blondel`_ + * 108 `Lars Buitinck`_ + * 106 Wei Li + * 101 `Olivier Grisel`_ + * 65 `Vlad Niculae`_ + * 54 `Gilles Louppe`_ + * 40 `Jaques Grobler`_ + * 38 `Alexandre Gramfort`_ + * 30 `Rob Zinkov`_ + * 19 Aymeric Masurelle + * 18 Andrew Winterman + * 17 `Fabian Pedregosa`_ + * 17 Nelle Varoquaux + * 16 `Christian Osendorfer`_ + * 14 `Daniel Nouri`_ + * 13 :user:`Virgile Fritsch ` + * 13 syhw + * 12 `Satrajit Ghosh`_ + * 10 Corey Lynch + * 10 Kyle Beauchamp + * 9 Brian Cheung + * 9 Immanuel Bayer + * 9 mr.Shu + * 8 Conrad Lee + * 8 `James Bergstra`_ + * 7 Tadej Janež + * 6 Brian Cajes + * 6 `Jake Vanderplas`_ + * 6 Michael + * 6 Noel Dawe + * 6 Tiago Nunes + * 6 cow + * 5 Anze + * 5 Shiqiao Du + * 4 Christian Jauvin + * 4 Jacques Kvam + * 4 Richard T. Guy + * 4 `Robert Layton`_ + * 3 Alexandre Abraham + * 3 Doug Coleman + * 3 Scott Dickerson + * 2 ApproximateIdentity + * 2 John Benediktsson + * 2 Mark Veronda + * 2 Matti Lyra + * 2 Mikhail Korobov + * 2 Xinfan Meng + * 1 Alejandro Weinstein + * 1 `Alexandre Passos`_ + * 1 Christoph Deil + * 1 Eugene Nizhibitsky + * 1 Kenneth C. Arnold + * 1 Luis Pedro Coelho + * 1 Miroslav Batchkarov + * 1 Pavel + * 1 Sebastian Berg + * 1 Shaun Jackman + * 1 Subhodeep Moitra + * 1 bob + * 1 dengemann + * 1 emanuele + * 1 x006 + + +.. _changes_0_12.1: + +Version 0.12.1 +=============== + +**October 8, 2012** + +The 0.12.1 release is a bug-fix release with no additional features, but is +instead a set of bug fixes + +Changelog +---------- + +- Improved numerical stability in spectral embedding by `Gael + Varoquaux`_ + +- Doctest under windows 64bit by `Gael Varoquaux`_ + +- Documentation fixes for elastic net by `Andreas Müller`_ and + `Alexandre Gramfort`_ + +- Proper behavior with fortran-ordered NumPy arrays by `Gael Varoquaux`_ + +- Make GridSearchCV work with non-CSR sparse matrix by `Lars Buitinck`_ + +- Fix parallel computing in MDS by `Gael Varoquaux`_ + +- Fix Unicode support in count vectorizer by `Andreas Müller`_ + +- Fix MinCovDet breaking with X.shape = (3, 1) by :user:`Virgile Fritsch ` + +- Fix clone of SGD objects by `Peter Prettenhofer`_ + +- Stabilize GMM by :user:`Virgile Fritsch ` + +People +------ + + * 14 `Peter Prettenhofer`_ + * 12 `Gael Varoquaux`_ + * 10 `Andreas Müller`_ + * 5 `Lars Buitinck`_ + * 3 :user:`Virgile Fritsch ` + * 1 `Alexandre Gramfort`_ + * 1 `Gilles Louppe`_ + * 1 `Mathieu Blondel`_ + +.. _changes_0_12: + +Version 0.12 +============ + +**September 4, 2012** + +Changelog +--------- + +- Various speed improvements of the :ref:`decision trees ` module, by + `Gilles Louppe`_. + +- :class:`ensemble.GradientBoostingRegressor` and + :class:`ensemble.GradientBoostingClassifier` now support feature subsampling + via the ``max_features`` argument, by `Peter Prettenhofer`_. + +- Added Huber and Quantile loss functions to + :class:`ensemble.GradientBoostingRegressor`, by `Peter Prettenhofer`_. + +- :ref:`Decision trees ` and :ref:`forests of randomized trees ` + now support multi-output classification and regression problems, by + `Gilles Louppe`_. + +- Added :class:`preprocessing.LabelEncoder`, a simple utility class to + normalize labels or transform non-numerical labels, by `Mathieu Blondel`_. + +- Added the epsilon-insensitive loss and the ability to make probabilistic + predictions with the modified huber loss in :ref:`sgd`, by + `Mathieu Blondel`_. 
+
- Added :ref:`multidimensional_scaling`, by Nelle Varoquaux.
+
- SVMlight file format loader now detects compressed (gzip/bzip2) files and
  decompresses them on the fly, by `Lars Buitinck`_.
+
- SVMlight file format serializer now preserves double precision floating
  point values, by `Olivier Grisel`_.
+
- A common testing framework for all estimators was added, by `Andreas Müller`_.
+
- Understandable error messages for estimators that do not accept
  sparse input by `Gael Varoquaux`_.
+
- Speedups in hierarchical clustering by `Gael Varoquaux`_. In
  particular, building the tree now supports early stopping. This is
  useful when the number of clusters is not small compared to the
  number of samples.
+
- Add MultiTaskLasso and MultiTaskElasticNet for joint feature selection,
  by `Alexandre Gramfort`_.
+
- Added :func:`metrics.auc_score` and
  :func:`metrics.average_precision_score` convenience functions by `Andreas
  Müller`_.
+
- Improved sparse matrix support in the :ref:`feature_selection`
  module by `Andreas Müller`_.
+
- New word-boundaries-aware character n-gram analyzer for the
  :ref:`text_feature_extraction` module by :user:`@kernc <kernc>`.
+
- Fixed bug in spectral clustering that led to single point clusters
  by `Andreas Müller`_.
+
- In :class:`feature_extraction.text.CountVectorizer`, added an option to
  ignore infrequent words, ``min_df``, by `Andreas Müller`_.
+
- Add support for multiple targets in some linear models (ElasticNet, Lasso
  and OrthogonalMatchingPursuit) by `Vlad Niculae`_ and
  `Alexandre Gramfort`_.
+
- Fixes in :class:`decomposition.ProbabilisticPCA` score function by Wei Li.
+
- Fixed feature importance computation in
  :ref:`gradient_boosting`.
+
API changes summary
-------------------
+
- The old ``scikits.learn`` package has disappeared; all code should import
  from ``sklearn`` instead, which was introduced in 0.9.
+
- In :func:`metrics.roc_curve`, the ``thresholds`` array is now returned
  with its order reversed, in order to keep it consistent with the order
  of the returned ``fpr`` and ``tpr``.
+
- In :mod:`hmm` objects, like :class:`hmm.GaussianHMM`,
  :class:`hmm.MultinomialHMM`, etc., all parameters must be passed to the
  object when initialising it and not through ``fit``. Now ``fit`` will
  only accept the data as an input parameter.
+
- For all SVM classes, a faulty behavior of ``gamma`` was fixed. Previously,
  the default gamma value was only computed the first time ``fit`` was called
  and then stored. It is now recalculated on every call to ``fit``.
+
- All ``Base`` classes are now abstract metaclasses so that they cannot be
  instantiated.
+
- :func:`cluster.ward_tree` now also returns the parent array. This is
  necessary for early stopping, in which case the tree is not
  completely built.
+
- In :class:`feature_extraction.text.CountVectorizer` the parameters
  ``min_n`` and ``max_n`` were joined to the parameter ``ngram_range`` to
  enable grid-searching both at once.
+
- In :class:`feature_extraction.text.CountVectorizer`, words that appear
  only in one document are now ignored by default. To reproduce
  the previous behavior, set ``min_df=1``.
+
- Fixed API inconsistency: :meth:`linear_model.SGDClassifier.predict_proba` now
  returns a 2d array when fit on two classes.
+
- Fixed API inconsistency: :meth:`discriminant_analysis.QuadraticDiscriminantAnalysis.decision_function`
  and :meth:`discriminant_analysis.LinearDiscriminantAnalysis.decision_function` now return 1d arrays
  when fit on two classes.
+
- Grid of alphas used for fitting :class:`linear_model.LassoCV` and
  :class:`linear_model.ElasticNetCV` is now stored
  in the attribute ``alphas_`` rather than overriding the init parameter
  ``alphas``.
+
- Linear models, when ``alpha`` is estimated by cross-validation, store
  the estimated value in the ``alpha_`` attribute rather than just
  ``alpha`` or ``best_alpha``.
+
- :class:`ensemble.GradientBoostingClassifier` now supports
  :meth:`ensemble.GradientBoostingClassifier.staged_predict_proba`, and
  :meth:`ensemble.GradientBoostingClassifier.staged_predict`.
+
- :class:`svm.sparse.SVC` and other sparse SVM classes are now deprecated.
  All classes in the :ref:`svm` module now automatically select the
  sparse or dense representation based on the input.
+
- All clustering algorithms now interpret the array ``X`` given to ``fit`` as
  input data, in particular :class:`cluster.SpectralClustering` and
  :class:`cluster.AffinityPropagation` which previously expected affinity matrices.
+
- For clustering algorithms that take the desired number of clusters as a parameter,
  this parameter is now called ``n_clusters``.
+

People
------
 * 267 `Andreas Müller`_
 * 94 `Gilles Louppe`_
 * 89 `Gael Varoquaux`_
 * 79 `Peter Prettenhofer`_
 * 60 `Mathieu Blondel`_
 * 57 `Alexandre Gramfort`_
 * 52 `Vlad Niculae`_
 * 45 `Lars Buitinck`_
 * 44 Nelle Varoquaux
 * 37 `Jaques Grobler`_
 * 30 Alexis Mignon
 * 30 Immanuel Bayer
 * 27 `Olivier Grisel`_
 * 16 Subhodeep Moitra
 * 13 Yannick Schwartz
 * 12 :user:`@kernc <kernc>`
 * 11 :user:`Virgile Fritsch <VirgileFritsch>`
 * 9 Daniel Duckworth
 * 9 `Fabian Pedregosa`_
 * 9 `Robert Layton`_
 * 8 John Benediktsson
 * 7 Marko Burjek
 * 5 `Nicolas Pinto`_
 * 4 Alexandre Abraham
 * 4 `Jake Vanderplas`_
 * 3 `Brian Holt`_
 * 3 `Edouard Duchesnay`_
 * 3 Florian Hoenig
 * 3 flyingimmidev
 * 2 Francois Savard
 * 2 Hannes Schulz
 * 2 Peter Welinder
 * 2 `Yaroslav Halchenko`_
 * 2 Wei Li
 * 1 Alex Companioni
 * 1 Brandyn A. White
 * 1 Bussonnier Matthias
 * 1 Charles-Pierre Astolfi
 * 1 Dan O'Huiginn
 * 1 David Cournapeau
 * 1 Keith Goodman
 * 1 Ludwig Schwardt
 * 1 Olivier Hervieu
 * 1 Sergio Medina
 * 1 Shiqiao Du
 * 1 Tim Sheerman-Chase
 * 1 buguen
+


.. _changes_0_11:

Version 0.11
============

**May 7, 2012**

Changelog
---------

Highlights
.............

- Gradient boosted regression trees (:ref:`gradient_boosting`)
  for classification and regression by `Peter Prettenhofer`_
  and `Scott White`_.

- Simple dict-based feature loader with support for categorical variables
  (:class:`feature_extraction.DictVectorizer`) by `Lars Buitinck`_.

- Added Matthews correlation coefficient (:func:`metrics.matthews_corrcoef`)
  and added macro and micro average options to
  :func:`metrics.precision_score`, :func:`metrics.recall_score` and
  :func:`metrics.f1_score` by `Satrajit Ghosh`_.

- :ref:`out_of_bag` of generalization error for :ref:`ensemble`
  by `Andreas Müller`_.

- Randomized sparse linear models for feature
  selection, by `Alexandre Gramfort`_ and `Gael Varoquaux`_.

- :ref:`label_propagation` for semi-supervised learning, by Clay
  Woolam. **Note** the semi-supervised API is still work in progress,
  and may change.
+ +- Added BIC/AIC model selection to classical :ref:`gmm` and unified + the API with the remainder of scikit-learn, by `Bertrand Thirion`_ + +- Added :class:`sklearn.cross_validation.StratifiedShuffleSplit`, which is + a :class:`sklearn.cross_validation.ShuffleSplit` with balanced splits, + by Yannick Schwartz. + +- :class:`sklearn.neighbors.NearestCentroid` classifier added, along with a + ``shrink_threshold`` parameter, which implements **shrunken centroid + classification**, by `Robert Layton`_. + +Other changes +.............. + +- Merged dense and sparse implementations of :ref:`sgd` module and + exposed utility extension types for sequential + datasets ``seq_dataset`` and weight vectors ``weight_vector`` + by `Peter Prettenhofer`_. + +- Added ``partial_fit`` (support for online/minibatch learning) and + warm_start to the :ref:`sgd` module by `Mathieu Blondel`_. + +- Dense and sparse implementations of :ref:`svm` classes and + :class:`linear_model.LogisticRegression` merged by `Lars Buitinck`_. + +- Regressors can now be used as base estimator in the :ref:`multiclass` + module by `Mathieu Blondel`_. + +- Added n_jobs option to :func:`metrics.pairwise.pairwise_distances` + and :func:`metrics.pairwise.pairwise_kernels` for parallel computation, + by `Mathieu Blondel`_. + +- :ref:`k_means` can now be run in parallel, using the ``n_jobs`` argument + to either :ref:`k_means` or :class:`KMeans`, by `Robert Layton`_. + +- Improved :ref:`cross_validation` and :ref:`grid_search` documentation + and introduced the new :func:`cross_validation.train_test_split` + helper function by `Olivier Grisel`_ + +- :class:`svm.SVC` members ``coef_`` and ``intercept_`` changed sign for + consistency with ``decision_function``; for ``kernel==linear``, + ``coef_`` was fixed in the one-vs-one case, by `Andreas Müller`_. + +- Performance improvements to efficient leave-one-out cross-validated + Ridge regression, esp. for the ``n_samples > n_features`` case, in + :class:`linear_model.RidgeCV`, by Reuben Fletcher-Costin. + +- Refactoring and simplification of the :ref:`text_feature_extraction` + API and fixed a bug that caused possible negative IDF, + by `Olivier Grisel`_. + +- Beam pruning option in :class:`_BaseHMM` module has been removed since it + is difficult to Cythonize. If you are interested in contributing a Cython + version, you can use the python version in the git history as a reference. + +- Classes in :ref:`neighbors` now support arbitrary Minkowski metric for + nearest neighbors searches. The metric can be specified by argument ``p``. + +API changes summary +------------------- + +- :class:`covariance.EllipticEnvelop` is now deprecated - Please use :class:`covariance.EllipticEnvelope` + instead. + +- ``NeighborsClassifier`` and ``NeighborsRegressor`` are gone in the module + :ref:`neighbors`. Use the classes :class:`KNeighborsClassifier`, + :class:`RadiusNeighborsClassifier`, :class:`KNeighborsRegressor` + and/or :class:`RadiusNeighborsRegressor` instead. + +- Sparse classes in the :ref:`sgd` module are now deprecated. + +- In :class:`mixture.GMM`, :class:`mixture.DPGMM` and :class:`mixture.VBGMM`, + parameters must be passed to an object when initialising it and not through + ``fit``. Now ``fit`` will only accept the data as an input parameter. + +- methods ``rvs`` and ``decode`` in :class:`GMM` module are now deprecated. + ``sample`` and ``score`` or ``predict`` should be used instead. + +- attribute ``_scores`` and ``_pvalues`` in univariate feature selection + objects are now deprecated. 
+
  ``scores_`` or ``pvalues_`` should be used instead.
+
- In :class:`LogisticRegression`, :class:`LinearSVC`, :class:`SVC` and
  :class:`NuSVC`, the ``class_weight`` parameter is now an initialization
  parameter, not a parameter to fit. This makes grid searches
  over this parameter possible.
+
- LFW ``data`` is now always shape ``(n_samples, n_features)`` to be
  consistent with the Olivetti faces dataset. Use the ``images`` and
  ``pairs`` attributes to access the natural image shapes instead.
+
- In :class:`svm.LinearSVC`, the meaning of the ``multi_class`` parameter
  changed. Options now are ``'ovr'`` and ``'crammer_singer'``, with
  ``'ovr'`` being the default. This does not change the default behavior
  but hopefully is less confusing.
+
- Class :class:`feature_extraction.text.Vectorizer` is deprecated and
  replaced by :class:`feature_extraction.text.TfidfVectorizer`.
+
- The preprocessor / analyzer nested structure for text feature
  extraction has been removed. All those features are
  now directly passed as flat constructor arguments
  to :class:`feature_extraction.text.TfidfVectorizer` and
  :class:`feature_extraction.text.CountVectorizer`, in particular the
  following parameters are now used:
+
  - ``analyzer`` can be ``'word'`` or ``'char'`` to switch the default
    analysis scheme, or use a specific python callable (as previously).
+
  - ``tokenizer`` and ``preprocessor`` have been introduced to make it
    still possible to customize those steps with the new API.
+
  - ``input`` explicitly controls how to interpret the sequence passed to
    ``fit`` and ``predict``: filenames, file objects or direct (byte or
    Unicode) strings.
+
  - Charset decoding is explicit and strict by default.
+
  - The ``vocabulary``, fitted or not, is now stored in the
    ``vocabulary_`` attribute to be consistent with the project
    conventions.
+
- Class :class:`feature_extraction.text.TfidfVectorizer` now derives directly
  from :class:`feature_extraction.text.CountVectorizer` to make grid
  search trivial.
+
- Methods ``rvs`` in :class:`_BaseHMM` are now deprecated.
  ``sample`` should be used instead.
+
- The beam pruning option in :class:`_BaseHMM` has been removed, since it is
  difficult to Cythonize. If you are interested, you can look at the
  older versions in the git history.
+
- The SVMlight format loader now supports files with both zero-based and
  one-based column indices, since both occur "in the wild".
+
- Arguments in class :class:`ShuffleSplit` are now consistent with
  :class:`StratifiedShuffleSplit`. Arguments ``test_fraction`` and
  ``train_fraction`` are deprecated and renamed to ``test_size`` and
  ``train_size`` and can accept both ``float`` and ``int``.
+
- Arguments in class :class:`Bootstrap` are now consistent with
  :class:`StratifiedShuffleSplit`. Arguments ``n_test`` and
  ``n_train`` are deprecated and renamed to ``test_size`` and
  ``train_size`` and can accept both ``float`` and ``int``.
+
- Argument ``p`` added to classes in :ref:`neighbors` to specify an
  arbitrary Minkowski metric for nearest neighbors searches.
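
As a quick illustration of the new ``p`` argument, here is a minimal
sketch (the data is made up purely for illustration)::

    import numpy as np
    from sklearn.neighbors import NearestNeighbors

    X = np.array([[0., 0.], [1., 1.], [2., 0.]])
    # p=1 requests the Manhattan (l1) metric; p=2, the default, is Euclidean.
    nn = NearestNeighbors(n_neighbors=2, p=1).fit(X)
    dist, idx = nn.kneighbors(np.array([[0.9, 0.9]]))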
+ + +People +------ + * 282 `Andreas Müller`_ + * 239 `Peter Prettenhofer`_ + * 198 `Gael Varoquaux`_ + * 129 `Olivier Grisel`_ + * 114 `Mathieu Blondel`_ + * 103 Clay Woolam + * 96 `Lars Buitinck`_ + * 88 `Jaques Grobler`_ + * 82 `Alexandre Gramfort`_ + * 50 `Bertrand Thirion`_ + * 42 `Robert Layton`_ + * 28 flyingimmidev + * 26 `Jake Vanderplas`_ + * 26 Shiqiao Du + * 21 `Satrajit Ghosh`_ + * 17 `David Marek`_ + * 17 `Gilles Louppe`_ + * 14 `Vlad Niculae`_ + * 11 Yannick Schwartz + * 10 `Fabian Pedregosa`_ + * 9 fcostin + * 7 Nick Wilson + * 5 Adrien Gaidon + * 5 `Nicolas Pinto`_ + * 4 `David Warde-Farley`_ + * 5 Nelle Varoquaux + * 5 Emmanuelle Gouillart + * 3 Joonas Sillanpää + * 3 Paolo Losi + * 2 Charles McCarthy + * 2 Roy Hyunjin Han + * 2 Scott White + * 2 ibayer + * 1 Brandyn White + * 1 Carlos Scheidegger + * 1 Claire Revillet + * 1 Conrad Lee + * 1 `Edouard Duchesnay`_ + * 1 Jan Hendrik Metzen + * 1 Meng Xinfan + * 1 `Rob Zinkov`_ + * 1 Shiqiao + * 1 Udi Weinsberg + * 1 Virgile Fritsch + * 1 Xinfan Meng + * 1 Yaroslav Halchenko + * 1 jansoe + * 1 Leon Palafox + + +.. _changes_0_10: + +Version 0.10 +============ + +**January 11, 2012** + +Changelog +--------- + +- Python 2.5 compatibility was dropped; the minimum Python version needed + to use scikit-learn is now 2.6. + +- :ref:`sparse_inverse_covariance` estimation using the graph Lasso, with + associated cross-validated estimator, by `Gael Varoquaux`_ + +- New :ref:`Tree ` module by `Brian Holt`_, `Peter Prettenhofer`_, + `Satrajit Ghosh`_ and `Gilles Louppe`_. The module comes with complete + documentation and examples. + +- Fixed a bug in the RFE module by `Gilles Louppe`_ (issue #378). + +- Fixed a memory leak in :ref:`svm` module by `Brian Holt`_ (issue #367). + +- Faster tests by `Fabian Pedregosa`_ and others. + +- Silhouette Coefficient cluster analysis evaluation metric added as + :func:`sklearn.metrics.silhouette_score` by Robert Layton. + +- Fixed a bug in :ref:`k_means` in the handling of the ``n_init`` parameter: + the clustering algorithm used to be run ``n_init`` times but the last + solution was retained instead of the best solution by `Olivier Grisel`_. + +- Minor refactoring in :ref:`sgd` module; consolidated dense and sparse + predict methods; Enhanced test time performance by converting model + parameters to fortran-style arrays after fitting (only multi-class). + +- Adjusted Mutual Information metric added as + :func:`sklearn.metrics.adjusted_mutual_info_score` by Robert Layton. + +- Models like SVC/SVR/LinearSVC/LogisticRegression from libsvm/liblinear + now support scaling of C regularization parameter by the number of + samples by `Alexandre Gramfort`_. + +- New :ref:`Ensemble Methods ` module by `Gilles Louppe`_ and + `Brian Holt`_. The module comes with the random forest algorithm and the + extra-trees method, along with documentation and examples. + +- :ref:`outlier_detection`: outlier and novelty detection, by + :user:`Virgile Fritsch `. + +- :ref:`kernel_approximation`: a transform implementing kernel + approximation for fast SGD on non-linear kernels by + `Andreas Müller`_. + +- Fixed a bug due to atom swapping in :ref:`OMP` by `Vlad Niculae`_. + +- :ref:`SparseCoder` by `Vlad Niculae`_. + +- :ref:`mini_batch_kmeans` performance improvements by `Olivier Grisel`_. + +- :ref:`k_means` support for sparse matrices by `Mathieu Blondel`_. + +- Improved documentation for developers and for the :mod:`sklearn.utils` + module, by `Jake Vanderplas`_. 
+ +- Vectorized 20newsgroups dataset loader + (:func:`sklearn.datasets.fetch_20newsgroups_vectorized`) by + `Mathieu Blondel`_. + +- :ref:`multiclass` by `Lars Buitinck`_. + +- Utilities for fast computation of mean and variance for sparse matrices + by `Mathieu Blondel`_. + +- Make :func:`sklearn.preprocessing.scale` and + :class:`sklearn.preprocessing.Scaler` work on sparse matrices by + `Olivier Grisel`_ + +- Feature importances using decision trees and/or forest of trees, + by `Gilles Louppe`_. + +- Parallel implementation of forests of randomized trees by + `Gilles Louppe`_. + +- :class:`sklearn.cross_validation.ShuffleSplit` can subsample the train + sets as well as the test sets by `Olivier Grisel`_. + +- Errors in the build of the documentation fixed by `Andreas Müller`_. + + +API changes summary +------------------- + +Here are the code migration instructions when upgrading from scikit-learn +version 0.9: + +- Some estimators that may overwrite their inputs to save memory previously + had ``overwrite_`` parameters; these have been replaced with ``copy_`` + parameters with exactly the opposite meaning. + + This particularly affects some of the estimators in :mod:`linear_model`. + The default behavior is still to copy everything passed in. + +- The SVMlight dataset loader :func:`sklearn.datasets.load_svmlight_file` no + longer supports loading two files at once; use ``load_svmlight_files`` + instead. Also, the (unused) ``buffer_mb`` parameter is gone. + +- Sparse estimators in the :ref:`sgd` module use dense parameter vector + ``coef_`` instead of ``sparse_coef_``. This significantly improves + test time performance. + +- The :ref:`covariance` module now has a robust estimator of + covariance, the Minimum Covariance Determinant estimator. + +- Cluster evaluation metrics in :mod:`metrics.cluster` have been refactored + but the changes are backwards compatible. They have been moved to the + :mod:`metrics.cluster.supervised`, along with + :mod:`metrics.cluster.unsupervised` which contains the Silhouette + Coefficient. + +- The ``permutation_test_score`` function now behaves the same way as + ``cross_val_score`` (i.e. uses the mean score across the folds.) + +- Cross Validation generators now use integer indices (``indices=True``) + by default instead of boolean masks. This make it more intuitive to + use with sparse matrix data. + +- The functions used for sparse coding, ``sparse_encode`` and + ``sparse_encode_parallel`` have been combined into + :func:`sklearn.decomposition.sparse_encode`, and the shapes of the arrays + have been transposed for consistency with the matrix factorization setting, + as opposed to the regression setting. + +- Fixed an off-by-one error in the SVMlight/LibSVM file format handling; + files generated using :func:`sklearn.datasets.dump_svmlight_file` should be + re-generated. (They should continue to work, but accidentally had one + extra column of zeros prepended.) + +- ``BaseDictionaryLearning`` class replaced by ``SparseCodingMixin``. + +- :func:`sklearn.utils.extmath.fast_svd` has been renamed + :func:`sklearn.utils.extmath.randomized_svd` and the default + oversampling is now fixed to 10 additional random vectors instead + of doubling the number of components to extract. The new behavior + follows the reference paper. 
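
A minimal sketch of the renamed helper (the random matrix is purely
illustrative)::

    import numpy as np
    from sklearn.utils.extmath import randomized_svd

    A = np.random.RandomState(0).rand(60, 40)
    # Requests 5 singular triplets; 10 extra oversampling vectors are
    # used by default under the new behavior described above.
    U, s, Vt = randomized_svd(A, n_components=5, random_state=0)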
+
+
+People
+------
+
+The following people contributed to scikit-learn since the last release:
+
+ * 246 `Andreas Müller`_
+ * 242 `Olivier Grisel`_
+ * 220 `Gilles Louppe`_
+ * 183 `Brian Holt`_
+ * 166 `Gael Varoquaux`_
+ * 144 `Lars Buitinck`_
+ * 73 `Vlad Niculae`_
+ * 65 `Peter Prettenhofer`_
+ * 64 `Fabian Pedregosa`_
+ * 60 Robert Layton
+ * 55 `Mathieu Blondel`_
+ * 52 `Jake Vanderplas`_
+ * 44 Noel Dawe
+ * 38 `Alexandre Gramfort`_
+ * 24 :user:`Virgile Fritsch `
+ * 23 `Satrajit Ghosh`_
+ * 3 Jan Hendrik Metzen
+ * 3 Kenneth C. Arnold
+ * 3 Shiqiao Du
+ * 3 Tim Sheerman-Chase
+ * 3 `Yaroslav Halchenko`_
+ * 2 Bala Subrahmanyam Varanasi
+ * 2 DraXus
+ * 2 Michael Eickenberg
+ * 1 Bogdan Trach
+ * 1 Félix-Antoine Fortin
+ * 1 Juan Manuel Caicedo Carvajal
+ * 1 Nelle Varoquaux
+ * 1 `Nicolas Pinto`_
+ * 1 Tiziano Zito
+ * 1 Xinfan Meng
+
+
+
+.. _changes_0_9:
+
+Version 0.9
+===========
+
+**September 21, 2011**
+
+scikit-learn 0.9 was released in September 2011, three months after the 0.8
+release, and includes the new modules :ref:`manifold` and
+:ref:`dirichlet_process`, as well as several new algorithms and documentation
+improvements.
+
+This release also includes the dictionary-learning work developed by
+`Vlad Niculae`_ as part of the `Google Summer of Code
+`_ program.
+
+
+
+.. |banner1| image:: ./auto_examples/manifold/images/thumb/sphx_glr_plot_compare_methods_thumb.png
+   :target: auto_examples/manifold/plot_compare_methods.html
+
+.. |banner2| image:: ./auto_examples/linear_model/images/thumb/sphx_glr_plot_omp_thumb.png
+   :target: auto_examples/linear_model/plot_omp.html
+
+.. |banner3| image:: ./auto_examples/decomposition/images/thumb/sphx_glr_plot_kernel_pca_thumb.png
+   :target: auto_examples/decomposition/plot_kernel_pca.html
+
+.. |center-div| raw:: html
+
+    <div style="text-align: center; margin: 0px 0 -5px 0;">
+
+.. |end-div| raw:: html
+
+    </div>
+
+
+|center-div| |banner2| |banner1| |banner3| |end-div|
+
+Changelog
+---------
+
+- New :ref:`manifold` module by `Jake Vanderplas`_ and
+  `Fabian Pedregosa`_.
+
+- New :ref:`Dirichlet Process ` Gaussian Mixture
+  Model by `Alexandre Passos`_.
+
+- :ref:`neighbors` module refactoring by `Jake Vanderplas`_:
+  general refactoring, support for sparse matrices in input, speed and
+  documentation improvements. See the next section for a full list of API
+  changes.
+
+- Improvements on the :ref:`feature_selection` module by
+  `Gilles Louppe`_: refactoring of the RFE classes, documentation
+  rewrite, increased efficiency and minor API changes.
+
+- :ref:`SparsePCA` by `Vlad Niculae`_, `Gael Varoquaux`_ and
+  `Alexandre Gramfort`_.
+
+- Printing an estimator now behaves independently of architecture
+  and Python version thanks to :user:`Jean Kossaifi `.
+
+- :ref:`Loader for libsvm/svmlight format ` by
+  `Mathieu Blondel`_ and `Lars Buitinck`_.
+
+- Documentation improvements: thumbnails in
+  the example gallery by `Fabian Pedregosa`_.
+
+- Important bugfixes in the :ref:`svm` module (segfaults, bad
+  performance) by `Fabian Pedregosa`_.
+
+- Added :ref:`multinomial_naive_bayes` and :ref:`bernoulli_naive_bayes`
+  by `Lars Buitinck`_.
+
+- Text feature extraction optimizations by Lars Buitinck.
+
+- Chi-Square feature selection
+  (:func:`feature_selection.univariate_selection.chi2`) by `Lars Buitinck`_.
+
+- :ref:`sample_generators` module refactoring by `Gilles Louppe`_.
+
+- :ref:`multiclass` by `Mathieu Blondel`_.
+
+- Ball tree rewrite by `Jake Vanderplas`_.
+
+- Implementation of the :ref:`dbscan` algorithm by Robert Layton.
+
+- K-means ``predict`` and ``transform`` by Robert Layton.
+
+- Preprocessing module refactoring by `Olivier Grisel`_.
+
+- Faster mean shift by Conrad Lee.
+
+- New ``Bootstrap``, :ref:`ShuffleSplit` and various other
+  improvements in cross validation schemes by `Olivier Grisel`_ and
+  `Gael Varoquaux`_.
+
+- Adjusted Rand index and V-Measure clustering evaluation metrics by
+  `Olivier Grisel`_.
+
+- Added :class:`Orthogonal Matching Pursuit ` by `Vlad Niculae`_.
+
+- Added 2D-patch extractor utilities in the :ref:`feature_extraction`
+  module by `Vlad Niculae`_.
+
+- Implementation of :class:`linear_model.LassoLarsCV`
+  (cross-validated Lasso solver using the Lars algorithm) and
+  :class:`linear_model.LassoLarsIC` (BIC/AIC model
+  selection in Lars) by `Gael Varoquaux`_
+  and `Alexandre Gramfort`_.
+
+- Scalability improvements to :func:`metrics.roc_curve` by Olivier Hervieu.
+
+- Distance helper functions :func:`metrics.pairwise.pairwise_distances`
+  and :func:`metrics.pairwise.pairwise_kernels` by Robert Layton.
+
+- :class:`Mini-Batch K-Means ` by Nelle Varoquaux and
+  Peter Prettenhofer.
+
+- :ref:`mldata` utilities by Pietro Berkes.
+
+- :ref:`olivetti_faces` by `David Warde-Farley`_.
+
+
+API changes summary
+-------------------
+
+Here are the code migration instructions when upgrading from scikit-learn
+version 0.8:
+
+- The ``scikits.learn`` package was renamed ``sklearn``. There is
+  still a ``scikits.learn`` package alias for backward compatibility.
+
+  Third-party projects with a dependency on scikit-learn 0.9+ should
+  upgrade their codebase. For instance, under Linux / MacOSX just run
+  (make a backup first!)::
+
+    find -name "*.py" | xargs sed -i 's/\bscikits.learn\b/sklearn/g'
+
+- Estimators no longer accept model parameters as ``fit`` arguments:
+  instead all parameters must only be passed as constructor
+  arguments or using the now public ``set_params`` method inherited
+  from :class:`base.BaseEstimator`.
+
+  Some estimators can still accept keyword arguments on ``fit``,
+  but this is restricted to data-dependent values (e.g. a Gram matrix
+  or an affinity matrix that is precomputed from the ``X`` data matrix).
+
+- The ``cross_val`` package has been renamed to ``cross_validation``,
+  although there is also a ``cross_val`` package alias in place for
+  backward compatibility.
+
+  Third-party projects with a dependency on scikit-learn 0.9+ should
+  upgrade their codebase. For instance, under Linux / MacOSX just run
+  (make a backup first!)::
+
+    find -name "*.py" | xargs sed -i 's/\bcross_val\b/cross_validation/g'
+
+- The ``score_func`` argument of the
+  ``sklearn.cross_validation.cross_val_score`` function is now expected
+  to accept ``y_test`` and ``y_predicted`` as its only arguments for
+  classification and regression tasks, or ``X_test`` for unsupervised
+  estimators.
+
+- The ``gamma`` parameter for support vector machine algorithms is set
+  to ``1 / n_features`` by default, instead of ``1 / n_samples``.
+
+- The ``sklearn.hmm`` module has been marked as orphaned: it will be removed
+  from scikit-learn in version 0.11 unless someone steps up to
+  contribute documentation, examples and fix lurking numerical
+  stability issues.
+
+- ``sklearn.neighbors`` has been made into a submodule. The two previously
+  available estimators, ``NeighborsClassifier`` and ``NeighborsRegressor``,
+  have been marked as deprecated. Their functionality has been divided
+  among five new classes: ``NearestNeighbors`` for unsupervised neighbors
+  searches, ``KNeighborsClassifier`` & ``RadiusNeighborsClassifier``
+  for supervised classification problems, and ``KNeighborsRegressor``
+  & ``RadiusNeighborsRegressor`` for supervised regression problems.
+
+- ``sklearn.ball_tree.BallTree`` has been moved to
+  ``sklearn.neighbors.BallTree``. Using the former will generate a warning.
+
+- ``sklearn.linear_model.LARS()`` has been renamed to
+  ``sklearn.linear_model.Lars()``, and related classes (LassoLARS,
+  LassoLARSCV, etc.) have been renamed accordingly.
+
+- All distance metrics and kernels in ``sklearn.metrics.pairwise`` now have a Y
+  parameter, which by default is None. If not given, the result is the distance
+  (or kernel similarity) between each pair of samples in X. If given, the
+  result is the pairwise distance (or kernel similarity) between samples in X
+  and Y.
+
+- ``sklearn.metrics.pairwise.l1_distance`` is now called ``manhattan_distance``,
+  and by default returns the pairwise distance. For the component-wise distance,
+  set the parameter ``sum_over_features`` to ``False``.
+
+Backward compatibility package aliases and other deprecated classes and
+functions will be removed in version 0.11.
+
+
+People
+------
+
+38 people contributed to this release.
+
+- 387 `Vlad Niculae`_
+- 320 `Olivier Grisel`_
+- 192 `Lars Buitinck`_
+- 179 `Gael Varoquaux`_
+- 168 `Fabian Pedregosa`_ (`INRIA`_, `Parietal Team`_)
+- 127 `Jake Vanderplas`_
+- 120 `Mathieu Blondel`_
+- 85 `Alexandre Passos`_
+- 67 `Alexandre Gramfort`_
+- 57 `Peter Prettenhofer`_
+- 56 `Gilles Louppe`_
+- 42 Robert Layton
+- 38 Nelle Varoquaux
+- 32 :user:`Jean Kossaifi `
+- 30 Conrad Lee
+- 22 Pietro Berkes
+- 18 andy
+- 17 David Warde-Farley
+- 12 Brian Holt
+- 11 Robert
+- 8 Amit Aides
+- 8 :user:`Virgile Fritsch `
+- 7 `Yaroslav Halchenko`_
+- 6 Salvatore Masecchia
+- 5 Paolo Losi
+- 4 Vincent Schut
+- 3 Alexis Metaireau
+- 3 Bryan Silverthorn
+- 3 `Andreas Müller`_
+- 2 Minwoo Jake Lee
+- 1 Emmanuelle Gouillart
+- 1 Keith Goodman
+- 1 Lucas Wiman
+- 1 `Nicolas Pinto`_
+- 1 Thouis (Ray) Jones
+- 1 Tim Sheerman-Chase
+
+
+.. _changes_0_8:
+
+Version 0.8
+===========
+
+**May 11, 2011**
+
+scikit-learn 0.8 was released in May 2011, one month after the first
+"international" `scikit-learn coding sprint
+`_ and is
+marked by the inclusion of important modules (:ref:`hierarchical_clustering`,
+:ref:`cross_decomposition` and :ref:`NMF`), by initial support for Python 3,
+and by important enhancements and bug fixes.
+
+
+Changelog
+---------
+
+Several new modules were introduced during this release:
+
+- New :ref:`hierarchical_clustering` module by Vincent Michel,
+  `Bertrand Thirion`_, `Alexandre Gramfort`_ and `Gael Varoquaux`_.
+
+- :ref:`kernel_pca` implementation by `Mathieu Blondel`_.
+
+- :ref:`labeled_faces_in_the_wild` by `Olivier Grisel`_.
+
+- New :ref:`cross_decomposition` module by `Edouard Duchesnay`_.
+
+- :ref:`NMF` module by `Vlad Niculae`_.
+
+- Implementation of the :ref:`oracle_approximating_shrinkage` algorithm by
+  :user:`Virgile Fritsch ` in the :ref:`covariance` module.
+
+
+Some other modules benefited from significant improvements or cleanups.
+
+
+- Initial support for Python 3: builds and imports cleanly, with some
+  modules usable while others have failing tests, by `Fabian Pedregosa`_.
+
+- :class:`decomposition.PCA` is now usable from the Pipeline object by `Olivier Grisel`_.
+
+- Guide :ref:`performance-howto` by `Olivier Grisel`_.
+
+- Fixes for memory leaks in the libsvm bindings and a 64-bit safer BallTree
+  by Lars Buitinck.
+
+- Bug and style fixes in the :ref:`k_means` algorithm by Jan Schlüter.
+
+- Added a ``converged`` attribute to Gaussian Mixture Models by Vincent Schut.
+
+- Implemented ``transform`` and ``predict_log_proba`` in
+  :class:`discriminant_analysis.LinearDiscriminantAnalysis` by `Mathieu Blondel`_.
+
+- Refactoring in the :ref:`svm` module and bug fixes by `Fabian Pedregosa`_,
+  `Gael Varoquaux`_ and Amit Aides.
+
+- Refactored SGD module (removed code duplication, better variable naming),
+  added interface for sample weight by `Peter Prettenhofer`_.
+
+- Wrapped BallTree with Cython by Thouis (Ray) Jones.
+
+- Added function :func:`svm.l1_min_c` by Paolo Losi (a short usage sketch
+  follows this list).
+
+- Typos, doc style, etc. by `Yaroslav Halchenko`_, `Gael Varoquaux`_,
+  `Olivier Grisel`_, Yann Malet, `Nicolas Pinto`_, Lars Buitinck and
+  `Fabian Pedregosa`_.
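+
+To make the new helper concrete, here is a minimal usage sketch of
+:func:`svm.l1_min_c`; the iris data and the ``10 * c_min`` value are just
+illustrative choices::
+
+    from sklearn.datasets import load_iris
+    from sklearn.svm import LinearSVC, l1_min_c
+
+    X, y = load_iris(return_X_y=True)
+
+    # Smallest C at which an L1-penalized linear model keeps at least one
+    # non-zero coefficient; a grid of C values should start at this bound.
+    c_min = l1_min_c(X, y)
+    clf = LinearSVC(C=10 * c_min, penalty='l1', dual=False).fit(X, y)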
+
+
+People
+------
+
+People that made this release possible, preceded by number of commits:
+
+
+- 159 `Olivier Grisel`_
+- 96 `Gael Varoquaux`_
+- 96 `Vlad Niculae`_
+- 94 `Fabian Pedregosa`_
+- 36 `Alexandre Gramfort`_
+- 32 Paolo Losi
+- 31 `Edouard Duchesnay`_
+- 30 `Mathieu Blondel`_
+- 25 `Peter Prettenhofer`_
+- 22 `Nicolas Pinto`_
+- 11 :user:`Virgile Fritsch `
+- 7 Lars Buitinck
+- 6 Vincent Michel
+- 5 `Bertrand Thirion`_
+- 4 Thouis (Ray) Jones
+- 4 Vincent Schut
+- 3 Jan Schlüter
+- 2 Julien Miotte
+- 2 `Matthieu Perrot`_
+- 2 Yann Malet
+- 2 `Yaroslav Halchenko`_
+- 1 Amit Aides
+- 1 `Andreas Müller`_
+- 1 Feth Arezki
+- 1 Meng Xinfan
+
+
+.. _changes_0_7:
+
+Version 0.7
+===========
+
+**March 2, 2011**
+
+scikit-learn 0.7 was released in March 2011, roughly three months
+after the 0.6 release. This release is marked by the speed
+improvements in existing algorithms like k-Nearest Neighbors and
+K-Means, and by the inclusion of an efficient algorithm for
+computing the Ridge Generalized Cross Validation solution. Unlike the
+preceding release, no new modules were added to this release.
+
+Changelog
+---------
+
+- Performance improvements for Gaussian Mixture Model sampling [Jan
+  Schlüter].
+
+- Implementation of efficient leave-one-out cross-validated Ridge in
+  :class:`linear_model.RidgeCV` [`Mathieu Blondel`_].
+
+- Better handling of collinearity and early stopping in
+  :func:`linear_model.lars_path` [`Alexandre Gramfort`_ and `Fabian
+  Pedregosa`_].
+
+- Fixes for liblinear ordering of labels and sign of coefficients
+  [Dan Yamins, Paolo Losi, `Mathieu Blondel`_ and `Fabian Pedregosa`_].
+
+- Performance improvements for Nearest Neighbors algorithm in
+  high-dimensional spaces [`Fabian Pedregosa`_].
+
+- Performance improvements for :class:`cluster.KMeans` [`Gael
+  Varoquaux`_ and `James Bergstra`_].
+
+- Sanity checks for SVM-based classes [`Mathieu Blondel`_].
+
+- Refactoring of :class:`neighbors.NeighborsClassifier` and
+  :func:`neighbors.kneighbors_graph`: added different algorithms for
+  the k-Nearest Neighbor Search and implemented a more stable
+  algorithm for finding barycenter weights. Also added some
+  developer documentation for this module, see
+  `notes_neighbors
+  `_ for more information [`Fabian Pedregosa`_].
+
+- Documentation improvements: Added :class:`pca.RandomizedPCA` and
+  :class:`linear_model.LogisticRegression` to the class
+  reference. Also added references of matrices used for clustering
+  and other fixes [`Gael Varoquaux`_, `Fabian Pedregosa`_, `Mathieu
+  Blondel`_, `Olivier Grisel`_, Virgile Fritsch, Emmanuelle
+  Gouillart].
+
+- Bound ``decision_function`` in classes that make use of liblinear_,
+  for dense and sparse variants, like :class:`svm.LinearSVC` or
+  :class:`linear_model.LogisticRegression` [`Fabian Pedregosa`_].
+
+- Performance and API improvements to
+  :func:`metrics.euclidean_distances` and to
+  :class:`pca.RandomizedPCA` [`James Bergstra`_].
+
+- Fix compilation issues under NetBSD [Kamel Ibn Hassen Derouiche].
+
+- Allow input sequences of different lengths in :class:`hmm.GaussianHMM`
+  [`Ron Weiss`_].
+
+- Fix bug in affinity propagation caused by incorrect indexing [Xinfan Meng].
+
+
+People
+------
+
+People that made this release possible, preceded by number of commits:
+
+- 85 `Fabian Pedregosa`_
+- 67 `Mathieu Blondel`_
+- 20 `Alexandre Gramfort`_
+- 19 `James Bergstra`_
+- 14 Dan Yamins
+- 13 `Olivier Grisel`_
+- 12 `Gael Varoquaux`_
+- 4 `Edouard Duchesnay`_
+- 4 `Ron Weiss`_
+- 2 Satrajit Ghosh
+- 2 Vincent Dubourg
+- 1 Emmanuelle Gouillart
+- 1 Kamel Ibn Hassen Derouiche
+- 1 Paolo Losi
+- 1 VirgileFritsch
+- 1 `Yaroslav Halchenko`_
+- 1 Xinfan Meng
+
+
+.. _changes_0_6:
+
+Version 0.6
+===========
+
+**December 21, 2010**
+
+scikit-learn 0.6 was released in December 2010. It is marked by the
+inclusion of several new modules and a general renaming of old
+ones. It is also marked by the inclusion of new examples, including
+applications to real-world datasets.
+
+
+Changelog
+---------
+
+- New `stochastic gradient
+  `_ descent
+  module by Peter Prettenhofer. The module comes with complete
+  documentation and examples.
+
+- Improved svm module: memory consumption has been reduced by 50%,
+  heuristic to automatically set class weights, possibility to
+  assign weights to samples (see
+  :ref:`sphx_glr_auto_examples_svm_plot_weighted_samples.py` for an example).
+
+- New :ref:`gaussian_process` module by Vincent Dubourg. This module
+  also has great documentation and some very neat examples. See
+  example_gaussian_process_plot_gp_regression.py or
+  example_gaussian_process_plot_gp_probabilistic_classification_after_regression.py
+  for a taste of what can be done.
+
+- It is now possible to use liblinear’s Multi-class SVC (option
+  ``multi_class`` in :class:`svm.LinearSVC`).
+
+- New features and performance improvements of text feature
+  extraction.
+
+- Improved sparse matrix support, both in main classes
+  (:class:`grid_search.GridSearchCV`) and in the sklearn.svm.sparse
+  and sklearn.linear_model.sparse modules.
+
+- Lots of cool new examples and a new section that uses real-world
+  datasets were created. These include:
+  :ref:`sphx_glr_auto_examples_applications_plot_face_recognition.py`,
+  :ref:`sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py`,
+  :ref:`sphx_glr_auto_examples_applications_svm_gui.py`,
+  :ref:`sphx_glr_auto_examples_applications_wikipedia_principal_eigenvector.py` and
+  others.
+
+- Faster :ref:`least_angle_regression` algorithm. It is now 2x
+  faster than the R version in the worst case and up to 10x faster
+  in some cases.
+
+- Faster coordinate descent algorithm. In particular, the full path
+  version of lasso (:func:`linear_model.lasso_path`) is more than
+  200x faster than before.
+
+- It is now possible to get probability estimates from a
+  :class:`linear_model.LogisticRegression` model.
+
+- Module renaming: the ``glm`` module has been renamed to ``linear_model``,
+  the ``gmm`` module has been included in the more general ``mixture``
+  module, and the ``sgd`` module has been included in ``linear_model``.
+
+- Lots of bug fixes and documentation improvements.
+
+
+People
+------
+
+People that made this release possible, preceded by number of commits:
+
+ * 207 `Olivier Grisel`_
+ * 167 `Fabian Pedregosa`_
+ * 97 `Peter Prettenhofer`_
+ * 68 `Alexandre Gramfort`_
+ * 59 `Mathieu Blondel`_
+ * 55 `Gael Varoquaux`_
+ * 33 Vincent Dubourg
+ * 21 `Ron Weiss`_
+ * 9 Bertrand Thirion
+ * 3 `Alexandre Passos`_
+ * 3 Anne-Laure Fouque
+ * 2 Ronan Amicel
+ * 1 `Christian Osendorfer`_
+
+
+.. _changes_0_5:
+
+
+Version 0.5
+===========
+
+**October 11, 2010**
+
+Changelog
+---------
+
+New classes
+-----------
+
+- Support for sparse matrices in some classifiers of modules
+  ``svm`` and ``linear_model`` (see :class:`svm.sparse.SVC`,
+  :class:`svm.sparse.SVR`, :class:`svm.sparse.LinearSVC`,
+  :class:`linear_model.sparse.Lasso`, :class:`linear_model.sparse.ElasticNet`).
+
+- New :class:`pipeline.Pipeline` object to compose different estimators.
+
+- Recursive Feature Elimination routines in module
+  :ref:`feature_selection`.
+
+- Addition of various classes capable of cross validation in the
+  ``linear_model`` module (:class:`linear_model.LassoCV`,
+  :class:`linear_model.ElasticNetCV`, etc.).
+
+- New, more efficient LARS algorithm implementation. The Lasso
+  variant of the algorithm is also implemented. See
+  :func:`linear_model.lars_path`, :class:`linear_model.Lars` and
+  :class:`linear_model.LassoLars`.
+
+- New Hidden Markov Models module (see classes
+  :class:`hmm.GaussianHMM`, :class:`hmm.MultinomialHMM`,
+  :class:`hmm.GMMHMM`).
+
+- New module ``feature_extraction`` (see :ref:`class reference
+  `).
+
+- New FastICA algorithm in module ``sklearn.fastica``.
+
+
+Documentation
+-------------
+
+- Improved documentation for many modules, now separating
+  narrative documentation from the class reference. As an example,
+  see `documentation for the SVM module
+  `_ and the
+  complete `class reference
+  `_.
+
+Fixes
+-----
+
+- API changes: variable names now adhere to PEP-8 and are more
+  meaningful.
+
+- Fixes for the svm module to run in a shared memory context
+  (multiprocessing).
+
+- It is again possible to generate LaTeX (and thus PDF) from the
+  Sphinx docs.
+
+Examples
+--------
+
+- New examples using some of the mlcomp datasets:
+  ``sphx_glr_auto_examples_mlcomp_sparse_document_classification.py`` (since removed) and
+  :ref:`sphx_glr_auto_examples_text_document_classification_20newsgroups.py`.
+
+- Many more examples. `See here
+  `_
+  for the full list of examples.
+
+
+External dependencies
+---------------------
+
+- Joblib is now a dependency of this package, although it is
+  shipped with scikit-learn (as ``sklearn.externals.joblib``).
+
+Removed modules
+---------------
+
+- Module ``ann`` (Artificial Neural Networks) has been removed from
+  the distribution. Users wanting this sort of algorithm should
+  take a look at pybrain.
+
+Misc
+----
+
+- New Sphinx theme for the web page.
+
+
+Authors
+-------
+
+The following is a list of authors for this release, preceded by
+number of commits:
+
+ * 262 Fabian Pedregosa
+ * 240 Gael Varoquaux
+ * 149 Alexandre Gramfort
+ * 116 Olivier Grisel
+ * 40 Vincent Michel
+ * 38 Ron Weiss
+ * 23 Matthieu Perrot
+ * 10 Bertrand Thirion
+ * 9 VirgileFritsch
+ * 7 Yaroslav Halchenko
+ * 6 Edouard Duchesnay
+ * 4 Mathieu Blondel
+ * 1 Ariel Rokem
+ * 1 Matthieu Brucher
+
+Version 0.4
+===========
+
+**August 26, 2010**
+
+Changelog
+---------
+
+Major changes in this release include:
+
+- Coordinate Descent algorithm (Lasso, ElasticNet) refactoring &
+  speed improvements (roughly 100x faster).
+
+- Coordinate Descent refactoring (and bug fixing) for consistency
+  with R's GLMNET package.
+
+- New metrics module.
+
+- New GMM module contributed by Ron Weiss.
+
+- Implementation of the LARS algorithm (without the Lasso variant for now).
+
+- ``feature_selection`` module redesign.
+
+- Migration to Git as the version control system.
+
+- Removal of the obsolete ``attrselect`` module.
+
+- Renaming of private compiled extensions (added underscore).
+
+- Removal of legacy unmaintained code.
+ +- Documentation improvements (both docstring and rst). + +- Improvement of the build system to (optionally) link with MKL. + Also, provide a lite BLAS implementation in case no system-wide BLAS is + found. + +- Lots of new examples. + +- Many, many bug fixes ... + + +Authors +------- + +The committer list for this release is the following (preceded by number +of commits): + + * 143 Fabian Pedregosa + * 35 Alexandre Gramfort + * 34 Olivier Grisel + * 11 Gael Varoquaux + * 5 Yaroslav Halchenko + * 2 Vincent Michel + * 1 Chris Filo Gorgolewski + + +Earlier versions +================ + +Earlier versions included contributions by Fred Mailhot, David Cooke, +David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson. + +.. _Olivier Grisel: https://twitter.com/ogrisel + +.. _Gael Varoquaux: http://gael-varoquaux.info + +.. _Alexandre Gramfort: http://alexandre.gramfort.net + +.. _Fabian Pedregosa: http://fa.bianp.net + +.. _Mathieu Blondel: http://www.mblondel.org + +.. _James Bergstra: http://www-etud.iro.umontreal.ca/~bergstrj/ + +.. _liblinear: http://www.csie.ntu.edu.tw/~cjlin/liblinear/ + +.. _Yaroslav Halchenko: http://www.onerussian.com/ + +.. _Vlad Niculae: http://vene.ro + +.. _Edouard Duchesnay: https://sites.google.com/site/duchesnay/home + +.. _Peter Prettenhofer: https://sites.google.com/site/peterprettenhofer/ + +.. _Alexandre Passos: http://atpassos.me + +.. _Nicolas Pinto: https://twitter.com/npinto + +.. _Bertrand Thirion: https://team.inria.fr/parietal/bertrand-thirions-page + +.. _Andreas Müller: http://peekaboo-vision.blogspot.com + +.. _Matthieu Perrot: http://brainvisa.info/biblio/lnao/en/Author/PERROT-M.html + +.. _Jake Vanderplas: http://staff.washington.edu/jakevdp/ + +.. _Gilles Louppe: http://www.montefiore.ulg.ac.be/~glouppe/ + +.. _INRIA: http://www.inria.fr + +.. _Parietal Team: http://parietal.saclay.inria.fr/ + +.. _David Warde-Farley: http://www-etud.iro.umontreal.ca/~wardefar/ + +.. _Brian Holt: http://personal.ee.surrey.ac.uk/Personal/B.Holt + +.. _Satrajit Ghosh: http://www.mit.edu/~satra/ + +.. _Robert Layton: https://twitter.com/robertlayton + +.. _Scott White: https://twitter.com/scottblanc + +.. _David Marek: http://www.davidmarek.cz/ + +.. _Christian Osendorfer: https://osdf.github.io + +.. _Arnaud Joly: http://www.ajoly.org + +.. _Rob Zinkov: http://zinkov.com + +.. _Joel Nothman: http://joelnothman.com + +.. _Nicolas Trésegnie : http://nicolastr.com/ + +.. _Kemal Eren: http://www.kemaleren.com + +.. _Yann Dauphin: http://ynd.github.io/ + +.. _Yannick Schwartz: https://team.inria.fr/parietal/schwarty/ + +.. _Kyle Kastner: http://kastnerkyle.github.io + +.. _Daniel Nouri: http://danielnouri.org + +.. _Manoj Kumar: https://manojbits.wordpress.com + +.. _Luis Pedro Coelho: http://luispedro.org + +.. _Fares Hedyati: http://www.eecs.berkeley.edu/~fareshed + +.. _Antony Lee: https://www.ocf.berkeley.edu/~antonyl/ + +.. _Martin Billinger: http://tnsre.embs.org/author/martinbillinger + +.. _Matteo Visconti di Oleggio Castello: http://www.mvdoc.me + +.. _Trevor Stephens: http://trevorstephens.com/ + +.. _Jan Hendrik Metzen: https://jmetzen.github.io/ + +.. _Will Dawson: http://www.dawsonresearch.com + +.. _Andrew Tulloch: http://tullo.ch/ + +.. _Hanna Wallach: http://dirichlet.net/ + +.. _Yan Yi: http://seowyanyi.org + +.. _Hervé Bredin: http://herve.niderb.fr/ + +.. _Eric Martin: http://www.ericmart.in + +.. _Nicolas Goix: https://perso.telecom-paristech.fr/~goix/ + +.. _Sebastian Raschka: http://sebastianraschka.com + +.. 
_Brian McFee: https://bmcfee.github.io
+
+.. _Valentin Stolbunov: http://www.vstolbunov.com
+
+.. _Jaques Grobler: https://github.com/jaquesgrobler
+
+.. _Lars Buitinck: https://github.com/larsmans
+
+.. _Loic Esteve: https://github.com/lesteve
+
+.. _Noel Dawe: https://github.com/ndawe
+
+.. _Raghav RV: https://github.com/raghavrv
+
+.. _Tom Dupre la Tour: https://github.com/TomDLT
+
+.. _Nelle Varoquaux: https://github.com/nellev
+
+.. _Bing Tian Dai: https://github.com/btdai
+
+.. _Dylan Werner-Meier: https://github.com/unautre
+
+.. _Alyssa Batula: https://github.com/abatula
+
+.. _Srivatsan Ramesh: https://github.com/srivatsan-ramesh
+
+.. _Ron Weiss: http://www.ee.columbia.edu/~ronw
+
+.. _Kathleen Chen: https://github.com/kchen17
+
+.. _Vincent Pham: https://github.com/vincentpham1991
+
+.. _Denis Engemann: http://denis-engemann.de
+
+.. _Anish Shah: https://github.com/AnishShah
+
+.. _Neeraj Gangwar: http://neerajgangwar.in
+
+.. _Arthur Mensch: https://amensch.fr
+
+.. _Ivan Nazarov: https://github.com/ivannz

From d86b3fdb53c5168d3fe3ee10cc46c0703bccc93b Mon Sep 17 00:00:00 2001
From: Ivan Nazarov 
Date: Sun, 9 Apr 2017 02:27:26 +0300
Subject: [PATCH 03/41] Update to address #8711

---
 doc/whats_new.rst             |  1 +
 sklearn/svm/_classes.py       | 20 ++++++++++++++++++++
 sklearn/svm/tests/test_svm.py |  3 ++-
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index a2e79cb930838..6baab9d087f36 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -243,6 +243,7 @@ Model selection and evaluation
    cumulative gain (NDCG).
    :issue:`7739` by :user:`David Gasquez `.
    By `Arthur Mensch`_.
+
 - Added the :class:`svm.SVDD` class for novelty detection based on
   soft minimal volume hypersphere around the sample data.
   By `Ivan Nazarov`_.
diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py
index 918e7f3f8a116..d19ba44dad173 100644
--- a/sklearn/svm/_classes.py
+++ b/sklearn/svm/_classes.py
@@ -1964,3 +1964,23 @@ def decision_function(self, X):
         """
         dec = self._decision_function(X)
         return dec
+
+    def predict(self, X):
+        """
+        Perform classification on samples in X.
+
+        For a one-class model, +1 or -1 is returned.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix}, shape (n_samples, n_features)
+            For kernel="precomputed", the expected shape of X is
+            [n_samples_test, n_samples_train]
+
+        Returns
+        -------
+        y_pred : array, shape (n_samples,)
+            Class labels for samples in X.
+ """ + y = super(SVDD, self).predict(X) + return np.asarray(y, dtype=np.intp) diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index b3b864826c546..a19285e4b8728 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -368,7 +368,8 @@ def test_svdd(): clf.fit(X) pred = clf.predict(T) - assert_array_almost_equal(pred, [-1, -1, -1]) + assert_array_equal(pred, [-1, -1, -1]) + assert_equal(pred.dtype, np.dtype('intp')) assert_array_almost_equal(clf.intercept_, [0.491], decimal=3) assert_array_almost_equal(clf.dual_coef_, [[0.632, 0.233, 0.633, 0.234, 0.632, 0.633]], From 766a3444aacccd7f2b61e84df15476e78d9bb284 Mon Sep 17 00:00:00 2001 From: Ivan Date: Fri, 9 Jun 2017 01:15:16 +0300 Subject: [PATCH 04/41] docstring fix reflecting #9048 --- sklearn/svm/_classes.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py index d19ba44dad173..2abefe1dd8d7d 100644 --- a/sklearn/svm/_classes.py +++ b/sklearn/svm/_classes.py @@ -1951,7 +1951,9 @@ def fit(self, X, y=None, sample_weight=None, **params): return self def decision_function(self, X): - """Distance of the samples X to the separating hyperplane. + """Signed distance to the enveloping hypersphere. + + Signed distance is positive for an inlier and negative for an outlier. Parameters ---------- From 4093763bf972c6ae15d4832626f43a909ed8c283 Mon Sep 17 00:00:00 2001 From: Ivan Date: Fri, 15 Sep 2017 04:16:51 +0200 Subject: [PATCH 05/41] updated what's new according to #9505 --- doc/whats_new.rst | 5783 ------------------------------- doc/whats_new/_contributors.rst | 4 +- doc/whats_new/v0.20.rst | 4 + 3 files changed, 7 insertions(+), 5784 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 6baab9d087f36..3354a6b13f32b 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -28,5786 +28,3 @@ on libraries.io to be notified when new versions are released. Version 0.14 Version 0.13 Older Versions - -Version 0.20 (under development) -================================ - -Changed models --------------- - -The following estimators and functions, when fit with the same data and -parameters, may produce different models from the previous version. This often -occurs due to changes in the modelling logic (bug fixes or enhancements), or in -random sampling procedures. - -- :class:`decomposition.IncrementalPCA` in Python 2 (bug fix) - -Details are listed in the changelog below. - -(While we are trying to better inform users by providing this information, we -cannot assure that this list is complete.) - -Changelog ---------- - -New features -............ - -Classifiers and regressors - -- :class:`ensemble.GradientBoostingClassifier` and - :class:`ensemble.GradientBoostingRegressor` now support early stopping - via ``n_iter_no_change``, ``validation_fraction`` and ``tol``. :issue:`7071` - by `Raghav RV`_ - -- Added :class:`naive_bayes.ComplementNB`, which implements the Complement - Naive Bayes classifier described in Rennie et al. (2003). - By :user:`Michael A. Alcorn `. - -Enhancements -............ - -Model evaluation and meta-estimators - -- A scorer based on :func:`metrics.brier_score_loss` is also available. - :issue:`9521` by :user:`Hanmin Qin `. - -Bug fixes -......... - -Decomposition, manifold learning and clustering - -- Fix for uninformative error in :class:`decomposition.incremental_pca`: - now an error is raised if the number of components is larger than the - chosen batch size. 
The ``n_components=None`` case was adapted accordingly. - :issue:`6452`. By :user:`Wally Gauze `. - -- Fixed a bug where the ``partial_fit`` method of - :class:`decomposition.IncrementalPCA` used integer division instead of float - division on Python 2 versions. :issue:`9492` by - :user:`James Bourbeau `. - -Version 0.19 -============ - -**Release Candidate (0.19b2) July 17, 2017** - -Highlights ----------- - -We are excited to release a number of great new features including -:class:`neighbors.LocalOutlierFactor` for anomaly detection, -:class:`preprocessing.QuantileTransformer` for robust feature transformation, -and the :class:`multioutput.ClassifierChain` meta-estimator to simply account -for dependencies between classes in multilabel problems. We have some new -algorithms in existing estimators, such as multiplicative update in -:class:`decomposition.NMF` and multinomial -:class:`linear_model.LogisticRegression` with L1 loss (use ``solver='saga'``). - -Cross validation is now able to return the results from multiple metric -evaluations. The new :func:`model_selection.cross_validate` can return many -scores on the test data as well as training set performance and timings, and we -have extended the ``scoring`` and ``refit`` parameters for grid/randomized -search :ref:`to handle multiple metrics `. - -You can also learn faster. For instance, the :ref:`new option to cache -transformations ` in :class:`pipeline.Pipeline` makes grid -search over pipelines including slow transformations much more efficient. And -you can predict faster: if you're sure you know what you're doing, you can turn -off validating that the input is finite using :func:`config_context`. - -We've made some important fixes too. We've fixed a longstanding implementation -error in :func:`metrics.average_precision_score`, so please be cautious with -prior results reported from that function. A number of errors in the -:class:`manifold.TSNE` implementation have been fixed, particularly in the -default Barnes-Hut approximation. :class:`semi_supervised.LabelSpreading` and -:class:`semi_supervised.LabelPropagation` have had substantial fixes. -LabelPropagation was previously broken. LabelSpreading should now correctly -respect its alpha parameter. - -Changed models --------------- - -The following estimators and functions, when fit with the same data and -parameters, may produce different models from the previous version. This often -occurs due to changes in the modelling logic (bug fixes or enhancements), or in -random sampling procedures. - -- :class:`cluster.KMeans` with sparse X and initial centroids given (bug fix) -- :class:`cross_decomposition.PLSRegression` - with ``scale=True`` (bug fix) -- :class:`ensemble.GradientBoostingClassifier` and - :class:`ensemble.GradientBoostingRegressor` where ``min_impurity_split`` is used (bug fix) -- gradient boosting ``loss='quantile'`` (bug fix) -- :class:`ensemble.IsolationForest` (bug fix) -- :class:`feature_selection.SelectFdr` (bug fix) -- :class:`linear_model.RANSACRegressor` (bug fix) -- :class:`linear_model.LassoLars` (bug fix) -- :class:`linear_model.LassoLarsIC` (bug fix) -- :class:`manifold.TSNE` (bug fix) -- :class:`neighbors.NearestCentroid` (bug fix) -- :class:`semi_supervised.LabelSpreading` (bug fix) -- :class:`semi_supervised.LabelPropagation` (bug fix) -- tree based models where ``min_weight_fraction_leaf`` is used (enhancement) - -Details are listed in the changelog below. 
-
-(While we are trying to better inform users by providing this information, we
-cannot assure that this list is complete.)
-
-Changelog
----------
-
-New features
-............
-
-Classifiers and regressors
-
-- Added :class:`multioutput.ClassifierChain` for multi-label
-  classification. By `Adam Kleczewski `_.
-
-- Added solver ``'saga'`` that implements the improved version of Stochastic
-  Average Gradient, in :class:`linear_model.LogisticRegression` and
-  :class:`linear_model.Ridge`. It allows the use of L1 penalty with
-  multinomial logistic loss, and behaves marginally better than 'sag'
-  during the first epochs of ridge and logistic regression.
-  :issue:`8446` by `Arthur Mensch`_.
-
-Other estimators
-
-- Added the :class:`neighbors.LocalOutlierFactor` class for anomaly
-  detection based on nearest neighbors.
-  :issue:`5279` by `Nicolas Goix`_ and `Alexandre Gramfort`_.
-
-- Added :class:`preprocessing.QuantileTransformer` class and
-  :func:`preprocessing.quantile_transform` function for feature
-  normalization based on quantiles.
-  :issue:`8363` by :user:`Denis Engemann `,
-  :user:`Guillaume Lemaitre `, `Olivier Grisel`_, `Raghav RV`_,
-  :user:`Thierry Guillemot `, and `Gael Varoquaux`_.
-
-- The new solver ``'mu'`` implements a Multiplicative Update in
-  :class:`decomposition.NMF`, allowing the optimization of all
-  beta-divergences, including the Frobenius norm, the generalized
-  Kullback-Leibler divergence and the Itakura-Saito divergence.
-  :issue:`5295` by `Tom Dupre la Tour`_.
-
-Model selection and evaluation
-
-- :class:`model_selection.GridSearchCV` and
-  :class:`model_selection.RandomizedSearchCV` now support simultaneous
-  evaluation of multiple metrics. Refer to the
-  :ref:`multimetric_grid_search` section of the user guide for more
-  information. :issue:`7388` by `Raghav RV`_.
-
-- Added the :func:`model_selection.cross_validate` which allows evaluation
-  of multiple metrics. This function returns a dict with more useful
-  information from cross-validation such as the train scores, fit times and
-  score times.
-  Refer to the :ref:`multimetric_cross_validation` section of the user guide
-  for more information. :issue:`7388` by `Raghav RV`_.
-
-- Added :func:`metrics.mean_squared_log_error`, which computes
-  the mean square error of the logarithmic transformation of targets,
-  particularly useful for targets with an exponential trend.
-  :issue:`7655` by :user:`Karan Desai `.
-
-- Added :func:`metrics.dcg_score` and :func:`metrics.ndcg_score`, which
-  compute Discounted cumulative gain (DCG) and Normalized discounted
-  cumulative gain (NDCG).
-  :issue:`7739` by :user:`David Gasquez `.
-
-- Added the :class:`svm.SVDD` class for novelty detection based on
-  soft minimal volume hypersphere around the sample data.
-  By `Ivan Nazarov`_.
-
-- Added the :class:`model_selection.RepeatedKFold` and
-  :class:`model_selection.RepeatedStratifiedKFold`.
-  :issue:`8120` by `Neeraj Gangwar`_.
-
-Miscellaneous
-
-- Validation that input data contains no NaN or inf can now be suppressed
-  using :func:`config_context`, at your own risk. This will save on runtime,
-  and may be particularly useful for prediction time. :issue:`7548` by
-  `Joel Nothman`_.
-
-- Added a test to ensure parameter listing in docstrings matches the
-  function/class signature. :issue:`9206` by `Alexandre Gramfort`_ and
-  `Raghav RV`_.
-
-Enhancements
-............
-
-Trees and ensembles
-
-- The ``min_weight_fraction_leaf`` constraint in tree construction is now
-  more efficient, taking a fast path to declare a node a leaf if its weight
-  is less than 2 * the minimum. Note that the constructed tree will be
-  different from previous versions where ``min_weight_fraction_leaf`` is
-  used. :issue:`7441` by :user:`Nelson Liu `.
-
-- :class:`ensemble.GradientBoostingClassifier` and :class:`ensemble.GradientBoostingRegressor`
-  now support sparse input for prediction.
-  :issue:`6101` by :user:`Ibraim Ganiev `.
-
-- :class:`ensemble.VotingClassifier` now allows changing estimators by using
-  :meth:`ensemble.VotingClassifier.set_params`. An estimator can also be
-  removed by setting it to ``None``.
-  :issue:`7674` by :user:`Yichuan Liu `.
-
-- :func:`tree.export_graphviz` now shows configurable number of decimal
-  places. :issue:`8698` by :user:`Guillaume Lemaitre `.
-
-- Added ``flatten_transform`` parameter to :class:`ensemble.VotingClassifier`
-  to change output shape of `transform` method to 2 dimensional.
-  :issue:`7794` by :user:`Ibraim Ganiev ` and
-  :user:`Herilalaina Rakotoarison `.
-
-Linear, kernelized and related models
-
-- :class:`linear_model.SGDClassifier`, :class:`linear_model.SGDRegressor`,
-  :class:`linear_model.PassiveAggressiveClassifier`,
-  :class:`linear_model.PassiveAggressiveRegressor` and
-  :class:`linear_model.Perceptron` now expose ``max_iter`` and
-  ``tol`` parameters, to handle convergence more precisely.
-  ``n_iter`` parameter is deprecated, and the fitted estimator exposes
-  a ``n_iter_`` attribute, with actual number of iterations before
-  convergence. :issue:`5036` by `Tom Dupre la Tour`_.
-
-- Added ``average`` parameter to perform weight averaging in
-  :class:`linear_model.PassiveAggressiveClassifier`. :issue:`4939`
-  by :user:`Andrea Esuli `.
-
-- :class:`linear_model.RANSACRegressor` no longer throws an error
-  when calling ``fit`` if no inliers are found in its first iteration.
-  Furthermore, causes of skipped iterations are tracked in newly added
-  attributes, ``n_skips_*``.
-  :issue:`7914` by :user:`Michael Horrell `.
-
-- In :class:`gaussian_process.GaussianProcessRegressor`, method ``predict``
-  is a lot faster with ``return_std=True``. :issue:`8591` by
-  :user:`Hadrien Bertrand `.
- -- Added ``return_std`` to ``predict`` method of - :class:`linear_model.ARDRegression` and - :class:`linear_model.BayesianRidge`. - :issue:`7838` by :user:`Sergey Feldman `. - -- Memory usage enhancements: Prevent cast from float32 to float64 in: - :class:`linear_model.MultiTaskElasticNet`; - :class:`linear_model.LogisticRegression` when using newton-cg solver; and - :class:`linear_model.Ridge` when using svd, sparse_cg, cholesky or lsqr - solvers. :issue:`8835`, :issue:`8061` by :user:`Joan Massich ` and :user:`Nicolas - Cordier ` and :user:`Thierry Guillemot `. - -Other predictors - -- Custom metrics for the :mod:`neighbors` binary trees now have - fewer constraints: they must take two 1d-arrays and return a float. - :issue:`6288` by `Jake Vanderplas`_. - -- ``algorithm='auto`` in :mod:`neighbors` estimators now chooses the most - appropriate algorithm for all input types and metrics. :issue:`9145` by - :user:`Herilalaina Rakotoarison ` and :user:`Reddy Chinthala - `. - -Decomposition, manifold learning and clustering - -- :class:`cluster.MiniBatchKMeans` and :class:`cluster.KMeans` - now use significantly less memory when assigning data points to their - nearest cluster center. :issue:`7721` by :user:`Jon Crall `. - -- :class:`decomposition.PCA`, :class:`decomposition.IncrementalPCA` and - :class:`decomposition.TruncatedSVD` now expose the singular values - from the underlying SVD. They are stored in the attribute - ``singular_values_``, like in :class:`decomposition.IncrementalPCA`. - :issue:`7685` by :user:`Tommy Löfstedt ` - -- :class:`decomposition.NMF` now faster when ``beta_loss=0``. - :issue:`9277` by :user:`hongkahjun`. - -- Memory improvements for method ``barnes_hut`` in :class:`manifold.TSNE` - :issue:`7089` by :user:`Thomas Moreau ` and `Olivier Grisel`_. - -- Optimization schedule improvements for Barnes-Hut :class:`manifold.TSNE` - so the results are closer to the one from the reference implementation - `lvdmaaten/bhtsne `_ by :user:`Thomas - Moreau ` and `Olivier Grisel`_. - -- Memory usage enhancements: Prevent cast from float32 to float64 in - :class:`decomposition.PCA` and - :func:`decomposition.randomized_svd_low_rank`. - :issue:`9067` by `Raghav RV`_. - -Preprocessing and feature selection - -- Added ``norm_order`` parameter to :class:`feature_selection.SelectFromModel` - to enable selection of the norm order when ``coef_`` is more than 1D. - :issue:`6181` by :user:`Antoine Wendlinger `. - -- Added ability to use sparse matrices in :func:`feature_selection.f_regression` - with ``center=True``. :issue:`8065` by :user:`Daniel LeJeune `. - -- Small performance improvement to n-gram creation in - :mod:`feature_extraction.text` by binding methods for loops and - special-casing unigrams. :issue:`7567` by :user:`Jaye Doepke ` - -- Relax assumption on the data for the - :class:`kernel_approximation.SkewedChi2Sampler`. Since the Skewed-Chi2 - kernel is defined on the open interval :math:`(-skewedness; +\infty)^d`, - the transform function should not check whether ``X < 0`` but whether ``X < - -self.skewedness``. :issue:`7573` by :user:`Romain Brault `. - -- Made default kernel parameters kernel-dependent in - :class:`kernel_approximation.Nystroem`. - :issue:`5229` by :user:`Saurabh Bansod ` and `Andreas Müller`_. - -Model evaluation and meta-estimators - -- :class:`pipeline.Pipeline` is now able to cache transformers - within a pipeline by using the ``memory`` constructor parameter. - :issue:`7990` by :user:`Guillaume Lemaitre `. 
- -- :class:`pipeline.Pipeline` steps can now be accessed as attributes of its - ``named_steps`` attribute. :issue:`8586` by :user:`Herilalaina - Rakotoarison `. - -- Added ``sample_weight`` parameter to :meth:`pipeline.Pipeline.score`. - :issue:`7723` by :user:`Mikhail Korobov `. - -- Added ability to set ``n_jobs`` parameter to :func:`pipeline.make_union`. - A ``TypeError`` will be raised for any other kwargs. :issue:`8028` - by :user:`Alexander Booth `. - -- :class:`model_selection.GridSearchCV`, - :class:`model_selection.RandomizedSearchCV` and - :func:`model_selection.cross_val_score` now allow estimators with callable - kernels which were previously prohibited. - :issue:`8005` by `Andreas Müller`_ . - -- :func:`model_selection.cross_val_predict` now returns output of the - correct shape for all values of the argument ``method``. - :issue:`7863` by :user:`Aman Dalmia `. - -- Added ``shuffle`` and ``random_state`` parameters to shuffle training - data before taking prefixes of it based on training sizes in - :func:`model_selection.learning_curve`. - :issue:`7506` by :user:`Narine Kokhlikyan `. - -- :class:`model_selection.StratifiedShuffleSplit` now works with multioutput - multiclass (or multilabel) data. :issue:`9044` by `Vlad Niculae`_. - -- Speed improvements to :class:`model_selection.StratifiedShuffleSplit`. - :issue:`5991` by :user:`Arthur Mensch ` and `Joel Nothman`_. - -- Add ``shuffle`` parameter to :func:`model_selection.train_test_split`. - :issue:`8845` by :user:`themrmax ` - -- :class:`multioutput.MultiOutputRegressor` and :class:`multioutput.MultiOutputClassifier` - now support online learning using ``partial_fit``. - :issue: `8053` by :user:`Peng Yu `. - -- Add ``max_train_size`` parameter to :class:`model_selection.TimeSeriesSplit` - :issue:`8282` by :user:`Aman Dalmia `. - -- More clustering metrics are now available through :func:`metrics.get_scorer` - and ``scoring`` parameters. :issue:`8117` by `Raghav RV`_. - -- A scorer based on :func:`metrics.explained_variance_score` is also available. - :issue:`9259` by :user:`Hanmin Qin `. - -Metrics - -- :func:`metrics.matthews_corrcoef` now support multiclass classification. - :issue:`8094` by :user:`Jon Crall `. - -- Add ``sample_weight`` parameter to :func:`metrics.cohen_kappa_score`. - :issue:`8335` by :user:`Victor Poughon `. - -Miscellaneous - -- :func:`utils.check_estimator` now attempts to ensure that methods - transform, predict, etc. do not set attributes on the estimator. - :issue:`7533` by :user:`Ekaterina Krivich `. - -- Added type checking to the ``accept_sparse`` parameter in - :mod:`utils.validation` methods. This parameter now accepts only boolean, - string, or list/tuple of strings. ``accept_sparse=None`` is deprecated and - should be replaced by ``accept_sparse=False``. - :issue:`7880` by :user:`Josh Karnofsky `. - -- Make it possible to load a chunk of an svmlight formatted file by - passing a range of bytes to :func:`datasets.load_svmlight_file`. - :issue:`935` by :user:`Olivier Grisel `. - -- :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor` - now accept non-finite features. :issue:`8931` by :user:`Attractadore`. - -Bug fixes -......... - -Trees and ensembles - -- Fixed a memory leak in trees when using trees with ``criterion='mae'``. - :issue:`8002` by `Raghav RV`_. - -- Fixed a bug where :class:`ensemble.IsolationForest` uses an - an incorrect formula for the average path length - :issue:`8549` by `Peter Wang `_. 
- -- Fixed a bug where :class:`ensemble.AdaBoostClassifier` throws - ``ZeroDivisionError`` while fitting data with single class labels. - :issue:`7501` by :user:`Dominik Krzeminski `. - -- Fixed a bug in :class:`ensemble.GradientBoostingClassifier` and - :class:`ensemble.GradientBoostingRegressor` where a float being compared - to ``0.0`` using ``==`` caused a divide by zero error. :issue:`7970` by - :user:`He Chen `. - -- Fix a bug where :class:`ensemble.GradientBoostingClassifier` and - :class:`ensemble.GradientBoostingRegressor` ignored the - ``min_impurity_split`` parameter. - :issue:`8006` by :user:`Sebastian Pölsterl `. - -- Fixed ``oob_score`` in :class:`ensemble.BaggingClassifier`. - :issue:`8936` by :user:`Michael Lewis ` - -- Fixed excessive memory usage in prediction for random forests estimators. - :issue:`8672` by :user:`Mike Benfield `. - -- Fixed a bug where ``sample_weight`` as a list broke random forests in Python 2 - :issue:`8068` by :user:`xor`. - -- Fixed a bug where :class:`ensemble.IsolationForest` fails when - ``max_features`` is less than 1. - :issue:`5732` by :user:`Ishank Gulati `. - -- Fix a bug where gradient boosting with ``loss='quantile'`` computed - negative errors for negative values of ``ytrue - ypred`` leading to wrong - values when calling ``__call__``. - :issue:`8087` by :user:`Alexis Mignon ` - -- Fix a bug where :class:`ensemble.VotingClassifier` raises an error - when a numpy array is passed in for weights. :issue:`7983` by - :user:`Vincent Pham `. - -- Fixed a bug where :func:`tree.export_graphviz` raised an error - when the length of features_names does not match n_features in the decision - tree. :issue:`8512` by :user:`Li Li `. - -Linear, kernelized and related models - -- Fixed a bug where :func:`linear_model.RANSACRegressor.fit` may run until - ``max_iter`` if it finds a large inlier group early. :issue:`8251` by - :user:`aivision2020`. - -- Fixed a bug where :class:`naive_bayes.MultinomialNB` and - :class:`naive_bayes.BernoulliNB` failed when ``alpha=0``. :issue:`5814` by - :user:`Yichuan Liu ` and :user:`Herilalaina Rakotoarison - `. - -- Fixed a bug where :class:`linear_model.LassoLars` does not give - the same result as the LassoLars implementation available - in R (lars library). :issue:`7849` by :user:`Jair Montoya Martinez `. - -- Fixed a bug in :class:`linear_model.RandomizedLasso`, - :class:`linear_model.Lars`, :class:`linear_model.LassoLars`, - :class:`linear_model.LarsCV` and :class:`linear_model.LassoLarsCV`, - where the parameter ``precompute`` was not used consistently across - classes, and some values proposed in the docstring could raise errors. - :issue:`5359` by `Tom Dupre la Tour`_. - -- Fix inconsistent results between :class:`linear_model.RidgeCV` and - :class:`linear_model.Ridge` when using ``normalize=True``. :issue:`9302` - by `Alexandre Gramfort`_. - -- Fix a bug where :func:`linear_model.LassoLars.fit` sometimes - left ``coef_`` as a list, rather than an ndarray. - :issue:`8160` by :user:`CJ Carey `. - -- Fix :func:`linear_model.BayesianRidge.fit` to return - ridge parameter ``alpha_`` and ``lambda_`` consistent with calculated - coefficients ``coef_`` and ``intercept_``. - :issue:`8224` by :user:`Peter Gedeck `. - -- Fixed a bug in :class:`svm.OneClassSVM` where it returned floats instead of - integer classes. :issue:`8676` by :user:`Vathsala Achar `. - -- Fix AIC/BIC criterion computation in :class:`linear_model.LassoLarsIC`. - :issue:`9022` by `Alexandre Gramfort`_ and :user:`Mehmet Basbug `. 
- -- Fixed a memory leak in our LibLinear implementation. :issue:`9024` by - :user:`Sergei Lebedev ` - -- Fix bug where stratified CV splitters did not work with - :class:`linear_model.LassoCV`. :issue:`8973` by - :user:`Paulo Haddad `. - -- Fixed a bug in :class:`gaussian_process.GaussianProcessRegressor` - when the standard deviation and covariance predicted without fit - would fail with a unmeaningful error by default. - :issue:`6573` by :user:`Quazi Marufur Rahman ` and - `Manoj Kumar`_. - -Other predictors - -- Fix :class:`semi_supervised.BaseLabelPropagation` to correctly implement - ``LabelPropagation`` and ``LabelSpreading`` as done in the referenced - papers. :issue:`9239` - by :user:`Andre Ambrosio Boechat `, :user:`Utkarsh Upadhyay - `, and `Joel Nothman`_. - -Decomposition, manifold learning and clustering - -- Fixed the implementation of :class:`manifold.TSNE`: -- ``early_exageration`` parameter had no effect and is now used for the - first 250 optimization iterations. -- Fixed the ``AssertionError: Tree consistency failed`` exception - reported in :issue:`8992`. -- Improve the learning schedule to match the one from the reference - implementation `lvdmaaten/bhtsne `_. - by :user:`Thomas Moreau ` and `Olivier Grisel`_. - -- Fix a bug in :class:`decomposition.LatentDirichletAllocation` - where the ``perplexity`` method was returning incorrect results because - the ``transform`` method returns normalized document topic distributions - as of version 0.18. :issue:`7954` by :user:`Gary Foreman `. - -- Fix output shape and bugs with n_jobs > 1 in - :class:`decomposition.SparseCoder` transform and - :func:`decomposition.sparse_encode` - for one-dimensional data and one component. - This also impacts the output shape of :class:`decomposition.DictionaryLearning`. - :issue:`8086` by `Andreas Müller`_. - -- Fixed the implementation of ``explained_variance_`` - in :class:`decomposition.PCA`, - :class:`decomposition.RandomizedPCA` and - :class:`decomposition.IncrementalPCA`. - :issue:`9105` by `Hanmin Qin `_. - -- Fixed the implementation of noise_variance_ in :class:`decomposition.PCA`. - :issue:`9108` by `Hanmin Qin `_. - -- Fixed a bug where :class:`cluster.DBSCAN` gives incorrect - result when input is a precomputed sparse matrix with initial - rows all zero. :issue:`8306` by :user:`Akshay Gupta ` - -- Fix a bug regarding fitting :class:`cluster.KMeans` with a sparse - array X and initial centroids, where X's means were unnecessarily being - subtracted from the centroids. :issue:`7872` by :user:`Josh Karnofsky `. - -- Fixes to the input validation in :class:`covariance.EllipticEnvelope`. - :issue:`8086` by `Andreas Müller`_. - -- Fixed a bug in :class:`covariance.MinCovDet` where inputting data - that produced a singular covariance matrix would cause the helper method - ``_c_step`` to throw an exception. - :issue:`3367` by :user:`Jeremy Steward ` - -- Fixed a bug in :class:`manifold.TSNE` affecting convergence of the - gradient descent. :issue:`8768` by :user:`David DeTomaso `. - -- Fixed a bug in :class:`manifold.TSNE` where it stored the incorrect - ``kl_divergence_``. :issue:`6507` by :user:`Sebastian Saeger `. - -- Fixed improper scaling in :class:`cross_decomposition.PLSRegression` - with ``scale=True``. :issue:`7819` by :user:`jayzed82 `. - -- :class:`cluster.bicluster.SpectralCoclustering` and - :class:`cluster.bicluster.SpectralBiclustering` ``fit`` method conforms - with API by accepting ``y`` and returning the object. 
:issue:`6126`, - :issue:`7814` by :user:`Laurent Direr ` and :user:`Maniteja - Nandana `. - -- Fix bug where :mod:`mixture` ``sample`` methods did not return as many - samples as requested. :issue:`7702` by :user:`Levi John Wolf `. - -- Fixed the shrinkage implementation in :class:`neighbors.NearestCentroid`. - :issue:`9219` by `Hanmin Qin `_. - -Preprocessing and feature selection - -- For sparse matrices, :func:`preprocessing.normalize` with ``return_norm=True`` - will now raise a ``NotImplementedError`` with 'l1' or 'l2' norm and with - norm 'max' the norms returned will be the same as for dense matrices. - :issue:`7771` by `Ang Lu `_. - -- Fix a bug where :class:`feature_selection.SelectFdr` did not - exactly implement Benjamini-Hochberg procedure. It formerly may have - selected fewer features than it should. - :issue:`7490` by :user:`Peng Meng `. - -- Fixed a bug where :class:`linear_model.RandomizedLasso` and - :class:`linear_model.RandomizedLogisticRegression` breaks for - sparse input. :issue:`8259` by :user:`Aman Dalmia `. - -- Fix a bug where :class:`feature_extraction.FeatureHasher` - mandatorily applied a sparse random projection to the hashed features, - preventing the use of - :class:`feature_extraction.text.HashingVectorizer` in a - pipeline with :class:`feature_extraction.text.TfidfTransformer`. - :issue:`7565` by :user:`Roman Yurchak `. - -- Fix a bug where :class:`feature_selection.mutual_info_regression` did not - correctly use ``n_neighbors``. :issue:`8181` by :user:`Guillaume Lemaitre - `. - -Model evaluation and meta-estimators - -- Fixed a bug where :func:`model_selection.BaseSearchCV.inverse_transform` - returns ``self.best_estimator_.transform()`` instead of - ``self.best_estimator_.inverse_transform()``. - :issue:`8344` by :user:`Akshay Gupta ` and :user:`Rasmus Eriksson `. - -- Added ``classes_`` attribute to :class:`model_selection.GridSearchCV`, - :class:`model_selection.RandomizedSearchCV`, :class:`grid_search.GridSearchCV`, - and :class:`grid_search.RandomizedSearchCV` that matches the ``classes_`` - attribute of ``best_estimator_``. :issue:`7661` and :issue:`8295` - by :user:`Alyssa Batula `, :user:`Dylan Werner-Meier `, - and :user:`Stephen Hoover `. - -- Fixed a bug where :func:`model_selection.validation_curve` - reused the same estimator for each parameter value. - :issue:`7365` by :user:`Aleksandr Sandrovskii `. - -- :func:`model_selection.permutation_test_score` now works with Pandas - types. :issue:`5697` by :user:`Stijn Tonk `. - -- Several fixes to input validation in - :class:`multiclass.OutputCodeClassifier` - :issue:`8086` by `Andreas Müller`_. - -- :class:`multiclass.OneVsOneClassifier`'s ``partial_fit`` now ensures all - classes are provided up-front. :issue:`6250` by - :user:`Asish Panda `. - -- Fix :func:`multioutput.MultiOutputClassifier.predict_proba` to return a - list of 2d arrays, rather than a 3d array. In the case where different - target columns had different numbers of classes, a ``ValueError`` would be - raised on trying to stack matrices with different dimensions. - :issue:`8093` by :user:`Peter Bull `. - -- Cross validation now works with Pandas datatypes that that have a - read-only index. :issue:`9507` by `Loic Esteve`_. - -Metrics - -- :func:`metrics.average_precision_score` no longer linearly - interpolates between operating points, and instead weighs precisions - by the change in recall since the last operating point, as per the - `Wikipedia entry `_. - (`#7356 `_). By - :user:`Nick Dingwall ` and `Gael Varoquaux`_. 

- Fix a bug in :func:`metrics.classification._check_targets` which would return ``'binary'`` if ``y_true`` and ``y_pred`` were both ``'binary'`` but the union of ``y_true`` and ``y_pred`` was ``'multiclass'``. :issue:`8377` by `Loic Esteve`_.

- Fixed an integer overflow bug in :func:`metrics.confusion_matrix` and hence :func:`metrics.cohen_kappa_score`. :issue:`8354`, :issue:`7929` by `Joel Nothman`_ and :user:`Jon Crall `.

- Fixed passing of the ``gamma`` parameter to the ``chi2`` kernel in :func:`metrics.pairwise.pairwise_kernels`. :issue:`5211` by :user:`Nick Rhinehart `, :user:`Saurabh Bansod ` and `Andreas Müller`_.

Miscellaneous

- Fixed a bug where :func:`datasets.make_classification` failed when generating more than 30 features. :issue:`8159` by :user:`Herilalaina Rakotoarison `.

- Fixed a bug where :func:`datasets.make_moons` gave an incorrect result when ``n_samples`` is odd. :issue:`8198` by :user:`Josh Levy `.

- Some ``fetch_`` functions in :mod:`datasets` were ignoring the ``download_if_missing`` keyword. :issue:`7944` by :user:`Ralf Gommers `.

- Fix estimators to accept a ``sample_weight`` parameter of type ``pandas.Series`` in their ``fit`` function. :issue:`7825` by `Kathleen Chen`_.

- Fix a bug in cases where ``numpy.cumsum`` may be numerically unstable, raising an exception if instability is identified. :issue:`7376` and :issue:`7331` by `Joel Nothman`_ and :user:`yangarbiter`.

- Fix a bug where :meth:`base.BaseEstimator.__getstate__` obstructed pickling customizations of child-classes, when used in a multiple inheritance context. :issue:`8316` by :user:`Holger Peters `.

- Update Sphinx-Gallery from 0.1.4 to 0.1.7 for resolving links in documentation builds with Sphinx > 1.5. :issue:`8010`, :issue:`7986` by :user:`Oscar Najera `

- Add a ``data_home`` parameter to :func:`sklearn.datasets.fetch_kddcup99`. :issue:`9289` by `Loic Esteve`_.

- Fix dataset loaders using the Python 3 version of makedirs to also work in Python 2. :issue:`9284` by :user:`Sebastin Santy `.

- Several minor issues were fixed with thanks to the alerts of `lgtm.com <http://lgtm.com>`_. :issue:`9278` by :user:`Jean Helie `, among others.

API changes summary
-------------------

Trees and ensembles

- Gradient boosting base models are no longer estimators. By `Andreas Müller`_.

- All tree based estimators now accept a ``min_impurity_decrease`` parameter in lieu of ``min_impurity_split``, which is now deprecated. With ``min_impurity_decrease``, a node is split only if the split induces a weighted decrease in impurity of at least ``min_impurity_decrease``. :issue:`8449` by `Raghav RV`_.

Linear, kernelized and related models

- The ``n_iter`` parameter is deprecated in :class:`linear_model.SGDClassifier`, :class:`linear_model.SGDRegressor`, :class:`linear_model.PassiveAggressiveClassifier`, :class:`linear_model.PassiveAggressiveRegressor` and :class:`linear_model.Perceptron`. By `Tom Dupre la Tour`_.

Other predictors

- :class:`neighbors.LSHForest` has been deprecated and will be removed in 0.21 due to poor performance. :issue:`9078` by :user:`Laurent Direr `.

- :class:`neighbors.NearestCentroid` no longer purports to support ``metric='precomputed'``, which now raises an error. :issue:`8515` by :user:`Sergul Aydore `.

- The ``alpha`` parameter of :class:`semi_supervised.LabelPropagation` now has no effect and is deprecated, to be removed in 0.21. :issue:`9239` by :user:`Andre Ambrosio Boechat `, :user:`Utkarsh Upadhyay `, and `Joel Nothman`_.

Decomposition, manifold learning and clustering

- Deprecate the ``doc_topic_distr`` argument of the ``perplexity`` method in :class:`decomposition.LatentDirichletAllocation` because the user no longer has access to the unnormalized document topic distribution needed for the perplexity calculation. :issue:`7954` by :user:`Gary Foreman `.

- The ``n_topics`` parameter of :class:`decomposition.LatentDirichletAllocation` has been renamed to ``n_components`` and will be removed in version 0.21. :issue:`8922` by :user:`Attractadore`.

- :meth:`decomposition.SparsePCA.transform`'s ``ridge_alpha`` parameter is deprecated in preference for the class parameter. :issue:`8137` by :user:`Naoya Kanai `.

- :class:`cluster.DBSCAN` now has a ``metric_params`` parameter. :issue:`8139` by :user:`Naoya Kanai `.

Preprocessing and feature selection

- :class:`feature_selection.SelectFromModel` now has a ``partial_fit`` method only if the underlying estimator does. By `Andreas Müller`_.

- :class:`feature_selection.SelectFromModel` now validates the ``threshold`` parameter and sets the ``threshold_`` attribute during the call to ``fit``, and no longer during the call to ``transform``. By `Andreas Müller`_.

- The ``non_negative`` parameter in :class:`feature_extraction.FeatureHasher` has been deprecated, and replaced with a more principled alternative, ``alternate_sign``. :issue:`7565` by :user:`Roman Yurchak `.

- :class:`linear_model.RandomizedLogisticRegression` and :class:`linear_model.RandomizedLasso` have been deprecated and will be removed in version 0.21. :issue:`8995` by :user:`Ramana.S `.

Model evaluation and meta-estimators

- Deprecate the ``fit_params`` constructor input to :class:`model_selection.GridSearchCV` and :class:`model_selection.RandomizedSearchCV` in favor of passing keyword parameters to the ``fit`` methods of those classes. Data-dependent parameters needed for model training should be passed as keyword arguments to ``fit``, and conforming to this convention will allow the hyperparameter selection classes to be used with tools such as :func:`model_selection.cross_val_predict`. :issue:`2879` by :user:`Stephen Hoover `.

- In version 0.21, the default behavior of splitters that use the ``test_size`` and ``train_size`` parameters will change, such that specifying ``train_size`` alone will cause ``test_size`` to be the remainder. :issue:`7459` by :user:`Nelson Liu `.

- :class:`multiclass.OneVsRestClassifier` now has ``partial_fit``, ``decision_function`` and ``predict_proba`` methods only when the underlying estimator does. :issue:`7812` by `Andreas Müller`_ and :user:`Mikhail Korobov `.

- :class:`multiclass.OneVsRestClassifier` now has a ``partial_fit`` method only if the underlying estimator does. By `Andreas Müller`_.

- The ``decision_function`` output shape for binary classification in :class:`multiclass.OneVsRestClassifier` and :class:`multiclass.OneVsOneClassifier` is now ``(n_samples,)`` to conform to scikit-learn conventions. :issue:`9100` by `Andreas Müller`_.

- The :func:`multioutput.MultiOutputClassifier.predict_proba` function used to return a 3d array (``n_samples``, ``n_classes``, ``n_outputs``). In the case where different target columns had different numbers of classes, a ``ValueError`` would be raised on trying to stack matrices with different dimensions. This function now returns a list of arrays where the length of the list is ``n_outputs``, and each array is (``n_samples``, ``n_classes``) for that particular output. :issue:`8093` by :user:`Peter Bull `.

- The ``named_steps`` attribute of :class:`pipeline.Pipeline` was changed from a ``dict`` to a :class:`utils.Bunch` to enable tab completion in interactive environments. In the case of a conflict between a step name and an existing ``dict`` attribute, ``dict`` behavior is prioritized. :issue:`8481` by :user:`Herilalaina Rakotoarison `.

Miscellaneous

- Deprecate the ``y`` parameter in ``transform`` and ``inverse_transform``. These methods should not accept a ``y`` parameter, as they are used at prediction time. :issue:`8174` by :user:`Tahar Zanouda `, `Alexandre Gramfort`_ and `Raghav RV`_.

- SciPy >= 0.13.3 and NumPy >= 1.8.2 are now the minimum supported versions for scikit-learn. The following backported functions in :mod:`utils` have been removed or deprecated accordingly. :issue:`8854` and :issue:`8874` by :user:`Naoya Kanai `

- The ``store_covariances`` and ``covariances_`` parameters of :class:`discriminant_analysis.QuadraticDiscriminantAnalysis` have been renamed to ``store_covariance`` and ``covariance_`` to be consistent with the corresponding parameter names of :class:`discriminant_analysis.LinearDiscriminantAnalysis`. They will be removed in version 0.21. :issue:`7998` by :user:`Jiacheng `

  Removed in 0.19:

  - ``utils.fixes.argpartition``
  - ``utils.fixes.array_equal``
  - ``utils.fixes.astype``
  - ``utils.fixes.bincount``
  - ``utils.fixes.expit``
  - ``utils.fixes.frombuffer_empty``
  - ``utils.fixes.in1d``
  - ``utils.fixes.norm``
  - ``utils.fixes.rankdata``
  - ``utils.fixes.safe_copy``

  Deprecated in 0.19, to be removed in 0.21:

  - ``utils.arpack.eigs``
  - ``utils.arpack.eigsh``
  - ``utils.arpack.svds``
  - ``utils.extmath.fast_dot``
  - ``utils.extmath.logsumexp``
  - ``utils.extmath.norm``
  - ``utils.extmath.pinvh``
  - ``utils.graph.graph_laplacian``
  - ``utils.random.choice``
  - ``utils.sparsetools.connected_components``
  - ``utils.stats.rankdata``

- Estimators with both methods ``decision_function`` and ``predict_proba`` are now required to have a monotonic relation between them. The method ``check_decision_proba_consistency`` has been added in ``utils.estimator_checks`` to check their consistency. :issue:`7578` by :user:`Shubham Bhardwaj `

- All checks in ``utils.estimator_checks``, in particular :func:`utils.estimator_checks.check_estimator`, now accept estimator instances. Most other checks do not accept estimator classes any more. :issue:`9019` by `Andreas Müller`_.

- Ensure that estimators' attributes ending with ``_`` are not set in the constructor but only in the ``fit`` method. Most notably, ensemble estimators (deriving from :class:`ensemble.BaseEnsemble`) now only have ``self.estimators_`` available after ``fit``. :issue:`7464` by `Lars Buitinck`_ and `Loic Esteve`_.
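
As a small illustration of the last convention (the estimator chosen here is arbitrary), attributes ending with ``_``, such as ``estimators_``, exist only once ``fit`` has been called::

    from sklearn.datasets import make_classification
    from sklearn.ensemble import BaggingClassifier

    X, y = make_classification(n_samples=20, random_state=0)
    clf = BaggingClassifier(n_estimators=3, random_state=0)

    print(hasattr(clf, "estimators_"))  # False: nothing is fitted in __init__
    clf.fit(X, y)
    print(hasattr(clf, "estimators_"))  # True: set during fit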


Code and Documentation Contributors
-----------------------------------

Thanks to everyone who has contributed to the maintenance and improvement of the project since version 0.18, including:

Joel Nothman, Loic Esteve, Andreas Mueller, Guillaume Lemaitre, Olivier Grisel, Hanmin Qin, Raghav RV, Alexandre Gramfort, themrmax, Aman Dalmia, Gael Varoquaux, Naoya Kanai, Tom Dupré la Tour, Rishikesh, Nelson Liu, Taehoon Lee, Nelle Varoquaux, Aashil, Mikhail Korobov, Sebastin Santy, Joan Massich, Roman Yurchak, RAKOTOARISON Herilalaina, Thierry Guillemot, Alexandre Abadie, Carol Willing, Balakumaran Manoharan, Josh Karnofsky, Vlad Niculae, Utkarsh Upadhyay, Dmitry Petrov, Minghui Liu, Srivatsan, Vincent Pham, Albert Thomas, Jake VanderPlas, Attractadore, JC Liu, alexandercbooth, chkoar, Óscar Nájera, Aarshay Jain, Kyle Gilliam, Ramana Subramanyam, CJ Carey, Clement Joudet, David Robles, He Chen, Joris Van den Bossche, Karan Desai, Katie Luangkote, Leland McInnes, Maniteja Nandana, Michele Lacchia, Sergei Lebedev, Shubham Bhardwaj, akshay0724, omtcyfz, rickiepark, waterponey, Vathsala Achar, jbDelafosse, Ralf Gommers, Ekaterina Krivich, Vivek Kumar, Ishank Gulati, Dave Elliott, ldirer, Reiichiro Nakano, Levi John Wolf, Mathieu Blondel, Sid Kapur, Dougal J. Sutherland, midinas, mikebenfield, Sourav Singh, Aseem Bansal, Ibraim Ganiev, Stephen Hoover, AishwaryaRK, Steven C. Howell, Gary Foreman, Neeraj Gangwar, Tahar, Jon Crall, dokato, Kathy Chen, ferria, Thomas Moreau, Charlie Brummitt, Nicolas Goix, Adam Kleczewski, Sam Shleifer, Nikita Singh, Basil Beirouti, Giorgio Patrini, Manoj Kumar, Rafael Possas, James Bourbeau, James A. Bednar, Janine Harper, Jaye, Jean Helie, Jeremy Steward, Artsiom, John Wei, Jonathan LIgo, Jonathan Rahn, seanpwilliams, Arthur Mensch, Josh Levy, Julian Kuhlmann, Julien Aubert, Jörn Hees, Kai, shivamgargsya, Kat Hempstalk, Kaushik Lakshmikanth, Kennedy, Kenneth Lyons, Kenneth Myers, Kevin Yap, Kirill Bobyrev, Konstantin Podshumok, Arthur Imbert, Lee Murray, toastedcornflakes, Lera, Li Li, Arthur Douillard, Mainak Jas, tobycheese, Manraj Singh, Manvendra Singh, Marc Meketon, MarcoFalke, Matthew Brett, Matthias Gilch, Mehul Ahuja, Melanie Goetz, Meng, Peng, Michael Dezube, Michal Baumgartner, vibrantabhi19, Artem Golubin, Milen Paskov, Antonin Carette, Morikko, MrMjauh, NALEPA Emmanuel, Namiya, Antoine Wendlinger, Narine Kokhlikyan, NarineK, Nate Guerin, Angus Williams, Ang Lu, Nicole Vavrova, Nitish Pandey, Okhlopkov Daniil Olegovich, Andy Craze, Om Prakash, Parminder Singh, Patrick Carlson, Patrick Pei, Paul Ganssle, Paulo Haddad, Paweł Lorek, Peng Yu, Pete Bachant, Peter Bull, Peter Csizsek, Peter Wang, Pieter Arthur de Jong, Ping-Yao, Chang, Preston Parry, Puneet Mathur, Quentin Hibon, Andrew Smith, Andrew Jackson, 1kastner, Rameshwar Bhaskaran, Rebecca Bilbro, Remi Rampin, Andrea Esuli, Rob Hall, Robert Bradshaw, Romain Brault, Aman Pratik, Ruifeng Zheng, Russell Smith, Sachin Agarwal, Sailesh Choyal, Samson Tan, Samuël Weber, Sarah Brown, Sebastian Pölsterl, Sebastian Raschka, Sebastian Saeger, Alyssa Batula, Abhyuday Pratap Singh, Sergey Feldman, Sergul Aydore, Sharan Yalburgi, willduan, Siddharth Gupta, Sri Krishna, Almer, Stijn Tonk, Allen Riddell, Theofilos Papapanagiotou, Alison, Alexis Mignon, Tommy Boucher, Tommy Löfstedt, Toshihiro Kamishima, Tyler Folkman, Tyler Lanigan, Alexander Junge, Varun Shenoy, Victor Poughon, Vilhelm von Ehrenheim, Aleksandr Sandrovskii, Alan Yee, Vlasios Vasileiou, Warut Vijitbenjaronk, Yang Zhang, Yaroslav Halchenko, Yichuan Liu, Yuichi Fujikawa, affanv14, aivision2020, xor, andreh7, brady salz, campustrampus, Agamemnon Krasoulis, ditenberg, elena-sharova, filipj8, fukatani, gedeck, guiniol, guoci, hakaa1, hongkahjun, i-am-xhy, jakirkham, jaroslaw-weber, jayzed82, jeroko, jmontoyam, jonathan.striebel, josephsalmon, jschendel, leereeves, martin-hahn, mathurinm, mehak-sachdeva, mlewis1729, mlliou112, mthorrell, ndingwall, nuffe, yangarbiter, plagree, pldtc325, Breno Freitas, Brett Olsen, Brian A. Alfano, Brian Burns, polmauri, Brandon Carter, Charlton Austin, Chayant T15h, Chinmaya Pancholi, Christian Danielsen, Chung Yen, Chyi-Kwei Yau, pravarmahajan, DOHMATOB Elvis, Daniel LeJeune, Daniel Hnyk, Darius Morawiec, David DeTomaso, David Gasquez, David Haberthür, David Heryanto, David Kirkby, David Nicholson, rashchedrin, Deborah Gertrude Digges, Denis Engemann, Devansh D, Dickson, Bob Baxley, Don86, E. Lynch-Klarup, Ed Rogers, Elizabeth Ferriss, Ellen-Co2, Fabian Egli, Fang-Chieh Chou, Bing Tian Dai, Greg Stupp, Grzegorz Szpak, Bertrand Thirion, Hadrien Bertrand, Harizo Rajaona, zxcvbnius, Henry Lin, Holger Peters, Icyblade Dai, Igor Andriushchenko, Ilya, Isaac Laughlin, Iván Vallés, Aurélien Bellet, JPFrancoia, Jacob Schreiber, Asish Mahapatra

.. _changes_0_18_2:

Version 0.18.2
==============

**June 20, 2017**

.. topic:: Last release with Python 2.6 support

    Scikit-learn 0.18 is the last major release of scikit-learn to support Python 2.6. Later versions of scikit-learn will require Python 2.7 or above.

Changelog
---------

- Fixes for compatibility with NumPy 1.13.0: :issue:`7946` :issue:`8355` by `Loic Esteve`_.

- Minor compatibility changes in the examples :issue:`9010` :issue:`8040` :issue:`9149`.

Code Contributors
-----------------
Aman Dalmia, Loic Esteve, Nate Guerin, Sergei Lebedev

.. _changes_0_18_1:

Version 0.18.1
==============

**November 11, 2016**

Changelog
---------

Enhancements
............

- Improved ``sample_without_replacement`` speed by utilizing numpy.random.permutation for most cases. As a result, samples may differ in this release for a fixed random state. Affected estimators:

  - :class:`ensemble.BaggingClassifier`
  - :class:`ensemble.BaggingRegressor`
  - :class:`linear_model.RANSACRegressor`
  - :class:`model_selection.RandomizedSearchCV`
  - :class:`random_projection.SparseRandomProjection`

  This also affects the :meth:`datasets.make_classification` method.

Bug fixes
.........

- Fix issue where the ``min_grad_norm`` and ``n_iter_without_progress`` parameters were not being utilised by :class:`manifold.TSNE`. :issue:`6497` by :user:`Sebastian Säger `

- Fix a bug in :class:`svm.SVC`'s decision values when ``decision_function_shape`` is ``ovr``. :class:`svm.SVC`'s ``decision_function`` was incorrect from versions 0.17.0 through 0.18.0. :issue:`7724` by `Bing Tian Dai`_

- The ``explained_variance_ratio`` attribute of :class:`discriminant_analysis.LinearDiscriminantAnalysis` calculated with the SVD and Eigen solvers is now of the same length. :issue:`7632` by :user:`JPFrancoia `

- Fixes an issue in :ref:`univariate_feature_selection` where score functions were not accepting multi-label targets. :issue:`7676` by :user:`Mohammed Affan `

- Fixed setting parameters when calling ``fit`` multiple times on :class:`feature_selection.SelectFromModel`. :issue:`7756` by `Andreas Müller`_

- Fixes an issue in the ``partial_fit`` method of :class:`multiclass.OneVsRestClassifier` when the number of classes used in ``partial_fit`` was less than the total number of classes in the data. :issue:`7786` by `Srivatsan Ramesh`_

- Fixes an issue in :class:`calibration.CalibratedClassifierCV` where the probabilities of each class for a sample did not sum to 1; ``CalibratedClassifierCV`` now also handles the case where the training set has fewer classes than the total data. :issue:`7799` by `Srivatsan Ramesh`_

- Fix a bug where :class:`sklearn.feature_selection.SelectFdr` did not exactly implement the Benjamini-Hochberg procedure. It formerly may have selected fewer features than it should. :issue:`7490` by :user:`Peng Meng `.

- :class:`sklearn.manifold.LocallyLinearEmbedding` now correctly handles integer inputs. :issue:`6282` by `Jake Vanderplas`_.

- The ``min_weight_fraction_leaf`` parameter of tree-based classifiers and regressors now assumes uniform sample weights by default if the ``sample_weight`` argument is not passed to the ``fit`` function. Previously, the parameter was silently ignored. :issue:`7301` by :user:`Nelson Liu `.

- Fixed a numerical issue with :class:`linear_model.RidgeCV` on centered data when ``n_features > n_samples``. :issue:`6178` by `Bertrand Thirion`_

- Tree splitting criterion classes' cloning/pickling is now memory safe. :issue:`7680` by :user:`Ibraim Ganiev `.

- Fixed a bug where :class:`decomposition.NMF` sets its ``n_iters_`` attribute in ``transform()``. :issue:`7553` by :user:`Ekaterina Krivich `.

- :class:`sklearn.linear_model.LogisticRegressionCV` now correctly handles string labels. :issue:`5874` by `Raghav RV`_.

- Fixed a bug where :func:`sklearn.model_selection.train_test_split` raised an error when ``stratify`` is a list of string labels. :issue:`7593` by `Raghav RV`_.

- Fixed a bug where :class:`sklearn.model_selection.GridSearchCV` and :class:`sklearn.model_selection.RandomizedSearchCV` were not pickleable because of a pickling bug in ``np.ma.MaskedArray``. :issue:`7594` by `Raghav RV`_.

- All cross-validation utilities in :mod:`sklearn.model_selection` now permit one-time cross-validation splitters for the ``cv`` parameter. Non-deterministic cross-validation splitters (where multiple calls to ``split`` produce dissimilar splits) can also be used as the ``cv`` parameter. :class:`sklearn.model_selection.GridSearchCV` will cross-validate each parameter setting on the splits produced by the first ``split`` call to the cross-validation splitter (see the short sketch after this list). :issue:`7660` by `Raghav RV`_.

- Fix bug where :meth:`preprocessing.MultiLabelBinarizer.fit_transform` returned an invalid CSR matrix. :issue:`7750` by :user:`CJ Carey `.

- Fixed a bug where :func:`metrics.pairwise.cosine_distances` could return a small negative distance. :issue:`7732` by :user:`Artsion `.

API changes summary
-------------------

Trees and forests

- The ``min_weight_fraction_leaf`` parameter of tree-based classifiers and regressors now assumes uniform sample weights by default if the ``sample_weight`` argument is not passed to the ``fit`` function. Previously, the parameter was silently ignored. :issue:`7301` by :user:`Nelson Liu `.

- Tree splitting criterion classes' cloning/pickling is now memory safe. :issue:`7680` by :user:`Ibraim Ganiev `.
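
A short sketch of passing a one-time (generator) splitter as ``cv``, as permitted by :issue:`7660` above; the dataset and parameter grid are illustrative only::

    from sklearn.datasets import make_classification
    from sklearn.model_selection import GridSearchCV, KFold
    from sklearn.svm import SVC

    X, y = make_classification(n_samples=60, random_state=0)

    # ``split`` returns a one-time generator; GridSearchCV evaluates every
    # parameter setting on the splits produced by this single ``split`` call.
    cv_gen = KFold(n_splits=3).split(X, y)

    search = GridSearchCV(SVC(), {"C": [0.1, 1.0]}, cv=cv_gen)
    search.fit(X, y)
    print(search.best_params_)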

Linear, kernelized and related models

- The length of the ``explained_variance_ratio`` attribute of :class:`discriminant_analysis.LinearDiscriminantAnalysis` changed for both the Eigen and SVD solvers. The attribute now has a length of ``min(n_components, n_classes - 1)``. :issue:`7632` by :user:`JPFrancoia `

- Fixed a numerical issue with :class:`linear_model.RidgeCV` on centered data when ``n_features > n_samples``. :issue:`6178` by `Bertrand Thirion`_

.. _changes_0_18:

Version 0.18
============

**September 28, 2016**

.. topic:: Last release with Python 2.6 support

    Scikit-learn 0.18 will be the last version of scikit-learn to support Python 2.6. Later versions of scikit-learn will require Python 2.7 or above.

.. _model_selection_changes:

Model Selection Enhancements and API Changes
--------------------------------------------

- **The model_selection module**

  The new module :mod:`sklearn.model_selection`, which groups together the functionalities of the former :mod:`sklearn.cross_validation`, :mod:`sklearn.grid_search` and :mod:`sklearn.learning_curve`, introduces new possibilities such as nested cross-validation and better manipulation of parameter searches with Pandas.

  Many things will stay the same but there are some key differences. Read below to know more about the changes.

- **Data-independent CV splitters enabling nested cross-validation**

  The new cross-validation splitters, defined in :mod:`sklearn.model_selection`, are no longer initialized with any data-dependent parameters such as ``y``. Instead they expose a :func:`split` method that takes in the data and yields a generator for the different splits.

  This change makes it possible to use the cross-validation splitters to perform nested cross-validation, facilitated by the :class:`model_selection.GridSearchCV` and :class:`model_selection.RandomizedSearchCV` utilities.

- **The enhanced cv_results_ attribute**

  The new ``cv_results_`` attribute (of :class:`model_selection.GridSearchCV` and :class:`model_selection.RandomizedSearchCV`), introduced in lieu of the ``grid_scores_`` attribute, is a dict of 1D arrays with elements in each array corresponding to the parameter settings (i.e. search candidates).

  The ``cv_results_`` dict can be easily imported into ``pandas`` as a ``DataFrame`` for exploring the search results.

  The ``cv_results_`` arrays include scores for each cross-validation split (with keys such as ``'split0_test_score'``), as well as their mean (``'mean_test_score'``) and standard deviation (``'std_test_score'``).

  The ranks for the search candidates (based on their mean cross-validation score) are available at ``cv_results_['rank_test_score']``.

  The values for each parameter are stored separately as numpy masked object arrays, where the value for a given search candidate is masked if the corresponding parameter is not applicable. Additionally, a list of all the parameter dicts is stored at ``cv_results_['params']``.

- **Parameters n_folds and n_iter renamed to n_splits**

  Some parameter names have changed: the ``n_folds`` parameter in the new :class:`model_selection.KFold`, :class:`model_selection.GroupKFold` (see below for the name change), and :class:`model_selection.StratifiedKFold` is now renamed to ``n_splits``. The ``n_iter`` parameter in :class:`model_selection.ShuffleSplit`, the new class :class:`model_selection.GroupShuffleSplit` and :class:`model_selection.StratifiedShuffleSplit` is now renamed to ``n_splits``.

- **Rename of splitter classes which accept group labels along with data**

  The cross-validation splitters ``LabelKFold``, ``LabelShuffleSplit``, ``LeaveOneLabelOut`` and ``LeavePLabelOut`` have been renamed to :class:`model_selection.GroupKFold`, :class:`model_selection.GroupShuffleSplit`, :class:`model_selection.LeaveOneGroupOut` and :class:`model_selection.LeavePGroupsOut` respectively.

  Note the change from singular to plural form in :class:`model_selection.LeavePGroupsOut`.

- **Fit parameter labels renamed to groups**

  The ``labels`` parameter in the :func:`split` method of the newly renamed splitters :class:`model_selection.GroupKFold`, :class:`model_selection.LeaveOneGroupOut`, :class:`model_selection.LeavePGroupsOut` and :class:`model_selection.GroupShuffleSplit` is renamed to ``groups``, following the new nomenclature of their class names.

- **Parameter n_labels renamed to n_groups**

  The parameter ``n_labels`` in the newly renamed :class:`model_selection.LeavePGroupsOut` is changed to ``n_groups``.

- **Training scores and timing information**

  ``cv_results_`` also includes the training scores for each cross-validation split (with keys such as ``'split0_train_score'``), as well as their mean (``'mean_train_score'``) and standard deviation (``'std_train_score'``). To avoid the cost of evaluating training scores, set ``return_train_score=False``.

  Additionally, the mean and standard deviation of the times taken to split, train and score the model across all the cross-validation splits are available at the keys ``'mean_time'`` and ``'std_time'`` respectively.

Changelog
---------

New features
............

Classifiers and Regressors

- The Gaussian Process module has been reimplemented and now offers classification and regression estimators through :class:`gaussian_process.GaussianProcessClassifier` and :class:`gaussian_process.GaussianProcessRegressor`. Among other things, the new implementation supports kernel engineering, gradient-based hyperparameter optimization and sampling of functions from the GP prior and GP posterior. Extensive documentation and examples are provided. By `Jan Hendrik Metzen`_.

- Added a new supervised learning algorithm: :ref:`Multi-layer Perceptron `. :issue:`3204` by :user:`Issam H. Laradji `

- Added :class:`linear_model.HuberRegressor`, a linear model robust to outliers. :issue:`5291` by `Manoj Kumar`_.

- Added the :class:`multioutput.MultiOutputRegressor` meta-estimator. It converts single output regressors to multi-output regressors by fitting one regressor per output. By :user:`Tim Head `.

Other estimators

- The new :class:`mixture.GaussianMixture` and :class:`mixture.BayesianGaussianMixture` replace the former mixture models, employing faster inference for sounder results. :issue:`7295` by :user:`Wei Xue ` and :user:`Thierry Guillemot `.

- Class :class:`decomposition.RandomizedPCA` is now factored into :class:`decomposition.PCA` and is available by calling it with the parameter ``svd_solver='randomized'``. The default number of ``n_iter`` for ``'randomized'`` has changed to 4. The old behavior of PCA is recovered by ``svd_solver='full'``. An additional solver calls ``arpack`` and performs a truncated (non-randomized) SVD.
  By default, the best solver is selected depending on the size of the input and the number of components requested. :issue:`5299` by :user:`Giorgio Patrini `.

- Added two functions for mutual information estimation: :func:`feature_selection.mutual_info_classif` and :func:`feature_selection.mutual_info_regression`. These functions can be used in :class:`feature_selection.SelectKBest` and :class:`feature_selection.SelectPercentile` as score functions. By :user:`Andrea Bravi ` and :user:`Nikolay Mayorov `.

- Added the :class:`ensemble.IsolationForest` class for anomaly detection based on random forests. By `Nicolas Goix`_.

- Added ``algorithm="elkan"`` to :class:`cluster.KMeans`, implementing Elkan's fast K-Means algorithm. By `Andreas Müller`_.

Model selection and evaluation

- Added :func:`metrics.cluster.fowlkes_mallows_score`, the Fowlkes-Mallows Index, which measures the similarity of two clusterings of a set of points. By :user:`Arnaud Fouchet ` and :user:`Thierry Guillemot `.

- Added :func:`metrics.calinski_harabaz_score`, which computes the Calinski and Harabaz score to evaluate the resulting clustering of a set of points. By :user:`Arnaud Fouchet ` and :user:`Thierry Guillemot `.

- Added a new cross-validation splitter :class:`model_selection.TimeSeriesSplit` to handle time series data. :issue:`6586` by :user:`YenChen Lin `

- The cross-validation iterators are replaced by cross-validation splitters available from :mod:`sklearn.model_selection`, allowing for nested cross-validation. See :ref:`model_selection_changes` for more information. :issue:`4294` by `Raghav RV`_.

Enhancements
............

Trees and ensembles

- Added a new splitting criterion for :class:`tree.DecisionTreeRegressor`, the mean absolute error. This criterion can also be used in :class:`ensemble.ExtraTreesRegressor`, :class:`ensemble.RandomForestRegressor`, and the gradient boosting estimators. :issue:`6667` by :user:`Nelson Liu `.

- Added a weighted impurity-based early stopping criterion for decision tree growth. :issue:`6954` by :user:`Nelson Liu `

- The random forest, extra tree and decision tree estimators now have a method ``decision_path`` which returns the decision path of samples in the tree. By `Arnaud Joly`_.

- A new example has been added unveiling the decision tree structure. By `Arnaud Joly`_.

- Random forest, extra trees, decision trees and gradient boosting estimators now accept the parameters ``min_samples_split`` and ``min_samples_leaf`` provided as a percentage of the training samples. By :user:`yelite ` and `Arnaud Joly`_.

- Gradient boosting estimators accept the parameter ``criterion`` to specify the splitting criterion used when building decision trees. :issue:`6667` by :user:`Nelson Liu `.

- The memory footprint is reduced (sometimes greatly) for :class:`ensemble.bagging.BaseBagging` and classes that inherit from it, i.e., :class:`ensemble.BaggingClassifier`, :class:`ensemble.BaggingRegressor`, and :class:`ensemble.IsolationForest`, by dynamically generating the attribute ``estimators_samples_`` only when it is needed. By :user:`David Staub `.

- Added ``n_jobs`` and ``sample_weight`` parameters for :class:`ensemble.VotingClassifier` to fit underlying estimators in parallel. :issue:`5805` by :user:`Ibraim Ganiev `.

Linear, kernelized and related models

- In :class:`linear_model.LogisticRegression`, the SAG solver is now available in the multinomial case. :issue:`5251` by `Tom Dupre la Tour`_.

- :class:`linear_model.RANSACRegressor`, :class:`svm.LinearSVC` and :class:`svm.LinearSVR` now support ``sample_weight``. By :user:`Imaculate `.

- Added the parameter ``loss`` to :class:`linear_model.RANSACRegressor` to measure the error on the samples for every trial. By `Manoj Kumar`_.

- Prediction of out-of-sample events with Isotonic Regression (:class:`isotonic.IsotonicRegression`) is now much faster (over 1000x in tests with synthetic data). By :user:`Jonathan Arfa `.

- Isotonic regression (:class:`isotonic.IsotonicRegression`) now uses a better algorithm to avoid `O(n^2)` behavior in pathological cases, and is also generally faster (:issue:`6691`). By `Antony Lee`_.

- :class:`naive_bayes.GaussianNB` now accepts data-independent class-priors through the parameter ``priors``. By :user:`Guillaume Lemaitre `.

- :class:`linear_model.ElasticNet` and :class:`linear_model.Lasso` now work with ``np.float32`` input data without converting it into ``np.float64``. This reduces memory consumption. :issue:`6913` by :user:`YenChen Lin `.

- :class:`semi_supervised.LabelPropagation` and :class:`semi_supervised.LabelSpreading` now accept arbitrary kernel functions in addition to the strings ``knn`` and ``rbf``. :issue:`5762` by :user:`Utkarsh Upadhyay `.

Decomposition, manifold learning and clustering

- Added an ``inverse_transform`` function to :class:`decomposition.NMF` to compute the data matrix of the original shape. By :user:`Anish Shah `.

- :class:`cluster.KMeans` and :class:`cluster.MiniBatchKMeans` now work with ``np.float32`` and ``np.float64`` input data without converting it. This allows reducing memory consumption by using ``np.float32``. :issue:`6846` by :user:`Sebastian Säger ` and :user:`YenChen Lin `.

Preprocessing and feature selection

- :class:`preprocessing.RobustScaler` now accepts a ``quantile_range`` parameter. :issue:`5929` by :user:`Konstantin Podshumok `.

- :class:`feature_extraction.FeatureHasher` now accepts string values. :issue:`6173` by :user:`Ryad Zenine ` and :user:`Devashish Deshpande `.

- Keyword arguments can now be supplied to ``func`` in :class:`preprocessing.FunctionTransformer` by means of the ``kw_args`` parameter. By `Brian McFee`_.

- :class:`feature_selection.SelectKBest` and :class:`feature_selection.SelectPercentile` now accept score functions that take X, y as input and return only the scores. By :user:`Nikolay Mayorov `.

Model evaluation and meta-estimators

- :class:`multiclass.OneVsOneClassifier` and :class:`multiclass.OneVsRestClassifier` now support ``partial_fit``. By :user:`Asish Panda ` and :user:`Philipp Dowling `.

- Added support for substituting or disabling :class:`pipeline.Pipeline` and :class:`pipeline.FeatureUnion` components using the ``set_params`` interface that powers :mod:`sklearn.grid_search`. See :ref:`sphx_glr_auto_examples_plot_compare_reduction.py`. By `Joel Nothman`_ and :user:`Robert McGibbon `.

- The new ``cv_results_`` attribute of :class:`model_selection.GridSearchCV` (and :class:`model_selection.RandomizedSearchCV`) can be easily imported into pandas as a ``DataFrame``. Ref :ref:`model_selection_changes` for more information. :issue:`6697` by `Raghav RV`_.

- Generalization of :func:`model_selection.cross_val_predict`: one can pass method names such as `predict_proba` to be used in the cross validation framework instead of the default `predict`.
  By :user:`Ori Ziv ` and :user:`Sears Merritt `.

- The training scores and the time taken for training followed by scoring for each search candidate are now available in the ``cv_results_`` dict. See :ref:`model_selection_changes` for more information. :issue:`7325` by :user:`Eugene Chen ` and `Raghav RV`_.

Metrics

- Added a ``labels`` flag to :class:`metrics.log_loss` to explicitly provide the labels when the number of classes in ``y_true`` and ``y_pred`` differ. :issue:`7239` by :user:`Hong Guangguo ` with help from :user:`Mads Jensen ` and :user:`Nelson Liu `.

- Support sparse contingency matrices in cluster evaluation (:mod:`metrics.cluster.supervised`) to scale to a large number of clusters. :issue:`7419` by :user:`Gregory Stupp ` and `Joel Nothman`_.

- Added a ``sample_weight`` parameter to :func:`metrics.matthews_corrcoef`. By :user:`Jatin Shah ` and `Raghav RV`_.

- Speed up :func:`metrics.silhouette_score` by using vectorized operations. By `Manoj Kumar`_.

- Added a ``sample_weight`` parameter to :func:`metrics.confusion_matrix`. By :user:`Bernardo Stein `.

Miscellaneous

- Added an ``n_jobs`` parameter to :class:`feature_selection.RFECV` to compute the score on the test folds in parallel. By `Manoj Kumar`_

- The codebase does not contain C/C++ cython generated files: they are generated during build. Distribution packages will still contain generated C/C++ files. By :user:`Arthur Mensch `.

- Reduced the memory usage for 32-bit float input arrays of :func:`utils.sparse_func.mean_variance_axis` and :func:`utils.sparse_func.incr_mean_variance_axis` by supporting cython fused types. By :user:`YenChen Lin `.

- :func:`ignore_warnings` now accepts a category argument to ignore only the warnings of a specified type. By :user:`Thierry Guillemot `.

- Added the parameter ``return_X_y`` and the return type ``(data, target) : tuple`` option to :func:`load_iris` (:issue:`7049`), :func:`load_breast_cancer` (:issue:`7152`), :func:`load_digits`, :func:`load_diabetes`, :func:`load_linnerud` and :func:`load_boston` (:issue:`7154`) by :user:`Manvendra Singh`.

- Simplification of the ``clone`` function; deprecate support for estimators that modify parameters in ``__init__``. :issue:`5540` by `Andreas Müller`_.

- When unpickling a scikit-learn estimator in a different version than the one the estimator was trained with, a ``UserWarning`` is raised; see :ref:`the documentation on model persistence ` for more details. (:issue:`7248`) By `Andreas Müller`_.

Bug fixes
.........

Trees and ensembles

- Random forest, extra trees, decision trees and gradient boosting no longer accept ``min_samples_split=1``, as at least 2 samples are required to split a decision tree node. By `Arnaud Joly`_

- :class:`ensemble.VotingClassifier` now raises ``NotFittedError`` if ``predict``, ``transform`` or ``predict_proba`` are called on the non-fitted estimator. By `Sebastian Raschka`_.

- Fix bug where :class:`ensemble.AdaBoostClassifier` and :class:`ensemble.AdaBoostRegressor` would perform poorly if the ``random_state`` was fixed (:issue:`7411`). By `Joel Nothman`_.

- Fix bug in ensembles with randomization where the ensemble would not set ``random_state`` on base estimators in a pipeline or similar nesting (:issue:`7411`). Note that results for :class:`ensemble.BaggingClassifier`, :class:`ensemble.BaggingRegressor`, :class:`ensemble.AdaBoostClassifier` and :class:`ensemble.AdaBoostRegressor` will now differ from previous versions. By `Joel Nothman`_.

Linear, kernelized and related models

- Fixed incorrect gradient computation for ``loss='squared_epsilon_insensitive'`` in :class:`linear_model.SGDClassifier` and :class:`linear_model.SGDRegressor` (:issue:`6764`). By :user:`Wenhua Yang `.

- Fix bug in :class:`linear_model.LogisticRegressionCV` where ``solver='liblinear'`` did not accept ``class_weight='balanced'`` (:issue:`6817`). By `Tom Dupre la Tour`_.

- Fix bug in :class:`neighbors.RadiusNeighborsClassifier` where an error occurred when there were outliers being labelled and a weight function specified (:issue:`6902`). By `LeonieBorne `_.

- Fix :class:`linear_model.ElasticNet`'s sparse decision function to match the dense output in the multioutput case.

Decomposition, manifold learning and clustering

- :class:`decomposition.RandomizedPCA`'s default number of `iterated_power` is 4 instead of 3. :issue:`5141` by :user:`Giorgio Patrini `.

- :func:`utils.extmath.randomized_svd` performs 4 power iterations by default, instead of 0. In practice this is enough for obtaining a good approximation of the true eigenvalues/vectors in the presence of noise. When `n_components` is small (``< .1 * min(X.shape)``), `n_iter` is set to 7, unless the user specifies a higher number. This improves precision with few components. :issue:`5299` by :user:`Giorgio Patrini`.

- The whiten/non-whiten inconsistency between components of :class:`decomposition.PCA` and :class:`decomposition.RandomizedPCA` (now factored into PCA, see the New features) is fixed. `components_` are stored with no whitening. :issue:`5299` by :user:`Giorgio Patrini `.

- Fixed bug in :func:`manifold.spectral_embedding` where the diagonal of the unnormalized Laplacian matrix was incorrectly set to 1. :issue:`4995` by :user:`Peter Fischer `.

- Fixed incorrect initialization of :func:`utils.arpack.eigsh` on all occurrences. Affects :class:`cluster.bicluster.SpectralBiclustering`, :class:`decomposition.KernelPCA`, :class:`manifold.LocallyLinearEmbedding`, and :class:`manifold.SpectralEmbedding` (:issue:`5012`). By :user:`Peter Fischer `.

- The ``explained_variance_ratio_`` attribute calculated with the SVD solver of :class:`discriminant_analysis.LinearDiscriminantAnalysis` now returns correct results. By :user:`JPFrancoia `

Preprocessing and feature selection

- :func:`preprocessing.data._transform_selected` now always passes a copy of ``X`` to the transform function when ``copy=True`` (:issue:`7194`). By `Caio Oliveira `_.

Model evaluation and meta-estimators

- :class:`model_selection.StratifiedKFold` now raises an error if the number of labels for any individual class is less than ``n_folds``. :issue:`6182` by :user:`Devashish Deshpande `.

- Fixed bug in :class:`model_selection.StratifiedShuffleSplit` where train and test samples could overlap in some edge cases; see :issue:`6121` for more details. By `Loic Esteve`_.

- Fix in :class:`sklearn.model_selection.StratifiedShuffleSplit` to return splits of size ``train_size`` and ``test_size`` in all cases (:issue:`6472`). By `Andreas Müller`_.

- Cross-validation of :class:`OneVsOneClassifier` and :class:`OneVsRestClassifier` now works with precomputed kernels (a short sketch follows). :issue:`7350` by :user:`Russell Smith `.
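
A minimal sketch of the now-supported pattern (the data and estimator choice are illustrative): with a precomputed kernel, the ``X`` passed to the cross-validation utilities is the square kernel matrix, which is sliced consistently for each fold::

    from sklearn.datasets import make_classification
    from sklearn.metrics.pairwise import pairwise_kernels
    from sklearn.model_selection import cross_val_score
    from sklearn.multiclass import OneVsRestClassifier
    from sklearn.svm import SVC

    X, y = make_classification(n_samples=60, n_classes=3, n_informative=6,
                               random_state=0)
    K = pairwise_kernels(X, metric="linear")  # (n_samples, n_samples) matrix

    clf = OneVsRestClassifier(SVC(kernel="precomputed"))
    print(cross_val_score(clf, K, y, cv=3))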

- Fix incomplete ``predict_proba`` method delegation from :class:`model_selection.GridSearchCV` to :class:`linear_model.SGDClassifier` (:issue:`7159`) by `Yichuan Liu `_.

Metrics

- Fix bug in :func:`metrics.silhouette_score` in which clusters of size 1 were incorrectly scored. They should get a score of 0. By `Joel Nothman`_.

- Fix bug in :func:`metrics.silhouette_samples` so that it now works with arbitrary labels, not just those ranging from 0 to n_clusters - 1.

- Fix bug where expected and adjusted mutual information were incorrect if cluster contingency cells exceeded ``2**16``. By `Joel Nothman`_.

- :func:`metrics.pairwise.pairwise_distances` now converts arrays to boolean arrays when required in ``scipy.spatial.distance``. :issue:`5460` by `Tom Dupre la Tour`_.

- Fix sparse input support in :func:`metrics.silhouette_score` as well as in the example examples/text/document_clustering.py. By :user:`YenChen Lin `.

- :func:`metrics.roc_curve` and :func:`metrics.precision_recall_curve` no longer round ``y_score`` values when creating ROC curves; this was causing problems for users with very small differences in scores (:issue:`7353`).

Miscellaneous

- :func:`model_selection.tests._search._check_param_grid` now works correctly with all types that extend/implement `Sequence` (except string), including range (Python 3.x) and xrange (Python 2.x). :issue:`7323` by Viacheslav Kovalevskyi.

- :func:`utils.extmath.randomized_range_finder` is more numerically stable when many power iterations are requested, since it applies LU normalization by default. If ``n_iter < 2``, numerical issues are unlikely, so no normalization is applied. Other normalization options are available: ``'none'``, ``'LU'`` and ``'QR'``. :issue:`5141` by :user:`Giorgio Patrini `.

- Fix a bug where some formats of ``scipy.sparse`` matrix, and estimators with them as parameters, could not be passed to :func:`base.clone`. By `Loic Esteve`_.

- :func:`datasets.load_svmlight_file` is now able to read long int QID values. :issue:`7101` by :user:`Ibraim Ganiev `.

API changes summary
-------------------

Linear, kernelized and related models

- ``residual_metric`` has been deprecated in :class:`linear_model.RANSACRegressor`. Use ``loss`` instead. By `Manoj Kumar`_.

- Access to the public attributes ``.X_`` and ``.y_`` has been deprecated in :class:`isotonic.IsotonicRegression`. By :user:`Jonathan Arfa `.

Decomposition, manifold learning and clustering

- The old :class:`mixture.DPGMM` is deprecated in favor of the new :class:`mixture.BayesianGaussianMixture` (with the parameter ``weight_concentration_prior_type='dirichlet_process'``). The new class solves the computational problems of the old class and computes the Gaussian mixture with a Dirichlet process prior faster than before. :issue:`7295` by :user:`Wei Xue ` and :user:`Thierry Guillemot `.

- The old :class:`mixture.VBGMM` is deprecated in favor of the new :class:`mixture.BayesianGaussianMixture` (with the parameter ``weight_concentration_prior_type='dirichlet_distribution'``). The new class solves the computational problems of the old class and computes the Variational Bayesian Gaussian mixture faster than before. :issue:`6651` by :user:`Wei Xue ` and :user:`Thierry Guillemot `.

- The old :class:`mixture.GMM` is deprecated in favor of the new :class:`mixture.GaussianMixture`. The new class computes the Gaussian mixture faster than before, and some of the computational problems have been solved. :issue:`6666` by :user:`Wei Xue ` and :user:`Thierry Guillemot `.

Model evaluation and meta-estimators

- The :mod:`sklearn.cross_validation`, :mod:`sklearn.grid_search` and :mod:`sklearn.learning_curve` modules have been deprecated and the classes and functions have been reorganized into the :mod:`sklearn.model_selection` module. Ref :ref:`model_selection_changes` for more information. :issue:`4294` by `Raghav RV`_.

- The ``grid_scores_`` attribute of :class:`model_selection.GridSearchCV` and :class:`model_selection.RandomizedSearchCV` is deprecated in favor of the attribute ``cv_results_``. Ref :ref:`model_selection_changes` for more information. :issue:`6697` by `Raghav RV`_.

- The parameters ``n_iter`` or ``n_folds`` in old CV splitters are replaced by the new parameter ``n_splits``, since it provides a consistent and unambiguous interface to represent the number of train-test splits. :issue:`7187` by :user:`YenChen Lin `.

- The ``classes`` parameter was renamed to ``labels`` in :func:`metrics.hamming_loss`. :issue:`7260` by :user:`Sebastián Vanrell `.

- The splitter classes ``LabelKFold``, ``LabelShuffleSplit``, ``LeaveOneLabelOut`` and ``LeavePLabelsOut`` are renamed to :class:`model_selection.GroupKFold`, :class:`model_selection.GroupShuffleSplit`, :class:`model_selection.LeaveOneGroupOut` and :class:`model_selection.LeavePGroupsOut` respectively. Also, the parameter ``labels`` in the :func:`split` method of the newly renamed splitters :class:`model_selection.LeaveOneGroupOut` and :class:`model_selection.LeavePGroupsOut` is renamed to ``groups``. Additionally, in :class:`model_selection.LeavePGroupsOut` the parameter ``n_labels`` is renamed to ``n_groups``. :issue:`6660` by `Raghav RV`_.

- Error and loss names for ``scoring`` parameters are now prefixed by ``'neg_'``, such as ``neg_mean_squared_error``. The unprefixed versions are deprecated and will be removed in version 0.20 (a short sketch follows). :issue:`7261` by :user:`Tim Head `.
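
For instance, with the new naming an error-based scorer returns negated values, so that greater is always better (the data and model below are illustrative)::

    from sklearn.datasets import make_regression
    from sklearn.linear_model import Ridge
    from sklearn.model_selection import cross_val_score

    X, y = make_regression(n_samples=50, random_state=0)

    # Scores are negated MSE: larger (closer to zero) means a better fit.
    scores = cross_val_score(Ridge(), X, y,
                             scoring="neg_mean_squared_error", cv=3)
    print(scores)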

Code Contributors
-----------------
Aditya Joshi, Alejandro, Alexander Fabisch, Alexander Loginov, Alexander Minyushkin, Alexander Rudy, Alexandre Abadie, Alexandre Abraham, Alexandre Gramfort, Alexandre Saint, alexfields, Alvaro Ulloa, alyssaq, Amlan Kar, Andreas Mueller, andrew giessel, Andrew Jackson, Andrew McCulloh, Andrew Murray, Anish Shah, Arafat, Archit Sharma, Ariel Rokem, Arnaud Joly, Arnaud Rachez, Arthur Mensch, Ash Hoover, asnt, b0noI, Behzad Tabibian, Bernardo, Bernhard Kratzwald, Bhargav Mangipudi, blakeflei, Boyuan Deng, Brandon Carter, Brett Naul, Brian McFee, Caio Oliveira, Camilo Lamus, Carol Willing, Cass, CeShine Lee, Charles Truong, Chyi-Kwei Yau, CJ Carey, codevig, Colin Ni, Dan Shiebler, Daniel, Daniel Hnyk, David Ellis, David Nicholson, David Staub, David Thaler, David Warshaw, Davide Lasagna, Deborah, definitelyuncertain, Didi Bar-Zev, djipey, dsquareindia, edwinENSAE, Elias Kuthe, Elvis DOHMATOB, Ethan White, Fabian Pedregosa, Fabio Ticconi, fisache, Florian Wilhelm, Francis, Francis O'Donovan, Gael Varoquaux, Ganiev Ibraim, ghg, Gilles Louppe, Giorgio Patrini, Giovanni Cherubin, Giovanni Lanzani, Glenn Qian, Gordon Mohr, govin-vatsan, Graham Clenaghan, Greg Reda, Greg Stupp, Guillaume Lemaitre, Gustav Mörtberg, halwai, Harizo Rajaona, Harry Mavroforakis, hashcode55, hdmetor, Henry Lin, Hobson Lane, Hugo Bowne-Anderson, Igor Andriushchenko, Imaculate, Inki Hwang, Isaac Sijaranamual, Ishank Gulati, Issam Laradji, Iver Jordal, jackmartin, Jacob Schreiber, Jake Vanderplas, James Fiedler, James Routley, Jan Zikes, Janna Brettingen, jarfa, Jason Laska, jblackburne, jeff levesque, Jeffrey Blackburne, Jeffrey04, Jeremy Hintz, jeremynixon, Jeroen, Jessica Yung, Jill-Jênn Vie, Jimmy Jia, Jiyuan Qian, Joel Nothman, johannah, John, John Boersma, John Kirkham, John Moeller, jonathan.striebel, joncrall, Jordi, Joseph Munoz, Joshua Cook, JPFrancoia, jrfiedler, JulianKahnert, juliathebrave, kaichogami, KamalakerDadi, Kenneth Lyons, Kevin Wang, kingjr, kjell, Konstantin Podshumok, Kornel Kielczewski, Krishna Kalyan, krishnakalyan3, Kvle Putnam, Kyle Jackson, Lars Buitinck, ldavid, LeiG, LeightonZhang, Leland McInnes, Liang-Chi Hsieh, Lilian Besson, lizsz, Loic Esteve, Louis Tiao, Léonie Borne, Mads Jensen, Maniteja Nandana, Manoj Kumar, Manvendra Singh, Marco, Mario Krell, Mark Bao, Mark Szepieniec, Martin Madsen, MartinBpr, MaryanMorel, Massil, Matheus, Mathieu Blondel, Mathieu Dubois, Matteo, Matthias Ekman, Max Moroz, Michael Scherer, michiaki ariga, Mikhail Korobov, Moussa Taifi, mrandrewandrade, Mridul Seth, nadya-p, Naoya Kanai, Nate George, Nelle Varoquaux, Nelson Liu, Nick James, NickleDave, Nico, Nicolas Goix, Nikolay Mayorov, ningchi, nlathia, okbalefthanded, Okhlopkov, Olivier Grisel, Panos Louridas, Paul Strickland, Perrine Letellier, pestrickland, Peter Fischer, Pieter, Ping-Yao, Chang, practicalswift, Preston Parry, Qimu Zheng, Rachit Kansal, Raghav RV, Ralf Gommers, Ramana.S, Rammig, Randy Olson, Rob Alexander, Robert Lutz, Robin Schucker, Rohan Jain, Ruifeng Zheng, Ryan Yu, Rémy Léone, saihttam, Saiwing Yeung, Sam Shleifer, Samuel St-Jean, Sartaj Singh, Sasank Chilamkurthy, saurabh.bansod, Scott Andrews, Scott Lowe, seales, Sebastian Raschka, Sebastian Saeger, Sebastián Vanrell, Sergei Lebedev, shagun Sodhani, shanmuga cv, Shashank Shekhar, shawpan, shengxiduan, Shota, shuckle16, Skipper Seabold, sklearn-ci, SmedbergM, srvanrell, Sébastien Lerique, Taranjeet, themrmax, Thierry, Thierry Guillemot, Thomas, Thomas
Hallock, Thomas Moreau, Tim Head, tKammy, toastedcornflakes, Tom, TomDLT, Toshihiro Kamishima, tracer0tong, Trent Hauck, trevorstephens, Tue Vo, Varun, Varun Jewalikar, Viacheslav, Vighnesh Birodkar, Vikram, Villu Ruusmann, Vinayak Mehta, walter, waterponey, Wenhua Yang, Wenjian Huang, Will Welch, wyseguy7, xyguo, yanlend, Yaroslav Halchenko, yelite, Yen, YenChenLin, Yichuan Liu, Yoav Ram, Yoshiki, Zheng RuiFeng, zivori, Óscar Nájera

.. currentmodule:: sklearn

.. _changes_0_17_1:

Version 0.17.1
==============

**February 18, 2016**

Changelog
---------

Bug fixes
.........

- Upgrade vendored joblib to version 0.9.4, which fixes an important bug in ``joblib.Parallel`` that could silently yield wrong results when working on datasets larger than 1MB: https://github.com/joblib/joblib/blob/0.9.4/CHANGES.rst

- Fixed reading of Bunch pickles generated with scikit-learn version <= 0.16. This can affect users who have already downloaded a dataset with scikit-learn 0.16 and are loading it with scikit-learn 0.17. See :issue:`6196` for how this affected :func:`datasets.fetch_20newsgroups`. By `Loic Esteve`_.

- Fixed a bug that prevented using the ROC AUC score to perform grid search on several CPUs / cores on large arrays. See :issue:`6147`. By `Olivier Grisel`_.

- Fixed a bug that prevented properly setting the ``presort`` parameter in :class:`ensemble.GradientBoostingRegressor`. See :issue:`5857`. By Andrew McCulloh.

- Fixed a joblib error when evaluating the perplexity of a :class:`decomposition.LatentDirichletAllocation` model. See :issue:`6258`. By Chyi-Kwei Yau.

.. _changes_0_17:

Version 0.17
============

**November 5, 2015**

Changelog
---------

New features
............

- All the Scaler classes except :class:`preprocessing.RobustScaler` can be fitted online by calling `partial_fit`. By :user:`Giorgio Patrini `.

- The new class :class:`ensemble.VotingClassifier` implements a "majority rule" / "soft voting" ensemble classifier to combine estimators for classification. By `Sebastian Raschka`_.

- The new class :class:`preprocessing.RobustScaler` provides an alternative to :class:`preprocessing.StandardScaler` for feature-wise centering and range normalization that is robust to outliers. By :user:`Thomas Unterthiner `.

- The new class :class:`preprocessing.MaxAbsScaler` provides an alternative to :class:`preprocessing.MinMaxScaler` for feature-wise range normalization when the data is already centered or sparse. By :user:`Thomas Unterthiner `.

- The new class :class:`preprocessing.FunctionTransformer` turns a Python function into a ``Pipeline``-compatible transformer object. By Joe Jevnik.

- The new classes :class:`cross_validation.LabelKFold` and :class:`cross_validation.LabelShuffleSplit` generate train-test folds, respectively similar to :class:`cross_validation.KFold` and :class:`cross_validation.ShuffleSplit`, except that the folds are conditioned on a label array. By `Brian McFee`_, :user:`Jean Kossaifi ` and `Gilles Louppe`_.

- :class:`decomposition.LatentDirichletAllocation` implements the Latent Dirichlet Allocation topic model with online variational inference. By :user:`Chyi-Kwei Yau `, with code based on an implementation by Matt Hoffman. (:issue:`3659`)

- The new solver ``sag`` implements a Stochastic Average Gradient descent and is available in both :class:`linear_model.LogisticRegression` and :class:`linear_model.Ridge`. This solver is very efficient for large datasets. By :user:`Danny Sullivan ` and `Tom Dupre la Tour`_. (:issue:`4738`)

- The new solver ``cd`` implements Coordinate Descent in :class:`decomposition.NMF`. The previous solver, based on Projected Gradient, is still available by setting the new parameter ``solver`` to ``pg``, but is deprecated and will be removed in 0.19, along with :class:`decomposition.ProjectedGradientNMF` and the parameters ``sparseness``, ``eta``, ``beta`` and ``nls_max_iter``. New parameters ``alpha`` and ``l1_ratio`` control L1 and L2 regularization, and ``shuffle`` adds a shuffling step in the ``cd`` solver. By `Tom Dupre la Tour`_ and `Mathieu Blondel`_.

Enhancements
............

- :class:`manifold.TSNE` now supports approximate optimization via the Barnes-Hut method, leading to much faster fitting. By Christopher Erick Moody. (:issue:`4025`)

- :class:`cluster.mean_shift_.MeanShift` now supports parallel execution, as implemented in the ``mean_shift`` function. By :user:`Martino Sorbaro `.

- :class:`naive_bayes.GaussianNB` now supports fitting with ``sample_weight``. By `Jan Hendrik Metzen`_.

- :class:`dummy.DummyClassifier` now supports a prior fitting strategy. By `Arnaud Joly`_.

- Added a ``fit_predict`` method for :class:`mixture.GMM` and subclasses. By :user:`Cory Lorenz `.

- Added the :func:`metrics.label_ranking_loss` metric. By `Arnaud Joly`_.

- Added the :func:`metrics.cohen_kappa_score` metric.

- Added a ``warm_start`` constructor parameter to the bagging ensemble models to increase the size of the ensemble. By :user:`Tim Head `.

- Added the option to use multi-output regression metrics without averaging. By Konstantin Shmelkov and :user:`Michael Eickenberg`.

- Added the ``stratify`` option to :func:`cross_validation.train_test_split` for stratified splitting. By Miroslav Batchkarov.

- The :func:`tree.export_graphviz` function now supports aesthetic improvements for :class:`tree.DecisionTreeClassifier` and :class:`tree.DecisionTreeRegressor`, including options for coloring nodes by their majority class or impurity, showing variable names, and using node proportions instead of raw sample counts. By `Trevor Stephens`_.

- Improved the speed of the ``newton-cg`` solver in :class:`linear_model.LogisticRegression` by avoiding loss computation. By `Mathieu Blondel`_ and `Tom Dupre la Tour`_.

- The ``class_weight="auto"`` heuristic in classifiers supporting ``class_weight`` was deprecated and replaced by the ``class_weight="balanced"`` option, which has a simpler formula and interpretation. By `Hanna Wallach`_ and `Andreas Müller`_.

- Added a ``class_weight`` parameter to automatically weight samples by class frequency for :class:`linear_model.PassiveAggressiveClassifier`. By `Trevor Stephens`_.

- Added backlinks from the API reference pages to the user guide. By `Andreas Müller`_.

- The ``labels`` parameter to :func:`sklearn.metrics.f1_score`, :func:`sklearn.metrics.fbeta_score`, :func:`sklearn.metrics.recall_score` and :func:`sklearn.metrics.precision_score` has been extended. It is now possible to ignore one or more labels, such as where a multiclass problem has a majority class to ignore. By `Joel Nothman`_.

- Added ``sample_weight`` support to :class:`linear_model.RidgeClassifier`. By `Trevor Stephens`_.

- Provide an option for sparse output from :func:`sklearn.metrics.pairwise.cosine_similarity`. By :user:`Jaidev Deshpande `.
- -- Add :func:`minmax_scale` to provide a function interface for - :class:`MinMaxScaler`. By :user:`Thomas Unterthiner `. - -- ``dump_svmlight_file`` now handles multi-label datasets. - By Chih-Wei Chang. - -- RCV1 dataset loader (:func:`sklearn.datasets.fetch_rcv1`). - By `Tom Dupre la Tour`_. - -- The "Wisconsin Breast Cancer" classical two-class classification dataset - is now included in scikit-learn, available with - :func:`sklearn.datasets.load_breast_cancer`. - -- Upgraded to joblib 0.9.3 to benefit from the new automatic batching of - short tasks. This makes it possible for scikit-learn to benefit from - parallelism when many very short tasks are executed in parallel, for - instance by the :class:`grid_search.GridSearchCV` meta-estimator - with ``n_jobs > 1`` used with a large grid of parameters on a small - dataset. By `Vlad Niculae`_, `Olivier Grisel`_ and `Loic Esteve`_. - -- For more details about changes in joblib 0.9.3 see the release notes: - https://github.com/joblib/joblib/blob/master/CHANGES.rst#release-093 - -- Improved speed (3 times per iteration) of - :class:`decomposition.DictLearning` with the coordinate descent method - from :class:`linear_model.Lasso`. By :user:`Arthur Mensch `. - -- Parallel processing (threaded) for queries of nearest neighbors - (using the ball-tree) by Nikolay Mayorov. - -- Allow :func:`datasets.make_multilabel_classification` to output - a sparse ``y``. By Kashif Rasul. - -- :class:`cluster.DBSCAN` now accepts a sparse matrix of precomputed - distances, allowing memory-efficient distance precomputation. By - `Joel Nothman`_. - -- :class:`tree.DecisionTreeClassifier` now exposes an ``apply`` method - for retrieving the leaf indices that samples are predicted as. By - :user:`Daniel Galvez ` and `Gilles Louppe`_. - -- Speed up decision tree regressors, random forest regressors, extra trees - regressors and gradient boosting estimators by computing a proxy - of the impurity improvement during the tree growth. The proxy quantity is - such that the split that maximizes this value also maximizes the impurity - improvement. By `Arnaud Joly`_, :user:`Jacob Schreiber ` - and `Gilles Louppe`_. - -- Speed up tree based methods by reducing the number of computations needed - when computing the impurity measure, taking into account the linear - relationship of the computed statistics. The effect is particularly - visible with extra trees and on datasets with categorical or sparse - features. By `Arnaud Joly`_. - -- :class:`ensemble.GradientBoostingRegressor` and - :class:`ensemble.GradientBoostingClassifier` now expose an ``apply`` - method for retrieving the leaf indices each sample ends up in under - each tree. By :user:`Jacob Schreiber `. - -- Add ``sample_weight`` support to :class:`linear_model.LinearRegression`. - By Sonny Hu. (:issue:`4881`) - -- Add ``n_iter_without_progress`` to :class:`manifold.TSNE` to control - the stopping criterion. By Santi Villalba. (:issue:`5186`) - -- Added optional parameter ``random_state`` in :class:`linear_model.Ridge`, to - set the seed of the pseudo random generator used in the ``sag`` solver. By `Tom Dupre la Tour`_. - -- Added optional parameter ``warm_start`` in - :class:`linear_model.LogisticRegression`. If set to True, the solvers - ``lbfgs``, ``newton-cg`` and ``sag`` will be initialized with the - coefficients computed in the previous fit. By `Tom Dupre la Tour`_. - -- Added ``sample_weight`` support to :class:`linear_model.LogisticRegression` for - the ``lbfgs``, ``newton-cg``, and ``sag`` solvers. By `Valentin Stolbunov`_.
- Support added to the ``liblinear`` solver. By `Manoj Kumar`_. - -- Added optional parameter ``presort`` to :class:`ensemble.GradientBoostingRegressor` - and :class:`ensemble.GradientBoostingClassifier`, keeping default behavior - the same. This allows gradient boosters to turn off presorting when building - deep trees or using sparse data. By :user:`Jacob Schreiber `. - -- Altered :func:`metrics.roc_curve` to drop unnecessary thresholds by - default. By :user:`Graham Clenaghan `. - -- Added :class:`feature_selection.SelectFromModel` meta-transformer which can - be used along with estimators that have `coef_` or `feature_importances_` - attributes to select important features of the input data. By - :user:`Maheshakya Wijewardena `, `Joel Nothman`_ and `Manoj Kumar`_. - -- Added :func:`metrics.pairwise.laplacian_kernel`. By `Clyde Fare `_. - -- :class:`covariance.GraphLasso` allows separate control of the convergence criterion - for the Elastic-Net subproblem via the ``enet_tol`` parameter. - -- Improved verbosity in :class:`decomposition.DictionaryLearning`. - -- :class:`ensemble.RandomForestClassifier` and - :class:`ensemble.RandomForestRegressor` no longer explicitly store the - samples used in bagging, resulting in a much reduced memory footprint for - storing random forest models. - -- Added ``positive`` option to :class:`linear_model.Lars` and - :func:`linear_model.lars_path` to force coefficients to be positive. - (:issue:`5131`) - -- Added the ``X_norm_squared`` parameter to :func:`metrics.pairwise.euclidean_distances` - to provide precomputed squared norms for ``X``. - -- Added the ``fit_predict`` method to :class:`pipeline.Pipeline`. - -Bug fixes -......... - -- Fixed non-determinism in :class:`dummy.DummyClassifier` with sparse - multi-label output. By `Andreas Müller`_. - -- Fixed the output shape of :class:`linear_model.RANSACRegressor` to - ``(n_samples, )``. By `Andreas Müller`_. - -- Fixed bug in :class:`decomposition.DictLearning` when ``n_jobs < 0``. By - `Andreas Müller`_. - -- Fixed bug where :class:`grid_search.RandomizedSearchCV` could consume a - lot of memory for large discrete grids. By `Joel Nothman`_. - -- Fixed bug in :class:`linear_model.LogisticRegressionCV` where `penalty` was ignored - in the final fit. By `Manoj Kumar`_. - -- Fixed bug in :class:`ensemble.forest.ForestClassifier` when computing - ``oob_score`` and ``X`` is a ``sparse.csc_matrix``. By :user:`Ankur Ankan `. - -- All regressors now consistently handle and warn when given ``y`` that is of - shape ``(n_samples, 1)``. By `Andreas Müller`_ and Henry Lin. - (:issue:`5431`) - -- Fix in :class:`cluster.KMeans` cluster reassignment for sparse input by - `Lars Buitinck`_. - -- Fixed a bug in :class:`lda.LDA` that could cause asymmetric covariance - matrices when using shrinkage. By `Martin Billinger`_. - -- Fixed :func:`cross_validation.cross_val_predict` for estimators with - sparse predictions. By Buddha Prakash. - -- Fixed the ``predict_proba`` method of :class:`linear_model.LogisticRegression` - to use soft-max instead of one-vs-rest normalization. By `Manoj Kumar`_. - (:issue:`5182`) - -- Fixed the :func:`partial_fit` method of :class:`linear_model.SGDClassifier` - when called with ``average=True``. By :user:`Andrew Lamb `. - (:issue:`5282`) - -- Dataset fetchers use different filenames under Python 2 and Python 3 to - avoid pickling compatibility issues. By `Olivier Grisel`_.
- (:issue:`5355`) - -- Fixed a bug in :class:`naive_bayes.GaussianNB` which caused classification - results to depend on scale. By `Jake Vanderplas`_. - -- Temporarily fixed :class:`linear_model.Ridge`, which was incorrect - when fitting the intercept in the case of sparse data. The fix - automatically changes the solver to 'sag' in this case. - :issue:`5360` by `Tom Dupre la Tour`_. - -- Fixed a performance bug in :class:`decomposition.RandomizedPCA` on data - with a large number of features and fewer samples. (:issue:`4478`) - By `Andreas Müller`_, `Loic Esteve`_ and :user:`Giorgio Patrini `. - -- Fixed bug in :class:`cross_decomposition.PLS` that yielded unstable and - platform dependent output, and failed on `fit_transform`. - By :user:`Arthur Mensch `. - -- Fixes to the ``Bunch`` class used to store datasets. - -- Fixed :func:`ensemble.plot_partial_dependence` ignoring the - ``percentiles`` parameter. - -- Providing a ``set`` as vocabulary in ``CountVectorizer`` no longer - leads to inconsistent results when pickling. - -- Fixed the conditions on when a precomputed Gram matrix needs to - be recomputed in :class:`linear_model.LinearRegression`, - :class:`linear_model.OrthogonalMatchingPursuit`, - :class:`linear_model.Lasso` and :class:`linear_model.ElasticNet`. - -- Fixed inconsistent memory layout in the coordinate descent solver - that affected :class:`linear_model.DictionaryLearning` and - :class:`covariance.GraphLasso`. (:issue:`5337`) - By `Olivier Grisel`_. - -- :class:`manifold.LocallyLinearEmbedding` no longer ignores the ``reg`` - parameter. - -- Nearest Neighbor estimators with custom distance metrics can now be pickled. - (:issue:`4362`) - -- Fixed a bug in :class:`pipeline.FeatureUnion` where ``transformer_weights`` - were not properly handled when performing grid-searches. - -- Fixed a bug in :class:`linear_model.LogisticRegression` and - :class:`linear_model.LogisticRegressionCV` when using - ``class_weight='balanced'`` or ``class_weight='auto'``. - By `Tom Dupre la Tour`_. - -- Fixed bug :issue:`5495` when - doing OVR(SVC(decision_function_shape="ovr")). Fixed by - :user:`Elvis Dohmatob `. - - -API changes summary ------------------- -- Attributes `data_min`, `data_max` and `data_range` in - :class:`preprocessing.MinMaxScaler` are deprecated and won't be available - from 0.19. Instead, the class now exposes `data_min_`, `data_max_` - and `data_range_`. By :user:`Giorgio Patrini `. - -- All Scaler classes now have a `scale_` attribute, the feature-wise - rescaling applied by their `transform` methods. The old attribute `std_` - in :class:`preprocessing.StandardScaler` is deprecated and superseded - by `scale_`; it won't be available in 0.19. By :user:`Giorgio Patrini `. - -- :class:`svm.SVC` and :class:`svm.NuSVC` now have a ``decision_function_shape`` - parameter to make their decision function of shape ``(n_samples, n_classes)`` - by setting ``decision_function_shape='ovr'``. This will be the default behavior - starting in 0.19. By `Andreas Müller`_. - -- Passing 1D data arrays as input to estimators is now deprecated as it - caused confusion in how the array elements should be interpreted - as features or as samples. All data arrays are now expected - to be explicitly shaped ``(n_samples, n_features)``. - By :user:`Vighnesh Birodkar `. - -- :class:`lda.LDA` and :class:`qda.QDA` have been moved to - :class:`discriminant_analysis.LinearDiscriminantAnalysis` and - :class:`discriminant_analysis.QuadraticDiscriminantAnalysis`.
- -- The ``store_covariance`` and ``tol`` parameters have been moved from - the fit method to the constructor in - :class:`discriminant_analysis.LinearDiscriminantAnalysis` and the - ``store_covariances`` and ``tol`` parameters have been moved from the - fit method to the constructor in - :class:`discriminant_analysis.QuadraticDiscriminantAnalysis`. - -- Models inheriting from ``_LearntSelectorMixin`` will no longer support the - transform methods (i.e. RandomForests, GradientBoosting, LogisticRegression, - DecisionTrees, SVMs and SGD-related models). Instead, wrap these models with the - meta-transformer :class:`feature_selection.SelectFromModel` to remove - features (according to `coefs_` or `feature_importances_`) - which are below a certain threshold value. - -- :class:`cluster.KMeans` re-runs cluster-assignments in case of non-convergence, - to ensure consistency of ``predict(X)`` and ``labels_``. By - :user:`Vighnesh Birodkar `. - -- Classifier and Regressor models are now tagged as such using the - ``_estimator_type`` attribute. - -- Cross-validation iterators always provide indices into training and test set, - not boolean masks. - -- The ``decision_function`` on all regressors was deprecated and will be - removed in 0.19. Use ``predict`` instead. - -- :func:`datasets.load_lfw_pairs` is deprecated and will be removed in 0.19. - Use :func:`datasets.fetch_lfw_pairs` instead. - -- The deprecated ``hmm`` module was removed. - -- The deprecated ``Bootstrap`` cross-validation iterator was removed. - -- The deprecated ``Ward`` and ``WardAgglomerative`` classes have been removed. - Use :class:`cluster.AgglomerativeClustering` instead. - -- :func:`cross_validation.check_cv` is now a public function. - -- The property ``residues_`` of :class:`linear_model.LinearRegression` is deprecated - and will be removed in 0.19. - -- The deprecated ``n_jobs`` parameter of :class:`linear_model.LinearRegression` has been moved - to the constructor. - -- Removed deprecated ``class_weight`` parameter from :class:`linear_model.SGDClassifier`'s ``fit`` - method. Use the constructor parameter instead. - -- The deprecated support for the sequence of sequences (or list of lists) multilabel - format was removed. To convert to and from the supported binary - indicator matrix format, use - :class:`MultiLabelBinarizer `. - -- The behavior of calling the ``inverse_transform`` method of ``pipeline.Pipeline`` will - change in 0.19. It will no longer reshape one-dimensional input to two-dimensional input. - -- The deprecated attributes ``indicator_matrix_``, ``multilabel_`` and ``classes_`` of - :class:`preprocessing.LabelBinarizer` were removed. - -- Using ``gamma=0`` in :class:`svm.SVC` and :class:`svm.SVR` to automatically set the - gamma to ``1. / n_features`` is deprecated and will be removed in 0.19. - Use ``gamma="auto"`` instead.
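- -For illustration only (an editor's sketch, not part of the original
- changelog entries): code relying on the deprecated ``gamma=0`` spelling
- can switch to the explicit ``gamma="auto"`` spelling with no change in
- behavior, since both mean ``1. / n_features``::
-
-    from sklearn.svm import SVC
-
-    # Deprecated spelling: gamma=0 implicitly meant "use 1. / n_features".
-    # clf = SVC(kernel='rbf', gamma=0)
-
-    # Explicit, forward-compatible equivalent:
-    clf = SVC(kernel='rbf', gamma='auto')
-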
- -Code Contributors ------------------ -Aaron Schumacher, Adithya Ganesh, akitty, Alexandre Gramfort, Alexey Grigorev, -Ali Baharev, Allen Riddell, Ando Saabas, Andreas Mueller, Andrew Lamb, Anish -Shah, Ankur Ankan, Anthony Erlinger, Ari Rouvinen, Arnaud Joly, Arnaud Rachez, -Arthur Mensch, banilo, Barmaley.exe, benjaminirving, Boyuan Deng, Brett Naul, -Brian McFee, Buddha Prakash, Chi Zhang, Chih-Wei Chang, Christof Angermueller, -Christoph Gohlke, Christophe Bourguignat, Christopher Erick Moody, Chyi-Kwei -Yau, Cindy Sridharan, CJ Carey, Clyde-fare, Cory Lorenz, Dan Blanchard, Daniel -Galvez, Daniel Kronovet, Danny Sullivan, Data1010, David, David D Lowe, David -Dotson, djipey, Dmitry Spikhalskiy, Donne Martin, Dougal J. Sutherland, Dougal -Sutherland, edson duarte, Eduardo Caro, Eric Larson, Eric Martin, Erich -Schubert, Fernando Carrillo, Frank C. Eckert, Frank Zalkow, Gael Varoquaux, -Ganiev Ibraim, Gilles Louppe, Giorgio Patrini, giorgiop, Graham Clenaghan, -Gryllos Prokopis, gwulfs, Henry Lin, Hsuan-Tien Lin, Immanuel Bayer, Ishank -Gulati, Jack Martin, Jacob Schreiber, Jaidev Deshpande, Jake Vanderplas, Jan -Hendrik Metzen, Jean Kossaifi, Jeffrey04, Jeremy, jfraj, Jiali Mei, -Joe Jevnik, Joel Nothman, John Kirkham, John Wittenauer, Joseph, Joshua Loyal, -Jungkook Park, KamalakerDadi, Kashif Rasul, Keith Goodman, Kian Ho, Konstantin -Shmelkov, Kyler Brown, Lars Buitinck, Lilian Besson, Loic Esteve, Louis Tiao, -maheshakya, Maheshakya Wijewardena, Manoj Kumar, MarkTab marktab.net, Martin -Ku, Martin Spacek, MartinBpr, martinosorb, MaryanMorel, Masafumi Oyamada, -Mathieu Blondel, Matt Krump, Matti Lyra, Maxim Kolganov, mbillinger, mhg, -Michael Heilman, Michael Patterson, Miroslav Batchkarov, Nelle Varoquaux, -Nicolas, Nikolay Mayorov, Olivier Grisel, Omer Katz, Óscar Nájera, Pauli -Virtanen, Peter Fischer, Peter Prettenhofer, Phil Roth, pianomania, Preston -Parry, Raghav RV, Rob Zinkov, Robert Layton, Rohan Ramanath, Saket Choudhary, -Sam Zhang, santi, saurabh.bansod, scls19fr, Sebastian Raschka, Sebastian -Saeger, Shivan Sornarajah, SimonPL, sinhrks, Skipper Seabold, Sonny Hu, sseg, -Stephen Hoover, Steven De Gryze, Steven Seguin, Theodore Vasiloudis, Thomas -Unterthiner, Tiago Freitas Pereira, Tian Wang, Tim Head, Timothy Hopper, -tokoroten, Tom Dupré la Tour, Trevor Stephens, Valentin Stolbunov, Vighnesh -Birodkar, Vinayak Mehta, Vincent, Vincent Michel, vstolbunov, wangz10, Wei Xue, -Yucheng Low, Yury Zhauniarovich, Zac Stewart, zhai_pro, Zichen Wang - -.. _changes_0_1_16: - -Version 0.16.1 -=============== - -**April 14, 2015** - -Changelog --------- - -Bug fixes -......... - -- Allow input data larger than ``block_size`` in - :class:`covariance.LedoitWolf` by `Andreas Müller`_. - -- Fix a bug in :class:`isotonic.IsotonicRegression` deduplication that - caused unstable results in :class:`calibration.CalibratedClassifierCV` by - `Jan Hendrik Metzen`_. - -- Fix sorting of labels in :func:`preprocessing.label_binarize` by Michael Heilman. - -- Fix several stability and convergence issues in - :class:`cross_decomposition.CCA` and - :class:`cross_decomposition.PLSCanonical` by `Andreas Müller`_. - -- Fix a bug in :class:`cluster.KMeans` when ``precompute_distances=False`` - on fortran-ordered data. - -- Fix a speed regression in :class:`ensemble.RandomForestClassifier`'s ``predict`` - and ``predict_proba`` by `Andreas Müller`_. - -- Fix a regression where ``utils.shuffle`` converted lists and dataframes to arrays, by `Olivier Grisel`_. - -..
_changes_0_16: - -Version 0.16 -============ - -**March 26, 2015** - -Highlights ----------- - -- Speed improvements (notably in :class:`cluster.DBSCAN`), reduced memory - requirements, bug-fixes and better default settings. - -- Multinomial Logistic regression and a path algorithm in - :class:`linear_model.LogisticRegressionCV`. - -- Out-of-core learning of PCA via :class:`decomposition.IncrementalPCA`. - -- Probability calibration of classifiers using - :class:`calibration.CalibratedClassifierCV`. - -- :class:`cluster.Birch` clustering method for large-scale datasets. - -- Scalable approximate nearest neighbors search with locality-sensitive - hashing forests in :class:`neighbors.LSHForest`. - -- Improved error messages and better validation when using malformed input data. - -- More robust integration with pandas dataframes. - -Changelog --------- - -New features -............ - -- The new :class:`neighbors.LSHForest` implements locality-sensitive hashing - for approximate nearest neighbors search. By :user:`Maheshakya Wijewardena`. - -- Added :class:`svm.LinearSVR`. This class uses the liblinear implementation - of Support Vector Regression which is much faster for large - sample sizes than :class:`svm.SVR` with a linear kernel. By - `Fabian Pedregosa`_ and Qiang Luo. - -- Incremental fit for :class:`GaussianNB `. - -- Added ``sample_weight`` support to :class:`dummy.DummyClassifier` and - :class:`dummy.DummyRegressor`. By `Arnaud Joly`_. - -- Added the :func:`metrics.label_ranking_average_precision_score` metric. - By `Arnaud Joly`_. - -- Add the :func:`metrics.coverage_error` metric. By `Arnaud Joly`_. - -- Added :class:`linear_model.LogisticRegressionCV`. By - `Manoj Kumar`_, `Fabian Pedregosa`_, `Gael Varoquaux`_ - and `Alexandre Gramfort`_. - -- Added ``warm_start`` constructor parameter to make it possible for any - trained forest model to grow additional trees incrementally. By - :user:`Laurent Direr`. - -- Added ``sample_weight`` support to :class:`ensemble.GradientBoostingClassifier` and - :class:`ensemble.GradientBoostingRegressor`. By `Peter Prettenhofer`_. - -- Added :class:`decomposition.IncrementalPCA`, an implementation of the PCA - algorithm that supports out-of-core learning with a ``partial_fit`` - method. By `Kyle Kastner`_. - -- Averaged SGD for :class:`SGDClassifier ` - and :class:`SGDRegressor `. By - :user:`Danny Sullivan `. - -- Added :func:`cross_val_predict ` - function which computes cross-validated estimates. By `Luis Pedro Coelho`_. - -- Added :class:`linear_model.TheilSenRegressor`, a robust - generalized-median-based estimator. By :user:`Florian Wilhelm `. - -- Added :func:`metrics.median_absolute_error`, a robust metric. - By `Gael Varoquaux`_ and :user:`Florian Wilhelm `. - -- Add :class:`cluster.Birch`, an online clustering algorithm. By - `Manoj Kumar`_, `Alexandre Gramfort`_ and `Joel Nothman`_. - -- Added shrinkage support to :class:`discriminant_analysis.LinearDiscriminantAnalysis` - using two new solvers. By :user:`Clemens Brunner ` and `Martin Billinger`_. - -- Added :class:`kernel_ridge.KernelRidge`, an implementation of - kernelized ridge regression. - By `Mathieu Blondel`_ and `Jan Hendrik Metzen`_. - -- All solvers in :class:`linear_model.Ridge` now support `sample_weight`. - By `Mathieu Blondel`_. - -- Added :class:`cross_validation.PredefinedSplit` cross-validation - for fixed user-provided cross-validation folds. - By :user:`Thomas Unterthiner `.
- -- Added :class:`calibration.CalibratedClassifierCV`, an approach for - calibrating the predicted probabilities of a classifier. - By `Alexandre Gramfort`_, `Jan Hendrik Metzen`_, `Mathieu Blondel`_ - and :user:`Balazs Kegl `. - - -Enhancements -............ - -- Add option ``return_distance`` in :func:`hierarchical.ward_tree` - to return distances between nodes for both structured and unstructured - versions of the algorithm. By `Matteo Visconti di Oleggio Castello`_. - The same option was added in :func:`hierarchical.linkage_tree`. - By `Manoj Kumar`_. - -- Add support for sample weights in scorer objects. Metrics with sample - weight support will automatically benefit from it. By `Noel Dawe`_ and - `Vlad Niculae`_. - -- Added ``newton-cg`` and ``lbfgs`` solver support in - :class:`linear_model.LogisticRegression`. By `Manoj Kumar`_. - -- Add ``selection="random"`` parameter to implement stochastic coordinate - descent for :class:`linear_model.Lasso`, :class:`linear_model.ElasticNet` - and related. By `Manoj Kumar`_. - -- Add ``sample_weight`` parameter to - :func:`metrics.jaccard_similarity_score` and :func:`metrics.log_loss`. - By :user:`Jatin Shah `. - -- Support sparse multilabel indicator representation in - :class:`preprocessing.LabelBinarizer` and - :class:`multiclass.OneVsRestClassifier` (by :user:`Hamzeh Alsalhi ` with thanks - to Rohit Sivaprasad), as well as evaluation metrics (by - `Joel Nothman`_). - -- Add support for multiclass in `metrics.hinge_loss`. Added ``labels=None`` - as optional parameter. By `Saurabh Jha`. - -- Add ``sample_weight`` parameter to `metrics.hinge_loss`. - By `Saurabh Jha`. - -- Add ``multi_class="multinomial"`` option in - :class:`linear_model.LogisticRegression` to implement a Logistic - Regression solver that minimizes the cross-entropy or multinomial loss - instead of the default One-vs-Rest setting. Supports `lbfgs` and - `newton-cg` solvers. By `Lars Buitinck`_ and `Manoj Kumar`_. Solver option - `newton-cg` by Simon Wu. - -- ``DictVectorizer`` can now perform ``fit_transform`` on an iterable in a - single pass, when giving the option ``sort=False``. By :user:`Dan - Blanchard `. - -- :class:`GridSearchCV` and :class:`RandomizedSearchCV` can now be - configured to work with estimators that may fail and raise errors on - individual folds. This option is controlled by the `error_score` - parameter. This does not affect errors raised on re-fit. By - :user:`Michal Romaniuk `. - -- Add ``digits`` parameter to `metrics.classification_report` to allow - the report to show different precision of floating point numbers. By - :user:`Ian Gilmore `. - -- Add a quantile prediction strategy to the :class:`dummy.DummyRegressor`. - By :user:`Aaron Staple `. - -- Add ``handle_unknown`` option to :class:`preprocessing.OneHotEncoder` to - handle unknown categorical features more gracefully during transform. - By `Manoj Kumar`_. - -- Added support for sparse input data to decision trees and their ensembles. - By `Fares Hedyati`_ and `Arnaud Joly`_. - -- Optimized :class:`cluster.AffinityPropagation` by reducing the number of - memory allocations of large temporary data-structures. By `Antony Lee`_. - -- Parallelization of the computation of feature importances in random forests. - By `Olivier Grisel`_ and `Arnaud Joly`_. - -- Add ``n_iter_`` attribute to estimators that accept a ``max_iter`` attribute - in their constructor. By `Manoj Kumar`_.
- -- Added decision function for :class:`multiclass.OneVsOneClassifier`. - By `Raghav RV`_ and :user:`Kyle Beauchamp `. - -- :func:`neighbors.kneighbors_graph` and :func:`radius_neighbors_graph` - support non-Euclidean metrics. By `Manoj Kumar`_. - -- Parameter ``connectivity`` in :class:`cluster.AgglomerativeClustering` - and family now accepts callables that return a connectivity matrix. - By `Manoj Kumar`_. - -- Sparse support for :func:`paired_distances`. By `Joel Nothman`_. - -- :class:`cluster.DBSCAN` now supports sparse input and sample weights and - has been optimized: the inner loop has been rewritten in Cython and - radius neighbors queries are now computed in batch. By `Joel Nothman`_ - and `Lars Buitinck`_. - -- Add ``class_weight`` parameter to automatically weight samples by class - frequency for :class:`ensemble.RandomForestClassifier`, - :class:`tree.DecisionTreeClassifier`, :class:`ensemble.ExtraTreesClassifier` - and :class:`tree.ExtraTreeClassifier`. By `Trevor Stephens`_. - -- :class:`grid_search.RandomizedSearchCV` now does sampling without - replacement if all parameters are given as lists. By `Andreas Müller`_. - -- Parallelized calculation of :func:`pairwise_distances` is now supported - for scipy metrics and custom callables. By `Joel Nothman`_. - -- Allow the fitting and scoring of all clustering algorithms in - :class:`pipeline.Pipeline`. By `Andreas Müller`_. - -- More robust seeding and improved error messages in :class:`cluster.MeanShift` - by `Andreas Müller`_. - -- Make the stopping criterion for :class:`mixture.GMM`, - :class:`mixture.DPGMM` and :class:`mixture.VBGMM` less dependent on the - number of samples by thresholding the average log-likelihood change - instead of its sum over all samples. By `Hervé Bredin`_. - -- The outcome of :func:`manifold.spectral_embedding` was made deterministic - by flipping the sign of eigenvectors. By :user:`Hasil Sharma `. - -- Significant performance and memory usage improvements in - :class:`preprocessing.PolynomialFeatures`. By `Eric Martin`_. - -- Numerical stability improvements for :class:`preprocessing.StandardScaler` - and :func:`preprocessing.scale`. By `Nicolas Goix`_. - -- :class:`svm.SVC` fitted on sparse input now implements ``decision_function``. - By `Rob Zinkov`_ and `Andreas Müller`_. - -- :func:`cross_validation.train_test_split` now preserves the input type, - instead of converting to numpy arrays. - - -Documentation improvements -.......................... - -- Added example of using :class:`FeatureUnion` for heterogeneous input. - By :user:`Matt Terry `. - -- Documentation on scorers was improved, to highlight the handling of loss - functions. By :user:`Matt Pico `. - -- A discrepancy between liblinear output and scikit-learn's wrappers - is now noted. By `Manoj Kumar`_. - -- Improved documentation generation: examples referring to a class or - function are now shown in a gallery on the class/function's API reference - page. By `Joel Nothman`_. - -- More explicit documentation of sample generators and of data - transformation. By `Joel Nothman`_. - -- :class:`sklearn.neighbors.BallTree` and :class:`sklearn.neighbors.KDTree` - used to point to empty pages stating that they are aliases of BinaryTree. - This has been fixed to show the correct class docs. By `Manoj Kumar`_. - -- Added silhouette plots for analysis of KMeans clustering using - :func:`metrics.silhouette_samples` and :func:`metrics.silhouette_score`.
- See :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_silhouette_analysis.py` - -Bug fixes -......... -- Metaestimators now support ducktyping for the presence of ``decision_function``, - ``predict_proba`` and other methods. This fixes behavior of - :class:`grid_search.GridSearchCV`, - :class:`grid_search.RandomizedSearchCV`, :class:`pipeline.Pipeline`, - :class:`feature_selection.RFE`, :class:`feature_selection.RFECV` when nested. - By `Joel Nothman`_. - -- The ``scoring`` attribute of grid-search and cross-validation methods is no longer - ignored when a :class:`grid_search.GridSearchCV` is given as a base estimator or - the base estimator doesn't have a ``predict`` method. - -- The function :func:`hierarchical.ward_tree` now returns the children in - the same order for both the structured and unstructured versions. By - `Matteo Visconti di Oleggio Castello`_. - -- :class:`feature_selection.RFECV` now correctly handles cases when - ``step`` is not equal to 1. By :user:`Nikolay Mayorov `. - -- The :class:`decomposition.PCA` now undoes whitening in its - ``inverse_transform``. Also, its ``components_`` now always have unit - length. By :user:`Michael Eickenberg `. - -- Fix incomplete download of the dataset when - :func:`datasets.download_20newsgroups` is called. By `Manoj Kumar`_. - -- Various fixes to the Gaussian processes subpackage by Vincent Dubourg - and Jan Hendrik Metzen. - -- Calling ``partial_fit`` with ``class_weight='auto'`` throws an - appropriate error message and suggests a workaround. - By :user:`Danny Sullivan `. - -- :class:`RBFSampler ` with ``gamma=g`` - formerly approximated :func:`rbf_kernel ` - with ``gamma=g/2.``; the definition of ``gamma`` is now consistent, - which may substantially change your results if you use a fixed value. - (If you cross-validated over ``gamma``, it probably doesn't matter - too much.) By :user:`Dougal Sutherland `. - -- Pipeline objects delegate the ``classes_`` attribute to the underlying - estimator. This allows, for instance, bagging of a pipeline object. - By `Arnaud Joly`_. - -- :class:`neighbors.NearestCentroid` now uses the median as the centroid - when metric is set to ``manhattan``. It was using the mean before. - By `Manoj Kumar`_. - -- Fix numerical stability issues in :class:`linear_model.SGDClassifier` - and :class:`linear_model.SGDRegressor` by clipping large gradients and - ensuring that weight decay rescaling is always positive (for large - l2 regularization and large learning rate values). - By `Olivier Grisel`_. - -- When `compute_full_tree` is set to "auto", the full tree is - built when n_clusters is high and is early stopped when n_clusters is - low, while the behavior should be vice-versa in - :class:`cluster.AgglomerativeClustering` (and friends). - This has been fixed by `Manoj Kumar`_. - -- Fix lazy centering of data in :func:`linear_model.enet_path` and - :func:`linear_model.lasso_path`. It was centered around one. It has - been changed to be centered around the origin. By `Manoj Kumar`_. - -- Fix handling of precomputed affinity matrices in - :class:`cluster.AgglomerativeClustering` when using connectivity - constraints. By :user:`Cathy Deng `. - -- Correct ``partial_fit`` handling of ``class_prior`` for - :class:`sklearn.naive_bayes.MultinomialNB` and - :class:`sklearn.naive_bayes.BernoulliNB`. By `Trevor Stephens`_. - -- Fixed a crash in :func:`metrics.precision_recall_fscore_support` - when using unsorted ``labels`` in the multi-label setting. - By `Andreas Müller`_.
- -- Avoid skipping the first nearest neighbor in the methods ``radius_neighbors``, - ``kneighbors``, ``kneighbors_graph`` and ``radius_neighbors_graph`` in - :class:`sklearn.neighbors.NearestNeighbors` and family, when the query - data is not the same as fit data. By `Manoj Kumar`_. - -- Fix log-density calculation in the :class:`mixture.GMM` with - tied covariance. By `Will Dawson`_. - -- Fixed a scaling error in :class:`feature_selection.SelectFdr` - where a factor ``n_features`` was missing. By `Andrew Tulloch`_. - -- Fix zero division in :class:`neighbors.KNeighborsRegressor` and related - classes when using distance weighting and having identical data points. - By `Garret-R `_. - -- Fixed round-off errors with non-positive-definite covariance matrices - in GMM. By :user:`Alexis Mignon `. - -- Fixed an error in the computation of conditional probabilities in - :class:`naive_bayes.BernoulliNB`. By `Hanna Wallach`_. - -- Make the method ``radius_neighbors`` of - :class:`neighbors.NearestNeighbors` return the samples lying on the - boundary for ``algorithm='brute'``. By `Yan Yi`_. - -- Flip sign of ``dual_coef_`` of :class:`svm.SVC` - to make it consistent with the documentation and - ``decision_function``. By Artem Sobolev. - -- Fixed handling of ties in :class:`isotonic.IsotonicRegression`. - We now use the weighted average of targets (secondary method). By - `Andreas Müller`_ and `Michael Bommarito `_. - -API changes summary -------------------- - -- :class:`GridSearchCV ` and - :func:`cross_val_score ` and other - meta-estimators don't convert pandas DataFrames into arrays any more, - allowing DataFrame specific operations in custom estimators. - -- :func:`multiclass.fit_ovr`, :func:`multiclass.predict_ovr`, - :func:`predict_proba_ovr`, - :func:`multiclass.fit_ovo`, :func:`multiclass.predict_ovo`, - :func:`multiclass.fit_ecoc` and :func:`multiclass.predict_ecoc` - are deprecated. Use the underlying estimators instead. - -- Nearest neighbors estimators used to take arbitrary keyword arguments - and pass these to their distance metric. This will no longer be supported - in scikit-learn 0.18; use the ``metric_params`` argument instead. - -- The `n_jobs` parameter of the fit method was moved to the constructor of the - LinearRegression class. - -- The ``predict_proba`` method of :class:`multiclass.OneVsRestClassifier` - now returns two probabilities per sample in the multiclass case; this - is consistent with other estimators and with the method's documentation, - but previous versions accidentally returned only the positive - probability. Fixed by Will Lamond and `Lars Buitinck`_. - -- Change default value of precompute in :class:`ElasticNet` and :class:`Lasso` - to False. Setting precompute to "auto" was found to be slower when - n_samples > n_features since the computation of the Gram matrix is - computationally expensive and outweighs the benefit of fitting the Gram - for just one alpha. - ``precompute="auto"`` is now deprecated and will be removed in 0.18. - By `Manoj Kumar`_. - -- Expose ``positive`` option in :func:`linear_model.enet_path` and - :func:`linear_model.lasso_path` which constrains coefficients to be - positive. By `Manoj Kumar`_. - -- Users should now supply an explicit ``average`` parameter to - :func:`sklearn.metrics.f1_score`, :func:`sklearn.metrics.fbeta_score`, - :func:`sklearn.metrics.recall_score` and - :func:`sklearn.metrics.precision_score` when performing multiclass - or multilabel (i.e. not binary) classification. By `Joel Nothman`_.
- -- `scoring` parameter for cross validation now accepts `'f1_micro'`, - `'f1_macro'` or `'f1_weighted'`. `'f1'` is now for binary classification - only. Similar changes apply to `'precision'` and `'recall'`. - By `Joel Nothman`_. - -- The ``fit_intercept``, ``normalize`` and ``return_models`` parameters in - :func:`linear_model.enet_path` and :func:`linear_model.lasso_path` have - been removed. They had been deprecated since 0.14. - -- From now onwards, all estimators will uniformly raise ``NotFittedError`` - (:class:`utils.validation.NotFittedError`), when any of the ``predict``-like - methods are called before the model is fit. By `Raghav RV`_. - -- Input data validation was refactored for more consistent input - validation. The ``check_arrays`` function was replaced by ``check_array`` - and ``check_X_y``. By `Andreas Müller`_. - -- Allow ``X=None`` in the methods ``radius_neighbors``, ``kneighbors``, - ``kneighbors_graph`` and ``radius_neighbors_graph`` in - :class:`sklearn.neighbors.NearestNeighbors` and family. If set to None, - then for every sample this avoids setting the sample itself as the - first nearest neighbor. By `Manoj Kumar`_. - -- Add parameter ``include_self`` in :func:`neighbors.kneighbors_graph` - and :func:`neighbors.radius_neighbors_graph` which has to be explicitly - set by the user. If set to True, then the sample itself is considered - as the first nearest neighbor. - -- `thresh` parameter is deprecated in favor of new `tol` parameter in - :class:`GMM`, :class:`DPGMM` and :class:`VBGMM`. See `Enhancements` - section for details. By `Hervé Bredin`_. - -- Estimators will treat input with dtype object as numeric when possible. - By `Andreas Müller`_. - -- Estimators now raise `ValueError` consistently when fitted on empty - data (less than 1 sample or less than 1 feature for 2D input). - By `Olivier Grisel`_. - - -- The ``shuffle`` option of :class:`linear_model.SGDClassifier`, - :class:`linear_model.SGDRegressor`, :class:`linear_model.Perceptron`, - :class:`linear_model.PassiveAggressiveClassifier` and - :class:`linear_model.PassiveAggressiveRegressor` now defaults to ``True``. - -- :class:`cluster.DBSCAN` now uses a deterministic initialization. The - `random_state` parameter is deprecated. By :user:`Erich Schubert `. - -Code Contributors ------------------ -A. Flaxman, Aaron Schumacher, Aaron Staple, abhishek thakur, Akshay, akshayah3, -Aldrian Obaja, Alexander Fabisch, Alexandre Gramfort, Alexis Mignon, Anders -Aagaard, Andreas Mueller, Andreas van Cranenburgh, Andrew Tulloch, Andrew -Walker, Antony Lee, Arnaud Joly, banilo, Barmaley.exe, Ben Davies, Benedikt -Koehler, bhsu, Boris Feld, Borja Ayerdi, Boyuan Deng, Brent Pedersen, Brian -Wignall, Brooke Osborn, Calvin Giles, Cathy Deng, Celeo, cgohlke, chebee7i, -Christian Stade-Schuldt, Christof Angermueller, Chyi-Kwei Yau, CJ Carey, -Clemens Brunner, Daiki Aminaka, Dan Blanchard, danfrankj, Danny Sullivan, David -Fletcher, Dmitrijs Milajevs, Dougal J.
Sutherland, Erich Schubert, Fabian -Pedregosa, Florian Wilhelm, floydsoft, Félix-Antoine Fortin, Gael Varoquaux, -Garrett-R, Gilles Louppe, gpassino, gwulfs, Hampus Bengtsson, Hamzeh Alsalhi, -Hanna Wallach, Harry Mavroforakis, Hasil Sharma, Helder, Herve Bredin, -Hsiang-Fu Yu, Hugues SALAMIN, Ian Gilmore, Ilambharathi Kanniah, Imran Haque, -isms, Jake VanderPlas, Jan Dlabal, Jan Hendrik Metzen, Jatin Shah, Javier López -Peña, jdcaballero, Jean Kossaifi, Jeff Hammerbacher, Joel Nothman, Jonathan -Helmus, Joseph, Kaicheng Zhang, Kevin Markham, Kyle Beauchamp, Kyle Kastner, -Lagacherie Matthieu, Lars Buitinck, Laurent Direr, leepei, Loic Esteve, Luis -Pedro Coelho, Lukas Michelbacher, maheshakya, Manoj Kumar, Manuel, Mario -Michael Krell, Martin, Martin Billinger, Martin Ku, Mateusz Susik, Mathieu -Blondel, Matt Pico, Matt Terry, Matteo Visconti dOC, Matti Lyra, Max Linke, -Mehdi Cherti, Michael Bommarito, Michael Eickenberg, Michal Romaniuk, MLG, -mr.Shu, Nelle Varoquaux, Nicola Montecchio, Nicolas, Nikolay Mayorov, Noel -Dawe, Okal Billy, Olivier Grisel, Óscar Nájera, Paolo Puggioni, Peter -Prettenhofer, Pratap Vardhan, pvnguyen, queqichao, Rafael Carrascosa, Raghav R -V, Rahiel Kasim, Randall Mason, Rob Zinkov, Robert Bradshaw, Saket Choudhary, -Sam Nicholls, Samuel Charron, Saurabh Jha, sethdandridge, sinhrks, snuderl, -Stefan Otte, Stefan van der Walt, Steve Tjoa, swu, Sylvain Zimmer, tejesh95, -terrycojones, Thomas Delteil, Thomas Unterthiner, Tomas Kazmar, trevorstephens, -tttthomasssss, Tzu-Ming Kuo, ugurcaliskan, ugurthemaster, Vinayak Mehta, -Vincent Dubourg, Vjacheslav Murashkin, Vlad Niculae, wadawson, Wei Xue, Will -Lamond, Wu Jiang, x0l, Xinfan Meng, Yan Yi, Yu-Chin - -.. _changes_0_15_2: - -Version 0.15.2 -============== - -**September 4, 2014** - -Bug fixes --------- - -- Fixed handling of the ``p`` parameter of the Minkowski distance that was - previously ignored in nearest neighbors models. By :user:`Nikolay - Mayorov `. - -- Fixed duplicated alphas in :class:`linear_model.LassoLars` with early - stopping on 32-bit Python. By `Olivier Grisel`_ and `Fabian Pedregosa`_. - -- Fixed the build under Windows when scikit-learn is built with MSVC while - NumPy is built with MinGW. By `Olivier Grisel`_ and :user:`Federico - Vaggi `. - -- Fixed an array index overflow bug in the coordinate descent solver. By - `Gael Varoquaux`_. - -- Better handling of numpy 1.9 deprecation warnings. By `Gael Varoquaux`_. - -- Removed unnecessary data copy in :class:`cluster.KMeans`. - By `Gael Varoquaux`_. - -- Explicitly close open files to avoid ``ResourceWarnings`` under Python 3. - By Calvin Giles. - -- The ``transform`` of :class:`discriminant_analysis.LinearDiscriminantAnalysis` - now projects the input on the most discriminant directions. By Martin Billinger. - -- Fixed potential overflow in ``_tree.safe_realloc`` by `Lars Buitinck`_. - -- Performance optimization in :class:`isotonic.IsotonicRegression`. - By Robert Bradshaw. - -- ``nose`` is no longer a runtime dependency for importing ``sklearn``; it is - only needed for running the tests. By `Joel Nothman`_. - -- Many documentation and website fixes by `Joel Nothman`_, `Lars Buitinck`_, - :user:`Matt Pico `, and others. - -.. _changes_0_15_1: - -Version 0.15.1 -============== - -**August 1, 2014** - -Bug fixes --------- - -- Made :func:`cross_validation.cross_val_score` use - :class:`cross_validation.KFold` instead of - :class:`cross_validation.StratifiedKFold` on multi-output classification - problems. By :user:`Nikolay Mayorov `.
- -- Support unseen labels in :class:`preprocessing.LabelBinarizer` to restore - the default behavior of 0.14.1 for backward compatibility. By - :user:`Hamzeh Alsalhi `. - -- Fixed the :class:`cluster.KMeans` stopping criterion that prevented early - convergence detection. By Edward Raff and `Gael Varoquaux`_. - -- Fixed the behavior of :class:`multiclass.OneVsOneClassifier` - in case of ties at the per-class vote level by computing the correct - per-class sum of prediction scores. By `Andreas Müller`_. - -- Made :func:`cross_validation.cross_val_score` and - :class:`grid_search.GridSearchCV` accept Python lists as input data. - This is especially useful for cross-validation and model selection of - text processing pipelines. By `Andreas Müller`_. - -- Fixed data input checks of most estimators to accept input data that - implements the NumPy ``__array__`` protocol. This is the case - for ``pandas.Series`` and ``pandas.DataFrame`` in recent versions of - pandas. By `Gael Varoquaux`_. - -- Fixed a regression for :class:`linear_model.SGDClassifier` with - ``class_weight="auto"`` on data with non-contiguous labels. By - `Olivier Grisel`_. - - -.. _changes_0_15: - -Version 0.15 -============ - -**July 15, 2014** - -Highlights ----------- - -- Many speed and memory improvements all across the code. - -- Huge speed and memory improvements to random forests (and extra - trees) that also benefit better from parallel computing. - -- Incremental fit to :class:`BernoulliRBM `. - -- Added :class:`cluster.AgglomerativeClustering` for hierarchical - agglomerative clustering with average linkage, complete linkage and - ward strategies. - -- Added :class:`linear_model.RANSACRegressor` for robust regression - models. - -- Added dimensionality reduction with :class:`manifold.TSNE` which can be - used to visualize high-dimensional data. - - -Changelog --------- - -New features -............ - -- Added :class:`ensemble.BaggingClassifier` and - :class:`ensemble.BaggingRegressor` meta-estimators for ensembling - any kind of base estimator. See the :ref:`Bagging ` section of - the user guide for details and examples. By `Gilles Louppe`_. - -- New unsupervised feature selection algorithm - :class:`feature_selection.VarianceThreshold`, by `Lars Buitinck`_. - -- Added :class:`linear_model.RANSACRegressor` meta-estimator for the robust - fitting of regression models. By :user:`Johannes Schönberger `. - -- Added :class:`cluster.AgglomerativeClustering` for hierarchical - agglomerative clustering with average linkage, complete linkage and - ward strategies, by `Nelle Varoquaux`_ and `Gael Varoquaux`_. - -- Shorthand constructors :func:`pipeline.make_pipeline` and - :func:`pipeline.make_union` were added by `Lars Buitinck`_. - -- Shuffle option for :class:`cross_validation.StratifiedKFold`. - By :user:`Jeffrey Blackburne `. - -- Incremental learning (``partial_fit``) for Gaussian Naive Bayes by - Imran Haque. - -- Added ``partial_fit`` to :class:`BernoulliRBM - `. - By :user:`Danny Sullivan `. - -- Added :func:`learning_curve ` utility to - chart performance with respect to training size. See - :ref:`sphx_glr_auto_examples_model_selection_plot_learning_curve.py`. By Alexander Fabisch. - -- Add positive option in :class:`LassoCV ` and - :class:`ElasticNetCV `. - By Brian Wignall and `Alexandre Gramfort`_. - -- Added :class:`linear_model.MultiTaskElasticNetCV` and - :class:`linear_model.MultiTaskLassoCV`. By `Manoj Kumar`_. - -- Added :class:`manifold.TSNE`. By Alexander Fabisch. - -Enhancements -............
- -- Add sparse input support to :class:`ensemble.AdaBoostClassifier` and - :class:`ensemble.AdaBoostRegressor` meta-estimators. - By :user:`Hamzeh Alsalhi `. - -- Memory improvements of decision trees, by `Arnaud Joly`_. - -- Decision trees can now be built in a best-first manner by using ``max_leaf_nodes`` - as the stopping criterion. Refactored the tree code to use either a - stack or a priority queue for tree building. - By `Peter Prettenhofer`_ and `Gilles Louppe`_. - -- Decision trees can now be fitted on fortran- and c-style arrays, and - non-continuous arrays without the need to make a copy. - If the input array has a different dtype than ``np.float32``, a fortran-style - copy will be made since fortran-style memory layout has speed - advantages. By `Peter Prettenhofer`_ and `Gilles Louppe`_. - -- Speed improvement of regression trees by optimizing the computation - of the mean square error criterion. This led - to speed improvements of the tree, forest and gradient boosting tree - modules. By `Arnaud Joly`_. - -- The ``img_to_graph`` and ``grid_to_graph`` functions in - :mod:`sklearn.feature_extraction.image` now return ``np.ndarray`` - instead of ``np.matrix`` when ``return_as=np.ndarray``. See the - Notes section for more information on compatibility. - -- Changed the internal storage of decision trees to use a struct array. - This fixed some small bugs, while improving code and providing a small - speed gain. By `Joel Nothman`_. - -- Reduce memory usage and overhead when fitting and predicting with forests - of randomized trees in parallel with ``n_jobs != 1`` by leveraging the new - threading backend of joblib 0.8 and releasing the GIL in the tree fitting - Cython code. By `Olivier Grisel`_ and `Gilles Louppe`_. - -- Speed improvement of the :mod:`sklearn.ensemble.gradient_boosting` module. - By `Gilles Louppe`_ and `Peter Prettenhofer`_. - -- Various enhancements to the :mod:`sklearn.ensemble.gradient_boosting` - module: a ``warm_start`` argument to fit additional trees, - a ``max_leaf_nodes`` argument to fit GBM style trees, - a ``monitor`` fit argument to inspect the estimator during training, and - refactoring of the verbose code. By `Peter Prettenhofer`_. - -- Faster :class:`sklearn.ensemble.ExtraTrees` by caching feature values. - By `Arnaud Joly`_. - -- Faster depth-based tree building algorithms such as decision tree, - random forest, extra trees or gradient tree boosting (with depth based - growing strategy) by avoiding trying to split on found constant features - in the sample subset. By `Arnaud Joly`_. - -- Add ``min_weight_fraction_leaf`` pre-pruning parameter to tree-based - methods: the minimum weighted fraction of the input samples required to be - at a leaf node. By `Noel Dawe`_. - -- Added :func:`metrics.pairwise_distances_argmin_min`, by Philippe Gervais. - -- Added predict method to :class:`cluster.AffinityPropagation` and - :class:`cluster.MeanShift`, by `Mathieu Blondel`_. - -- Vector and matrix multiplications have been optimised throughout the - library by `Denis Engemann`_, and `Alexandre Gramfort`_. - In particular, they should take less memory with older NumPy versions - (prior to 1.7.2). - -- Precision-recall and ROC examples now use train_test_split, and have more - explanation of why these metrics are useful. By `Kyle Kastner`_. - -- The training algorithm for :class:`decomposition.NMF` is faster for - sparse matrices and has much lower memory complexity, meaning it will - scale up gracefully to large datasets. By `Lars Buitinck`_.
- -- Added the ``svd_method`` option, with default value ``"randomized"``, to - :class:`decomposition.FactorAnalysis` to save memory and - significantly speed up computation, by `Denis Engemann`_, and - `Alexandre Gramfort`_. - -- Changed :class:`cross_validation.StratifiedKFold` to try to - preserve as much of the original ordering of samples as possible so as - not to hide overfitting on datasets with a non-negligible level of - sample dependency. - By `Daniel Nouri`_ and `Olivier Grisel`_. - -- Add multi-output support to :class:`gaussian_process.GaussianProcess` - by John Novak. - -- Support for precomputed distance matrices in nearest neighbor estimators - by `Robert Layton`_ and `Joel Nothman`_. - -- Norm computations optimized for NumPy 1.6 and later versions by - `Lars Buitinck`_. In particular, the k-means algorithm no longer - needs a temporary data structure the size of its input. - -- :class:`dummy.DummyClassifier` can now be used to predict a constant - output value. By `Manoj Kumar`_. - -- :class:`dummy.DummyRegressor` now has a ``strategy`` parameter which allows - predicting the mean, the median of the training set, or a constant - output value. By :user:`Maheshakya Wijewardena `. - -- Multi-label classification output in multilabel indicator format - is now supported by :func:`metrics.roc_auc_score` and - :func:`metrics.average_precision_score` by `Arnaud Joly`_. - -- Significant performance improvements (more than 100x speedup for - large problems) in :class:`isotonic.IsotonicRegression` by - `Andrew Tulloch`_. - -- Speed and memory usage improvements to the SGD algorithm for linear - models: it now uses threads, not separate processes, when ``n_jobs>1``. - By `Lars Buitinck`_. - -- Grid search and cross validation allow NaNs in the input arrays so that - preprocessors such as :class:`preprocessing.Imputer - ` can be trained within the cross validation loop, - avoiding potentially skewed results. - -- Ridge regression can now deal with sample weights in feature space - (previously only in sample space). By :user:`Michael Eickenberg `. - Both solutions are provided by the Cholesky solver. - -- Several classification and regression metrics now support weighted - samples with the new ``sample_weight`` argument: - :func:`metrics.accuracy_score`, - :func:`metrics.zero_one_loss`, - :func:`metrics.precision_score`, - :func:`metrics.average_precision_score`, - :func:`metrics.f1_score`, - :func:`metrics.fbeta_score`, - :func:`metrics.recall_score`, - :func:`metrics.roc_auc_score`, - :func:`metrics.explained_variance_score`, - :func:`metrics.mean_squared_error`, - :func:`metrics.mean_absolute_error`, - :func:`metrics.r2_score`. - By `Noel Dawe`_. (See the illustrative sketch just before the - bug-fixes list below.) - -- Speed up of the sample generator - :func:`datasets.make_multilabel_classification`. By `Joel Nothman`_. - -Documentation improvements -........................... - -- The :ref:`Working With Text Data ` tutorial - has now been worked into the main documentation's tutorial section. - Includes exercises and skeletons for tutorial presentation. - Original tutorial created by several authors including - `Olivier Grisel`_, Lars Buitinck and many others. - Tutorial integration into the scikit-learn documentation - by `Jaques Grobler`_. - -- Added :ref:`Computational Performance ` - documentation. Discussion and examples of prediction latency / throughput - and different factors that influence speed. Additional tips for - building faster models and choosing a relevant compromise between speed - and predictive power. - By :user:`Eustache Diemert `.
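- -The following is an editor's sketch (not part of the original changelog
- entries) of the new ``sample_weight`` argument of the metrics listed in the
- enhancements above; the labels and weights below are made up purely for
- illustration::
-
-    import numpy as np
-    from sklearn.metrics import accuracy_score
-
-    y_true = np.array([0, 1, 1, 0])
-    y_pred = np.array([0, 1, 0, 0])
-    weights = np.array([1.0, 1.0, 3.0, 1.0])  # up-weight the misclassified sample
-
-    accuracy_score(y_true, y_pred)                         # 0.75 (unweighted)
-    accuracy_score(y_true, y_pred, sample_weight=weights)  # 0.5  (weighted)
-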
- -Bug fixes -......... - -- Fixed bug in :class:`decomposition.MiniBatchDictionaryLearning`: - ``partial_fit`` was not working properly. - -- Fixed bug in :class:`linear_model.stochastic_gradient`: - ``l1_ratio`` was used as ``(1.0 - l1_ratio)``. - -- Fixed bug in :class:`multiclass.OneVsOneClassifier` with string - labels. - -- Fixed a bug in :class:`LassoCV ` and - :class:`ElasticNetCV `: they would not - pre-compute the Gram matrix with ``precompute=True`` or - ``precompute="auto"`` and ``n_samples > n_features``. By `Manoj Kumar`_. - -- Fixed incorrect estimation of the degrees of freedom in - :func:`feature_selection.f_regression` when variates are not centered. - By :user:`Virgile Fritsch `. - -- Fixed a race condition in parallel processing with - ``pre_dispatch != "all"`` (for instance, in ``cross_val_score``). - By `Olivier Grisel`_. - -- Raise error in :class:`cluster.FeatureAgglomeration` and - :class:`cluster.WardAgglomeration` when no samples are given, - rather than returning a meaningless clustering. - -- Fixed bug in :class:`gradient_boosting.GradientBoostingRegressor` with - ``loss='huber'``: ``gamma`` might not have been initialized. - -- Fixed feature importances as computed with a forest of randomized trees - when fit with ``sample_weight != None`` and/or with ``bootstrap=True``. - By `Gilles Louppe`_. - -API changes summary -------------------- - -- :mod:`sklearn.hmm` is deprecated. Its removal is planned - for the 0.17 release. - -- Use of :class:`covariance.EllipticEnvelop` has now been removed after - deprecation. - Please use :class:`covariance.EllipticEnvelope` instead. - -- :class:`cluster.Ward` is deprecated. Use - :class:`cluster.AgglomerativeClustering` instead. - -- :class:`cluster.WardClustering` is deprecated. Use - :class:`cluster.AgglomerativeClustering` instead. - -- :class:`cross_validation.Bootstrap` is deprecated. - :class:`cross_validation.KFold` or - :class:`cross_validation.ShuffleSplit` are recommended instead. - -- Direct support for the sequence of sequences (or list of lists) multilabel - format is deprecated. To convert to and from the supported binary - indicator matrix format, use - :class:`MultiLabelBinarizer `. - By `Joel Nothman`_. - -- Add score method to :class:`PCA ` following the model of - probabilistic PCA and deprecate - :class:`ProbabilisticPCA ` model whose - score implementation is not correct. The computation now also exploits the - matrix inversion lemma for faster computation. By `Alexandre Gramfort`_. - -- The score method of :class:`FactorAnalysis ` - now returns the average log-likelihood of the samples. Use score_samples - to get the log-likelihood of each sample. By `Alexandre Gramfort`_. - -- Generating boolean masks (the setting ``indices=False``) - from cross-validation generators is deprecated. - Support for masks will be removed in 0.17. - The generators have produced arrays of indices by default since 0.10. - By `Joel Nothman`_. - -- 1-d arrays containing strings with ``dtype=object`` (as used in Pandas) - are now considered valid classification targets. This fixes a regression - from version 0.13 in some classifiers. By `Joel Nothman`_. - -- Fix wrong ``explained_variance_ratio_`` attribute in - :class:`RandomizedPCA `. - By `Alexandre Gramfort`_. - -- Fit alphas for each ``l1_ratio`` instead of ``mean_l1_ratio`` in - :class:`linear_model.ElasticNetCV` and :class:`linear_model.LassoCV`.
- This changes the shape of ``alphas_`` from ``(n_alphas,)`` to - ``(n_l1_ratio, n_alphas)`` if the ``l1_ratio`` provided is a 1-D array-like - object of length greater than one. - By `Manoj Kumar`_. - -- Fix :class:`linear_model.ElasticNetCV` and :class:`linear_model.LassoCV` - when fitting an intercept and the input data is sparse. The automatic grid - of alphas was not computed correctly and the scaling with normalize - was wrong. By `Manoj Kumar`_. - -- Fix wrong maximal number of features drawn (``max_features``) at each split - for decision trees, random forests and gradient tree boosting. - Previously, the count for the number of drawn features started only after - one non-constant feature was found in the split. This bug fix will affect - computational and generalization performance of those algorithms in the - presence of constant features. To get back previous generalization - performance, you should modify the value of ``max_features``. - By `Arnaud Joly`_. - -- Fix wrong maximal number of features drawn (``max_features``) at each split - for :class:`ensemble.ExtraTreesClassifier` and - :class:`ensemble.ExtraTreesRegressor`. Previously, only non-constant - features in the split were counted as drawn. Now constant features are - counted as drawn. Furthermore, at least one feature must be non-constant - in order to make a valid split. This bug fix will affect - computational and generalization performance of extra trees in the - presence of constant features. To get back previous generalization - performance, you should modify the value of ``max_features``. - By `Arnaud Joly`_. - -- Fix :func:`utils.compute_class_weight` when ``class_weight=="auto"``. - Previously it was broken for input of non-integer ``dtype`` and the - weighted array that was returned was wrong. By `Manoj Kumar`_. - -- Fix :class:`cross_validation.Bootstrap` to raise a ``ValueError`` - when ``n_train + n_test > n``. By :user:`Ronald Phlypo `. - - -People ------- - -List of contributors for release 0.15 by number of commits.
- -* 312 Olivier Grisel -* 275 Lars Buitinck -* 221 Gael Varoquaux -* 148 Arnaud Joly -* 134 Johannes Schönberger -* 119 Gilles Louppe -* 113 Joel Nothman -* 111 Alexandre Gramfort -* 95 Jaques Grobler -* 89 Denis Engemann -* 83 Peter Prettenhofer -* 83 Alexander Fabisch -* 62 Mathieu Blondel -* 60 Eustache Diemert -* 60 Nelle Varoquaux -* 49 Michael Bommarito -* 45 Manoj-Kumar-S -* 28 Kyle Kastner -* 26 Andreas Mueller -* 22 Noel Dawe -* 21 Maheshakya Wijewardena -* 21 Brooke Osborn -* 21 Hamzeh Alsalhi -* 21 Jake VanderPlas -* 21 Philippe Gervais -* 19 Bala Subrahmanyam Varanasi -* 12 Ronald Phlypo -* 10 Mikhail Korobov -* 8 Thomas Unterthiner -* 8 Jeffrey Blackburne -* 8 eltermann -* 8 bwignall -* 7 Ankit Agrawal -* 7 CJ Carey -* 6 Daniel Nouri -* 6 Chen Liu -* 6 Michael Eickenberg -* 6 ugurthemaster -* 5 Aaron Schumacher -* 5 Baptiste Lagarde -* 5 Rajat Khanduja -* 5 Robert McGibbon -* 5 Sergio Pascual -* 4 Alexis Metaireau -* 4 Ignacio Rossi -* 4 Virgile Fritsch -* 4 Sebastian Säger -* 4 Ilambharathi Kanniah -* 4 sdenton4 -* 4 Robert Layton -* 4 Alyssa -* 4 Amos Waterland -* 3 Andrew Tulloch -* 3 murad -* 3 Steven Maude -* 3 Karol Pysniak -* 3 Jacques Kvam -* 3 cgohlke -* 3 cjlin -* 3 Michael Becker -* 3 hamzeh -* 3 Eric Jacobsen -* 3 john collins -* 3 kaushik94 -* 3 Erwin Marsi -* 2 csytracy -* 2 LK -* 2 Vlad Niculae -* 2 Laurent Direr -* 2 Erik Shilts -* 2 Raul Garreta -* 2 Yoshiki Vázquez Baeza -* 2 Yung Siang Liau -* 2 abhishek thakur -* 2 James Yu -* 2 Rohit Sivaprasad -* 2 Roland Szabo -* 2 amormachine -* 2 Alexis Mignon -* 2 Oscar Carlsson -* 2 Nantas Nardelli -* 2 jess010 -* 2 kowalski87 -* 2 Andrew Clegg -* 2 Federico Vaggi -* 2 Simon Frid -* 2 Félix-Antoine Fortin -* 1 Ralf Gommers -* 1 t-aft -* 1 Ronan Amicel -* 1 Rupesh Kumar Srivastava -* 1 Ryan Wang -* 1 Samuel Charron -* 1 Samuel St-Jean -* 1 Fabian Pedregosa -* 1 Skipper Seabold -* 1 Stefan Walk -* 1 Stefan van der Walt -* 1 Stephan Hoyer -* 1 Allen Riddell -* 1 Valentin Haenel -* 1 Vijay Ramesh -* 1 Will Myers -* 1 Yaroslav Halchenko -* 1 Yoni Ben-Meshulam -* 1 Yury V. Zaytsev -* 1 adrinjalali -* 1 ai8rahim -* 1 alemagnani -* 1 alex -* 1 benjamin wilson -* 1 chalmerlowe -* 1 dzikie drożdże -* 1 jamestwebber -* 1 matrixorz -* 1 popo -* 1 samuela -* 1 François Boulogne -* 1 Alexander Measure -* 1 Ethan White -* 1 Guilherme Trein -* 1 Hendrik Heuer -* 1 IvicaJovic -* 1 Jan Hendrik Metzen -* 1 Jean Michel Rouly -* 1 Eduardo Ariño de la Rubia -* 1 Jelle Zijlstra -* 1 Eddy L O Jansson -* 1 Denis -* 1 John -* 1 John Schmidt -* 1 Jorge Cañardo Alastuey -* 1 Joseph Perla -* 1 Joshua Vredevoogd -* 1 José Ricardo -* 1 Julien Miotte -* 1 Kemal Eren -* 1 Kenta Sato -* 1 David Cournapeau -* 1 Kyle Kelley -* 1 Daniele Medri -* 1 Laurent Luce -* 1 Laurent Pierron -* 1 Luis Pedro Coelho -* 1 DanielWeitzenfeld -* 1 Craig Thompson -* 1 Chyi-Kwei Yau -* 1 Matthew Brett -* 1 Matthias Feurer -* 1 Max Linke -* 1 Chris Filo Gorgolewski -* 1 Charles Earl -* 1 Michael Hanke -* 1 Michele Orrù -* 1 Bryan Lunt -* 1 Brian Kearns -* 1 Paul Butler -* 1 Paweł Mandera -* 1 Peter -* 1 Andrew Ash -* 1 Pietro Zambelli -* 1 staubda - - -.. _changes_0_14: - -Version 0.14 -=============== - -**August 7, 2013** - -Changelog ---------- - -- Missing values with sparse and dense matrices can be imputed with the - transformer :class:`preprocessing.Imputer` by `Nicolas Trésegnie`_. - -- The core implementation of decisions trees has been rewritten from - scratch, allowing for faster tree induction and lower memory - consumption in all tree-based estimators. 
By `Gilles Louppe`_. - -- Added :class:`ensemble.AdaBoostClassifier` and - :class:`ensemble.AdaBoostRegressor`, by `Noel Dawe`_ and - `Gilles Louppe`_. See the :ref:`AdaBoost ` section of the user - guide for details and examples. - -- Added :class:`grid_search.RandomizedSearchCV` and - :class:`grid_search.ParameterSampler` for randomized hyperparameter - optimization. By `Andreas Müller`_. - -- Added :ref:`biclustering ` algorithms - (:class:`sklearn.cluster.bicluster.SpectralCoclustering` and - :class:`sklearn.cluster.bicluster.SpectralBiclustering`), data - generation methods (:func:`sklearn.datasets.make_biclusters` and - :func:`sklearn.datasets.make_checkerboard`), and scoring metrics - (:func:`sklearn.metrics.consensus_score`). By `Kemal Eren`_. - -- Added :ref:`Restricted Boltzmann Machines` - (:class:`neural_network.BernoulliRBM`). By `Yann Dauphin`_. - -- Python 3 support by :user:`Justin Vincent `, `Lars Buitinck`_, - :user:`Subhodeep Moitra ` and `Olivier Grisel`_. All tests now pass under - Python 3.3. - -- Ability to pass one penalty (alpha value) per target in - :class:`linear_model.Ridge`, by @eickenberg and `Mathieu Blondel`_. - -- Fixed :mod:`sklearn.linear_model.stochastic_gradient.py` L2 regularization - issue (minor practical significance). - By :user:`Norbert Crombach ` and `Mathieu Blondel`_ . - -- Added an interactive version of `Andreas Müller`_'s - `Machine Learning Cheat Sheet (for scikit-learn) - `_ - to the documentation. See :ref:`Choosing the right estimator `. - By `Jaques Grobler`_. - -- :class:`grid_search.GridSearchCV` and - :func:`cross_validation.cross_val_score` now support the use of advanced - scoring function such as area under the ROC curve and f-beta scores. - See :ref:`scoring_parameter` for details. By `Andreas Müller`_ - and `Lars Buitinck`_. - Passing a function from :mod:`sklearn.metrics` as ``score_func`` is - deprecated. - -- Multi-label classification output is now supported by - :func:`metrics.accuracy_score`, :func:`metrics.zero_one_loss`, - :func:`metrics.f1_score`, :func:`metrics.fbeta_score`, - :func:`metrics.classification_report`, - :func:`metrics.precision_score` and :func:`metrics.recall_score` - by `Arnaud Joly`_. - -- Two new metrics :func:`metrics.hamming_loss` and - :func:`metrics.jaccard_similarity_score` - are added with multi-label support by `Arnaud Joly`_. - -- Speed and memory usage improvements in - :class:`feature_extraction.text.CountVectorizer` and - :class:`feature_extraction.text.TfidfVectorizer`, - by Jochen Wersdörfer and Roman Sinayev. - -- The ``min_df`` parameter in - :class:`feature_extraction.text.CountVectorizer` and - :class:`feature_extraction.text.TfidfVectorizer`, which used to be 2, - has been reset to 1 to avoid unpleasant surprises (empty vocabularies) - for novice users who try it out on tiny document collections. - A value of at least 2 is still recommended for practical use. - -- :class:`svm.LinearSVC`, :class:`linear_model.SGDClassifier` and - :class:`linear_model.SGDRegressor` now have a ``sparsify`` method that - converts their ``coef_`` into a sparse matrix, meaning stored models - trained using these estimators can be made much more compact. - -- :class:`linear_model.SGDClassifier` now produces multiclass probability - estimates when trained under log loss or modified Huber loss. - -- Hyperlinks to documentation in example code on the website by - :user:`Martin Luessi `. 
- -- Fixed bug in :class:`preprocessing.MinMaxScaler` causing incorrect scaling - of the features for non-default ``feature_range`` settings. By `Andreas - Müller`_. - -- ``max_features`` in :class:`tree.DecisionTreeClassifier`, - :class:`tree.DecisionTreeRegressor` and all derived ensemble estimators - now supports percentage values. By `Gilles Louppe`_. - -- Performance improvements in :class:`isotonic.IsotonicRegression` by - `Nelle Varoquaux`_. - -- :func:`metrics.accuracy_score` has an option normalize to return - the fraction or the number of correctly classified sample - by `Arnaud Joly`_. - -- Added :func:`metrics.log_loss` that computes log loss, aka cross-entropy - loss. By Jochen Wersdörfer and `Lars Buitinck`_. - -- A bug that caused :class:`ensemble.AdaBoostClassifier`'s to output - incorrect probabilities has been fixed. - -- Feature selectors now share a mixin providing consistent ``transform``, - ``inverse_transform`` and ``get_support`` methods. By `Joel Nothman`_. - -- A fitted :class:`grid_search.GridSearchCV` or - :class:`grid_search.RandomizedSearchCV` can now generally be pickled. - By `Joel Nothman`_. - -- Refactored and vectorized implementation of :func:`metrics.roc_curve` - and :func:`metrics.precision_recall_curve`. By `Joel Nothman`_. - -- The new estimator :class:`sklearn.decomposition.TruncatedSVD` - performs dimensionality reduction using SVD on sparse matrices, - and can be used for latent semantic analysis (LSA). - By `Lars Buitinck`_. - -- Added self-contained example of out-of-core learning on text data - :ref:`sphx_glr_auto_examples_applications_plot_out_of_core_classification.py`. - By :user:`Eustache Diemert `. - -- The default number of components for - :class:`sklearn.decomposition.RandomizedPCA` is now correctly documented - to be ``n_features``. This was the default behavior, so programs using it - will continue to work as they did. - -- :class:`sklearn.cluster.KMeans` now fits several orders of magnitude - faster on sparse data (the speedup depends on the sparsity). By - `Lars Buitinck`_. - -- Reduce memory footprint of FastICA by `Denis Engemann`_ and - `Alexandre Gramfort`_. - -- Verbose output in :mod:`sklearn.ensemble.gradient_boosting` now uses - a column format and prints progress in decreasing frequency. - It also shows the remaining time. By `Peter Prettenhofer`_. - -- :mod:`sklearn.ensemble.gradient_boosting` provides out-of-bag improvement - :attr:`~sklearn.ensemble.GradientBoostingRegressor.oob_improvement_` - rather than the OOB score for model selection. An example that shows - how to use OOB estimates to select the number of trees was added. - By `Peter Prettenhofer`_. - -- Most metrics now support string labels for multiclass classification - by `Arnaud Joly`_ and `Lars Buitinck`_. - -- New OrthogonalMatchingPursuitCV class by `Alexandre Gramfort`_ - and `Vlad Niculae`_. - -- Fixed a bug in :class:`sklearn.covariance.GraphLassoCV`: the - 'alphas' parameter now works as expected when given a list of - values. By Philippe Gervais. - -- Fixed an important bug in :class:`sklearn.covariance.GraphLassoCV` - that prevented all folds provided by a CV object to be used (only - the first 3 were used). When providing a CV object, execution - time may thus increase significantly compared to the previous - version (bug results are correct now). By Philippe Gervais. - -- :class:`cross_validation.cross_val_score` and the :mod:`grid_search` - module is now tested with multi-output data by `Arnaud Joly`_. 
- -- :func:`datasets.make_multilabel_classification` can now return - the output in label indicator multilabel format by `Arnaud Joly`_. - -- K-nearest neighbors, :class:`neighbors.KNeighborsClassifier` and - :class:`neighbors.KNeighborsRegressor`, - and radius neighbors, :class:`neighbors.RadiusNeighborsClassifier` and - :class:`neighbors.RadiusNeighborsRegressor`, support multioutput data - by `Arnaud Joly`_. - -- Random state in LibSVM-based estimators (:class:`svm.SVC`, :class:`NuSVC`, - :class:`OneClassSVM`, :class:`svm.SVR`, :class:`svm.NuSVR`) can now be - controlled. This is useful to ensure consistency in the probability - estimates for the classifiers trained with ``probability=True``. By - `Vlad Niculae`_. - -- Out-of-core learning support for the discrete naive Bayes classifiers - :class:`sklearn.naive_bayes.MultinomialNB` and - :class:`sklearn.naive_bayes.BernoulliNB` by adding the ``partial_fit`` - method, by `Olivier Grisel`_. - -- New website design and navigation by `Gilles Louppe`_, `Nelle Varoquaux`_, - Vincent Michel and `Andreas Müller`_. - -- Improved documentation on :ref:`multi-class, multi-label and multi-output - classification <multiclass>` by `Yannick Schwartz`_ and `Arnaud Joly`_. - -- Better input and error handling in the :mod:`metrics` module by - `Arnaud Joly`_ and `Joel Nothman`_. - -- Speed optimization of the :mod:`hmm` module by :user:`Mikhail Korobov`. - -- Significant speed improvements for :class:`sklearn.cluster.DBSCAN` - by `cleverless`_. - - -API changes summary ------------------- - -- The :func:`auc_score` function was renamed to :func:`roc_auc_score`. - -- Testing scikit-learn with ``sklearn.test()`` is deprecated. Use - ``nosetests sklearn`` from the command line. - -- Feature importances in :class:`tree.DecisionTreeClassifier`, - :class:`tree.DecisionTreeRegressor` and all derived ensemble estimators - are now computed on the fly when accessing the ``feature_importances_`` - attribute. Setting ``compute_importances=True`` is no longer required. - By `Gilles Louppe`_. - -- :class:`linear_model.lasso_path` and - :class:`linear_model.enet_path` can return their results in the same - format as that of :class:`linear_model.lars_path`. This is done by - setting the ``return_models`` parameter to ``False``. By - `Jaques Grobler`_ and `Alexandre Gramfort`_. - -- :class:`grid_search.IterGrid` was renamed to - :class:`grid_search.ParameterGrid`. - -- Fixed bug in :class:`KFold` causing imperfect class balance in some - cases. By `Alexandre Gramfort`_ and Tadej Janež. - -- :class:`sklearn.neighbors.BallTree` has been refactored, and a - :class:`sklearn.neighbors.KDTree` has been - added which shares the same interface. The Ball Tree now works with - a wide variety of distance metrics. Both classes have many new - methods, including single-tree and dual-tree queries, breadth-first - and depth-first searching, and more advanced queries such as - kernel density estimation and 2-point correlation functions. - By `Jake Vanderplas`_. - -- Support for ``scipy.spatial.cKDTree`` within neighbors queries has been - removed, and the functionality replaced with the new :class:`KDTree` - class. - -- :class:`sklearn.neighbors.KernelDensity` has been added, which performs - efficient kernel density estimation with a variety of kernels. - -- :class:`sklearn.decomposition.KernelPCA` now always returns output with - ``n_components`` components, unless the new parameter ``remove_zero_eig`` - is set to ``True``.
This new behavior is consistent with the way - kernel PCA was always documented; previously, the removal of components - with zero eigenvalues was tacitly performed on all data. - -- ``gcv_mode="auto"`` no longer tries to perform SVD on a densified - sparse matrix in :class:`sklearn.linear_model.RidgeCV`. - -- Sparse matrix support in :class:`sklearn.decomposition.RandomizedPCA` - is now deprecated in favor of the new ``TruncatedSVD``. - -- :class:`cross_validation.KFold` and - :class:`cross_validation.StratifiedKFold` now enforce `n_folds >= 2` - otherwise a ``ValueError`` is raised. By `Olivier Grisel`_. - -- :func:`datasets.load_files`'s ``charset`` and ``charset_errors`` - parameters were renamed ``encoding`` and ``decode_errors``. - -- Attribute ``oob_score_`` in :class:`sklearn.ensemble.GradientBoostingRegressor` - and :class:`sklearn.ensemble.GradientBoostingClassifier` - is deprecated and has been replaced by ``oob_improvement_`` . - -- Attributes in OrthogonalMatchingPursuit have been deprecated - (copy_X, Gram, ...) and precompute_gram renamed precompute - for consistency. See #2224. - -- :class:`sklearn.preprocessing.StandardScaler` now converts integer input - to float, and raises a warning. Previously it rounded for dense integer - input. - -- :class:`sklearn.multiclass.OneVsRestClassifier` now has a - ``decision_function`` method. This will return the distance of each - sample from the decision boundary for each class, as long as the - underlying estimators implement the ``decision_function`` method. - By `Kyle Kastner`_. - -- Better input validation, warning on unexpected shapes for y. - -People ------- -List of contributors for release 0.14 by number of commits. - - * 277 Gilles Louppe - * 245 Lars Buitinck - * 187 Andreas Mueller - * 124 Arnaud Joly - * 112 Jaques Grobler - * 109 Gael Varoquaux - * 107 Olivier Grisel - * 102 Noel Dawe - * 99 Kemal Eren - * 79 Joel Nothman - * 75 Jake VanderPlas - * 73 Nelle Varoquaux - * 71 Vlad Niculae - * 65 Peter Prettenhofer - * 64 Alexandre Gramfort - * 54 Mathieu Blondel - * 38 Nicolas Trésegnie - * 35 eustache - * 27 Denis Engemann - * 25 Yann N. Dauphin - * 19 Justin Vincent - * 17 Robert Layton - * 15 Doug Coleman - * 14 Michael Eickenberg - * 13 Robert Marchman - * 11 Fabian Pedregosa - * 11 Philippe Gervais - * 10 Jim Holmström - * 10 Tadej Janež - * 10 syhw - * 9 Mikhail Korobov - * 9 Steven De Gryze - * 8 sergeyf - * 7 Ben Root - * 7 Hrishikesh Huilgolkar - * 6 Kyle Kastner - * 6 Martin Luessi - * 6 Rob Speer - * 5 Federico Vaggi - * 5 Raul Garreta - * 5 Rob Zinkov - * 4 Ken Geis - * 3 A. 
Flaxman - * 3 Denton Cockburn - * 3 Dougal Sutherland - * 3 Ian Ozsvald - * 3 Johannes Schönberger - * 3 Robert McGibbon - * 3 Roman Sinayev - * 3 Szabo Roland - * 2 Diego Molla - * 2 Imran Haque - * 2 Jochen Wersdörfer - * 2 Sergey Karayev - * 2 Yannick Schwartz - * 2 jamestwebber - * 1 Abhijeet Kolhe - * 1 Alexander Fabisch - * 1 Bastiaan van den Berg - * 1 Benjamin Peterson - * 1 Daniel Velkov - * 1 Fazlul Shahriar - * 1 Felix Brockherde - * 1 Félix-Antoine Fortin - * 1 Harikrishnan S - * 1 Jack Hale - * 1 JakeMick - * 1 James McDermott - * 1 John Benediktsson - * 1 John Zwinck - * 1 Joshua Vredevoogd - * 1 Justin Pati - * 1 Kevin Hughes - * 1 Kyle Kelley - * 1 Matthias Ekman - * 1 Miroslav Shubernetskiy - * 1 Naoki Orii - * 1 Norbert Crombach - * 1 Rafael Cunha de Almeida - * 1 Rolando Espinoza La fuente - * 1 Seamus Abshere - * 1 Sergey Feldman - * 1 Sergio Medina - * 1 Stefano Lattarini - * 1 Steve Koch - * 1 Sturla Molden - * 1 Thomas Jarosch - * 1 Yaroslav Halchenko - -.. _changes_0_13_1: - -Version 0.13.1 -============== - -**February 23, 2013** - -The 0.13.1 release only fixes some bugs and does not add any new functionality. - -Changelog ---------- - -- Fixed a testing error caused by the function :func:`cross_validation.train_test_split` being - interpreted as a test by `Yaroslav Halchenko`_. - -- Fixed a bug in the reassignment of small clusters in the :class:`cluster.MiniBatchKMeans` - by `Gael Varoquaux`_. - -- Fixed default value of ``gamma`` in :class:`decomposition.KernelPCA` by `Lars Buitinck`_. - -- Updated joblib to ``0.7.0d`` by `Gael Varoquaux`_. - -- Fixed scaling of the deviance in :class:`ensemble.GradientBoostingClassifier` by `Peter Prettenhofer`_. - -- Better tie-breaking in :class:`multiclass.OneVsOneClassifier` by `Andreas Müller`_. - -- Other small improvements to tests and documentation. - -People ------- -List of contributors for release 0.13.1 by number of commits. - * 16 `Lars Buitinck`_ - * 12 `Andreas Müller`_ - * 8 `Gael Varoquaux`_ - * 5 Robert Marchman - * 3 `Peter Prettenhofer`_ - * 2 Hrishikesh Huilgolkar - * 1 Bastiaan van den Berg - * 1 Diego Molla - * 1 `Gilles Louppe`_ - * 1 `Mathieu Blondel`_ - * 1 `Nelle Varoquaux`_ - * 1 Rafael Cunha de Almeida - * 1 Rolando Espinoza La fuente - * 1 `Vlad Niculae`_ - * 1 `Yaroslav Halchenko`_ - - -.. _changes_0_13: - -Version 0.13 -============ - -**January 21, 2013** - -New Estimator Classes ---------------------- - -- :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor`, two - data-independent predictors by `Mathieu Blondel`_. Useful to sanity-check - your estimators. See :ref:`dummy_estimators` in the user guide. - Multioutput support added by `Arnaud Joly`_. - -- :class:`decomposition.FactorAnalysis`, a transformer implementing the - classical factor analysis, by `Christian Osendorfer`_ and `Alexandre - Gramfort`_. See :ref:`FA` in the user guide. - -- :class:`feature_extraction.FeatureHasher`, a transformer implementing the - "hashing trick" for fast, low-memory feature extraction from string fields - by `Lars Buitinck`_ and :class:`feature_extraction.text.HashingVectorizer` - for text documents by `Olivier Grisel`_ See :ref:`feature_hashing` and - :ref:`hashing_vectorizer` for the documentation and sample usage. - -- :class:`pipeline.FeatureUnion`, a transformer that concatenates - results of several other transformers by `Andreas Müller`_. See - :ref:`feature_union` in the user guide. 
- -- :class:`random_projection.GaussianRandomProjection`, - :class:`random_projection.SparseRandomProjection` and the function - :func:`random_projection.johnson_lindenstrauss_min_dim`. The first two are - transformers implementing Gaussian and sparse random projection matrix - by `Olivier Grisel`_ and `Arnaud Joly`_. - See :ref:`random_projection` in the user guide. - -- :class:`kernel_approximation.Nystroem`, a transformer for approximating - arbitrary kernels by `Andreas Müller`_. See - :ref:`nystroem_kernel_approx` in the user guide. - -- :class:`preprocessing.OneHotEncoder`, a transformer that computes binary - encodings of categorical features by `Andreas Müller`_. See - :ref:`preprocessing_categorical_features` in the user guide. - -- :class:`linear_model.PassiveAggressiveClassifier` and - :class:`linear_model.PassiveAggressiveRegressor`, predictors implementing - an efficient stochastic optimization for linear models by `Rob Zinkov`_ and - `Mathieu Blondel`_. See :ref:`passive_aggressive` in the user - guide. - -- :class:`ensemble.RandomTreesEmbedding`, a transformer for creating high-dimensional - sparse representations using ensembles of totally random trees by `Andreas Müller`_. - See :ref:`random_trees_embedding` in the user guide. - -- :class:`manifold.SpectralEmbedding` and function - :func:`manifold.spectral_embedding`, implementing the "laplacian - eigenmaps" transformation for non-linear dimensionality reduction by Wei - Li. See :ref:`spectral_embedding` in the user guide. - -- :class:`isotonic.IsotonicRegression` by `Fabian Pedregosa`_, `Alexandre Gramfort`_ - and `Nelle Varoquaux`_, - - -Changelog ---------- - -- :func:`metrics.zero_one_loss` (formerly ``metrics.zero_one``) now has - option for normalized output that reports the fraction of - misclassifications, rather than the raw number of misclassifications. By - Kyle Beauchamp. - -- :class:`tree.DecisionTreeClassifier` and all derived ensemble models now - support sample weighting, by `Noel Dawe`_ and `Gilles Louppe`_. - -- Speedup improvement when using bootstrap samples in forests of randomized - trees, by `Peter Prettenhofer`_ and `Gilles Louppe`_. - -- Partial dependence plots for :ref:`gradient_boosting` in - :func:`ensemble.partial_dependence.partial_dependence` by `Peter - Prettenhofer`_. See :ref:`sphx_glr_auto_examples_ensemble_plot_partial_dependence.py` for an - example. - -- The table of contents on the website has now been made expandable by - `Jaques Grobler`_. - -- :class:`feature_selection.SelectPercentile` now breaks ties - deterministically instead of returning all equally ranked features. - -- :class:`feature_selection.SelectKBest` and - :class:`feature_selection.SelectPercentile` are more numerically stable - since they use scores, rather than p-values, to rank results. This means - that they might sometimes select different features than they did - previously. - -- Ridge regression and ridge classification fitting with ``sparse_cg`` solver - no longer has quadratic memory complexity, by `Lars Buitinck`_ and - `Fabian Pedregosa`_. - -- Ridge regression and ridge classification now support a new fast solver - called ``lsqr``, by `Mathieu Blondel`_. - -- Speed up of :func:`metrics.precision_recall_curve` by Conrad Lee. - -- Added support for reading/writing svmlight files with pairwise - preference attribute (qid in svmlight file format) in - :func:`datasets.dump_svmlight_file` and - :func:`datasets.load_svmlight_file` by `Fabian Pedregosa`_. 
- -- Faster and more robust :func:`metrics.confusion_matrix` and - :ref:`clustering_evaluation` by Wei Li. - -- :func:`cross_validation.cross_val_score` now works with precomputed kernels - and affinity matrices, by `Andreas Müller`_. - -- LARS algorithm made more numerically stable with heuristics to drop - regressors that are too correlated, and to stop the path when - numerical noise becomes predominant, by `Gael Varoquaux`_. - -- Faster implementation of :func:`metrics.precision_recall_curve` by - Conrad Lee. - -- New kernel :class:`metrics.chi2_kernel` by `Andreas Müller`_, often used - in computer vision applications. - -- Longstanding bug in :class:`naive_bayes.BernoulliNB` fixed by - Shaun Jackman. - -- Implemented ``predict_proba`` in :class:`multiclass.OneVsRestClassifier`, - by Andrew Winterman. - -- Improve consistency in gradient boosting: the estimators - :class:`ensemble.GradientBoostingRegressor` and - :class:`ensemble.GradientBoostingClassifier` use the estimator - :class:`tree.DecisionTreeRegressor` instead of the - :class:`tree._tree.Tree` data structure, by `Arnaud Joly`_. - -- Fixed a floating point exception in the :ref:`decision trees <tree>` - module, by Seberg. - -- Fixed :func:`metrics.roc_curve` failing when ``y_true`` has only one class, - by Wei Li. - -- Add the :func:`metrics.mean_absolute_error` function, which computes the - mean absolute error. The :func:`metrics.mean_squared_error`, - :func:`metrics.mean_absolute_error` and - :func:`metrics.r2_score` metrics support multioutput, by `Arnaud Joly`_. - -- Fixed ``class_weight`` support in :class:`svm.LinearSVC` and - :class:`linear_model.LogisticRegression` by `Andreas Müller`_. The meaning - of ``class_weight`` was reversed, as erroneously a higher weight meant fewer - positives of a given class in earlier releases. - -- Improve narrative documentation and consistency in - :mod:`sklearn.metrics` for regression and classification metrics, - by `Arnaud Joly`_. - -- Fixed a bug in :class:`sklearn.svm.SVC` when using CSR matrices with - unsorted indices, by Xinfan Meng and `Andreas Müller`_. - -- :class:`MiniBatchKMeans`: Add random reassignment of cluster centers - with few observations attached to them, by `Gael Varoquaux`_. - - -API changes summary ------------------- -- Renamed all occurrences of ``n_atoms`` to ``n_components`` for consistency. - This applies to :class:`decomposition.DictionaryLearning`, - :class:`decomposition.MiniBatchDictionaryLearning`, - :func:`decomposition.dict_learning` and :func:`decomposition.dict_learning_online`. - -- Renamed all occurrences of ``max_iters`` to ``max_iter`` for consistency. - This applies to :class:`semi_supervised.LabelPropagation` and - :class:`semi_supervised.label_propagation.LabelSpreading`. - -- Renamed all occurrences of ``learn_rate`` to ``learning_rate`` for - consistency in :class:`ensemble.BaseGradientBoosting` and - :class:`ensemble.GradientBoostingRegressor`. - -- The module ``sklearn.linear_model.sparse`` is gone. Sparse matrix support - was already integrated into the "regular" linear models. - -- :func:`sklearn.metrics.mean_square_error`, which incorrectly returned the - accumulated error, was removed. Use ``mean_squared_error`` instead. - -- Passing ``class_weight`` parameters to ``fit`` methods is no longer - supported. Pass them to estimator constructors instead. - -- GMMs no longer have ``decode`` and ``rvs`` methods. Use the ``score``, - ``predict`` or ``sample`` methods instead.
- -- The ``solver`` fit option in Ridge regression and classification is now - deprecated and will be removed in v0.14. Use the constructor option - instead. - -- :class:`feature_extraction.text.DictVectorizer` now returns sparse - matrices in the CSR format, instead of COO. - -- Renamed ``k`` in :class:`cross_validation.KFold` and - :class:`cross_validation.StratifiedKFold` to ``n_folds``, renamed - ``n_bootstraps`` to ``n_iter`` in ``cross_validation.Bootstrap``. - -- Renamed all occurrences of ``n_iterations`` to ``n_iter`` for consistency. - This applies to :class:`cross_validation.ShuffleSplit`, - :class:`cross_validation.StratifiedShuffleSplit`, - :func:`utils.randomized_range_finder` and :func:`utils.randomized_svd`. - -- Replaced ``rho`` in :class:`linear_model.ElasticNet` and - :class:`linear_model.SGDClassifier` by ``l1_ratio``. The ``rho`` parameter - had different meanings; ``l1_ratio`` was introduced to avoid confusion. - It has the same meaning as previously ``rho`` in - :class:`linear_model.ElasticNet` and ``(1-rho)`` in - :class:`linear_model.SGDClassifier`. - -- :class:`linear_model.LassoLars` and :class:`linear_model.Lars` now - store a list of paths in the case of multiple targets, rather than - an array of paths. - -- The attribute ``gmm`` of :class:`hmm.GMMHMM` was renamed to ``gmm_`` - to adhere more strictly with the API. - -- :func:`cluster.spectral_embedding` was moved to - :func:`manifold.spectral_embedding`. - -- Renamed ``eig_tol`` in :func:`manifold.spectral_embedding`, - :class:`cluster.SpectralClustering` to ``eigen_tol``, renamed ``mode`` - to ``eigen_solver``. - -- Renamed ``mode`` in :func:`manifold.spectral_embedding` and - :class:`cluster.SpectralClustering` to ``eigen_solver``. - -- ``classes_`` and ``n_classes_`` attributes of - :class:`tree.DecisionTreeClassifier` and all derived ensemble models are - now flat in case of single output problems and nested in case of - multi-output problems. - -- The ``estimators_`` attribute of - :class:`ensemble.gradient_boosting.GradientBoostingRegressor` and - :class:`ensemble.gradient_boosting.GradientBoostingClassifier` is now an - array of :class:'tree.DecisionTreeRegressor'. - -- Renamed ``chunk_size`` to ``batch_size`` in - :class:`decomposition.MiniBatchDictionaryLearning` and - :class:`decomposition.MiniBatchSparsePCA` for consistency. - -- :class:`svm.SVC` and :class:`svm.NuSVC` now provide a ``classes_`` - attribute and support arbitrary dtypes for labels ``y``. - Also, the dtype returned by ``predict`` now reflects the dtype of - ``y`` during ``fit`` (used to be ``np.float``). - -- Changed default test_size in :func:`cross_validation.train_test_split` - to None, added possibility to infer ``test_size`` from ``train_size`` in - :class:`cross_validation.ShuffleSplit` and - :class:`cross_validation.StratifiedShuffleSplit`. - -- Renamed function :func:`sklearn.metrics.zero_one` to - :func:`sklearn.metrics.zero_one_loss`. Be aware that the default behavior - in :func:`sklearn.metrics.zero_one_loss` is different from - :func:`sklearn.metrics.zero_one`: ``normalize=False`` is changed to - ``normalize=True``. - -- Renamed function :func:`metrics.zero_one_score` to - :func:`metrics.accuracy_score`. - -- :func:`datasets.make_circles` now has the same number of inner and outer points. - -- In the Naive Bayes classifiers, the ``class_prior`` parameter was moved - from ``fit`` to ``__init__``. - -People ------- -List of contributors for release 0.13 by number of commits. 
- - * 364 `Andreas Müller`_ - * 143 `Arnaud Joly`_ - * 137 `Peter Prettenhofer`_ - * 131 `Gael Varoquaux`_ - * 117 `Mathieu Blondel`_ - * 108 `Lars Buitinck`_ - * 106 Wei Li - * 101 `Olivier Grisel`_ - * 65 `Vlad Niculae`_ - * 54 `Gilles Louppe`_ - * 40 `Jaques Grobler`_ - * 38 `Alexandre Gramfort`_ - * 30 `Rob Zinkov`_ - * 19 Aymeric Masurelle - * 18 Andrew Winterman - * 17 `Fabian Pedregosa`_ - * 17 Nelle Varoquaux - * 16 `Christian Osendorfer`_ - * 14 `Daniel Nouri`_ - * 13 :user:`Virgile Fritsch ` - * 13 syhw - * 12 `Satrajit Ghosh`_ - * 10 Corey Lynch - * 10 Kyle Beauchamp - * 9 Brian Cheung - * 9 Immanuel Bayer - * 9 mr.Shu - * 8 Conrad Lee - * 8 `James Bergstra`_ - * 7 Tadej Janež - * 6 Brian Cajes - * 6 `Jake Vanderplas`_ - * 6 Michael - * 6 Noel Dawe - * 6 Tiago Nunes - * 6 cow - * 5 Anze - * 5 Shiqiao Du - * 4 Christian Jauvin - * 4 Jacques Kvam - * 4 Richard T. Guy - * 4 `Robert Layton`_ - * 3 Alexandre Abraham - * 3 Doug Coleman - * 3 Scott Dickerson - * 2 ApproximateIdentity - * 2 John Benediktsson - * 2 Mark Veronda - * 2 Matti Lyra - * 2 Mikhail Korobov - * 2 Xinfan Meng - * 1 Alejandro Weinstein - * 1 `Alexandre Passos`_ - * 1 Christoph Deil - * 1 Eugene Nizhibitsky - * 1 Kenneth C. Arnold - * 1 Luis Pedro Coelho - * 1 Miroslav Batchkarov - * 1 Pavel - * 1 Sebastian Berg - * 1 Shaun Jackman - * 1 Subhodeep Moitra - * 1 bob - * 1 dengemann - * 1 emanuele - * 1 x006 - - -.. _changes_0_12.1: - -Version 0.12.1 -=============== - -**October 8, 2012** - -The 0.12.1 release is a bug-fix release with no additional features, but is -instead a set of bug fixes - -Changelog ----------- - -- Improved numerical stability in spectral embedding by `Gael - Varoquaux`_ - -- Doctest under windows 64bit by `Gael Varoquaux`_ - -- Documentation fixes for elastic net by `Andreas Müller`_ and - `Alexandre Gramfort`_ - -- Proper behavior with fortran-ordered NumPy arrays by `Gael Varoquaux`_ - -- Make GridSearchCV work with non-CSR sparse matrix by `Lars Buitinck`_ - -- Fix parallel computing in MDS by `Gael Varoquaux`_ - -- Fix Unicode support in count vectorizer by `Andreas Müller`_ - -- Fix MinCovDet breaking with X.shape = (3, 1) by :user:`Virgile Fritsch ` - -- Fix clone of SGD objects by `Peter Prettenhofer`_ - -- Stabilize GMM by :user:`Virgile Fritsch ` - -People ------- - - * 14 `Peter Prettenhofer`_ - * 12 `Gael Varoquaux`_ - * 10 `Andreas Müller`_ - * 5 `Lars Buitinck`_ - * 3 :user:`Virgile Fritsch ` - * 1 `Alexandre Gramfort`_ - * 1 `Gilles Louppe`_ - * 1 `Mathieu Blondel`_ - -.. _changes_0_12: - -Version 0.12 -============ - -**September 4, 2012** - -Changelog ---------- - -- Various speed improvements of the :ref:`decision trees ` module, by - `Gilles Louppe`_. - -- :class:`ensemble.GradientBoostingRegressor` and - :class:`ensemble.GradientBoostingClassifier` now support feature subsampling - via the ``max_features`` argument, by `Peter Prettenhofer`_. - -- Added Huber and Quantile loss functions to - :class:`ensemble.GradientBoostingRegressor`, by `Peter Prettenhofer`_. - -- :ref:`Decision trees ` and :ref:`forests of randomized trees ` - now support multi-output classification and regression problems, by - `Gilles Louppe`_. - -- Added :class:`preprocessing.LabelEncoder`, a simple utility class to - normalize labels or transform non-numerical labels, by `Mathieu Blondel`_. - -- Added the epsilon-insensitive loss and the ability to make probabilistic - predictions with the modified huber loss in :ref:`sgd`, by - `Mathieu Blondel`_. 
- -- Added :ref:`multidimensional_scaling`, by Nelle Varoquaux. - -- SVMlight file format loader now detects compressed (gzip/bzip2) files and - decompresses them on the fly, by `Lars Buitinck`_. - -- SVMlight file format serializer now preserves double precision floating - point values, by `Olivier Grisel`_. - -- A common testing framework for all estimators was added, by `Andreas Müller`_. - -- Understandable error messages for estimators that do not accept - sparse input by `Gael Varoquaux`_ - -- Speedups in hierarchical clustering by `Gael Varoquaux`_. In - particular building the tree now supports early stopping. This is - useful when the number of clusters is not small compared to the - number of samples. - -- Add MultiTaskLasso and MultiTaskElasticNet for joint feature selection, - by `Alexandre Gramfort`_. - -- Added :func:`metrics.auc_score` and - :func:`metrics.average_precision_score` convenience functions by `Andreas - Müller`_. - -- Improved sparse matrix support in the :ref:`feature_selection` - module by `Andreas Müller`_. - -- New word boundaries-aware character n-gram analyzer for the - :ref:`text_feature_extraction` module by :user:`@kernc `. - -- Fixed bug in spectral clustering that led to single point clusters - by `Andreas Müller`_. - -- In :class:`feature_extraction.text.CountVectorizer`, added an option to - ignore infrequent words, ``min_df`` by `Andreas Müller`_. - -- Add support for multiple targets in some linear models (ElasticNet, Lasso - and OrthogonalMatchingPursuit) by `Vlad Niculae`_ and - `Alexandre Gramfort`_. - -- Fixes in :class:`decomposition.ProbabilisticPCA` score function by Wei Li. - -- Fixed feature importance computation in - :ref:`gradient_boosting`. - -API changes summary -------------------- - -- The old ``scikits.learn`` package has disappeared; all code should import - from ``sklearn`` instead, which was introduced in 0.9. - -- In :func:`metrics.roc_curve`, the ``thresholds`` array is now returned - with it's order reversed, in order to keep it consistent with the order - of the returned ``fpr`` and ``tpr``. - -- In :class:`hmm` objects, like :class:`hmm.GaussianHMM`, - :class:`hmm.MultinomialHMM`, etc., all parameters must be passed to the - object when initialising it and not through ``fit``. Now ``fit`` will - only accept the data as an input parameter. - -- For all SVM classes, a faulty behavior of ``gamma`` was fixed. Previously, - the default gamma value was only computed the first time ``fit`` was called - and then stored. It is now recalculated on every call to ``fit``. - -- All ``Base`` classes are now abstract meta classes so that they can not be - instantiated. - -- :func:`cluster.ward_tree` now also returns the parent array. This is - necessary for early-stopping in which case the tree is not - completely built. - -- In :class:`feature_extraction.text.CountVectorizer` the parameters - ``min_n`` and ``max_n`` were joined to the parameter ``n_gram_range`` to - enable grid-searching both at once. - -- In :class:`feature_extraction.text.CountVectorizer`, words that appear - only in one document are now ignored by default. To reproduce - the previous behavior, set ``min_df=1``. - -- Fixed API inconsistency: :meth:`linear_model.SGDClassifier.predict_proba` now - returns 2d array when fit on two classes. 
- -- Fixed API inconsistency: :meth:`discriminant_analysis.QuadraticDiscriminantAnalysis.decision_function` - and :meth:`discriminant_analysis.LinearDiscriminantAnalysis.decision_function` now return 1d arrays - when fit on two classes. - -- Grid of alphas used for fitting :class:`linear_model.LassoCV` and - :class:`linear_model.ElasticNetCV` is now stored - in the attribute ``alphas_`` rather than overriding the init parameter - ``alphas``. - -- Linear models when alpha is estimated by cross-validation store - the estimated value in the ``alpha_`` attribute rather than just - ``alpha`` or ``best_alpha``. - -- :class:`ensemble.GradientBoostingClassifier` now supports - :meth:`ensemble.GradientBoostingClassifier.staged_predict_proba`, and - :meth:`ensemble.GradientBoostingClassifier.staged_predict`. - -- :class:`svm.sparse.SVC` and other sparse SVM classes are now deprecated. - The all classes in the :ref:`svm` module now automatically select the - sparse or dense representation base on the input. - -- All clustering algorithms now interpret the array ``X`` given to ``fit`` as - input data, in particular :class:`cluster.SpectralClustering` and - :class:`cluster.AffinityPropagation` which previously expected affinity matrices. - -- For clustering algorithms that take the desired number of clusters as a parameter, - this parameter is now called ``n_clusters``. - - -People ------- - * 267 `Andreas Müller`_ - * 94 `Gilles Louppe`_ - * 89 `Gael Varoquaux`_ - * 79 `Peter Prettenhofer`_ - * 60 `Mathieu Blondel`_ - * 57 `Alexandre Gramfort`_ - * 52 `Vlad Niculae`_ - * 45 `Lars Buitinck`_ - * 44 Nelle Varoquaux - * 37 `Jaques Grobler`_ - * 30 Alexis Mignon - * 30 Immanuel Bayer - * 27 `Olivier Grisel`_ - * 16 Subhodeep Moitra - * 13 Yannick Schwartz - * 12 :user:`@kernc ` - * 11 :user:`Virgile Fritsch ` - * 9 Daniel Duckworth - * 9 `Fabian Pedregosa`_ - * 9 `Robert Layton`_ - * 8 John Benediktsson - * 7 Marko Burjek - * 5 `Nicolas Pinto`_ - * 4 Alexandre Abraham - * 4 `Jake Vanderplas`_ - * 3 `Brian Holt`_ - * 3 `Edouard Duchesnay`_ - * 3 Florian Hoenig - * 3 flyingimmidev - * 2 Francois Savard - * 2 Hannes Schulz - * 2 Peter Welinder - * 2 `Yaroslav Halchenko`_ - * 2 Wei Li - * 1 Alex Companioni - * 1 Brandyn A. White - * 1 Bussonnier Matthias - * 1 Charles-Pierre Astolfi - * 1 Dan O'Huiginn - * 1 David Cournapeau - * 1 Keith Goodman - * 1 Ludwig Schwardt - * 1 Olivier Hervieu - * 1 Sergio Medina - * 1 Shiqiao Du - * 1 Tim Sheerman-Chase - * 1 buguen - - - -.. _changes_0_11: - -Version 0.11 -============ - -**May 7, 2012** - -Changelog ---------- - -Highlights -............. - -- Gradient boosted regression trees (:ref:`gradient_boosting`) - for classification and regression by `Peter Prettenhofer`_ - and `Scott White`_ . - -- Simple dict-based feature loader with support for categorical variables - (:class:`feature_extraction.DictVectorizer`) by `Lars Buitinck`_. - -- Added Matthews correlation coefficient (:func:`metrics.matthews_corrcoef`) - and added macro and micro average options to - :func:`metrics.precision_score`, :func:`metrics.recall_score` and - :func:`metrics.f1_score` by `Satrajit Ghosh`_. - -- :ref:`out_of_bag` of generalization error for :ref:`ensemble` - by `Andreas Müller`_. - -- Randomized sparse linear models for feature - selection, by `Alexandre Gramfort`_ and `Gael Varoquaux`_ - -- :ref:`label_propagation` for semi-supervised learning, by Clay - Woolam. **Note** the semi-supervised API is still work in progress, - and may change. 
- -- Added BIC/AIC model selection to classical :ref:`gmm` and unified - the API with the remainder of scikit-learn, by `Bertrand Thirion`_ - -- Added :class:`sklearn.cross_validation.StratifiedShuffleSplit`, which is - a :class:`sklearn.cross_validation.ShuffleSplit` with balanced splits, - by Yannick Schwartz. - -- :class:`sklearn.neighbors.NearestCentroid` classifier added, along with a - ``shrink_threshold`` parameter, which implements **shrunken centroid - classification**, by `Robert Layton`_. - -Other changes -.............. - -- Merged dense and sparse implementations of :ref:`sgd` module and - exposed utility extension types for sequential - datasets ``seq_dataset`` and weight vectors ``weight_vector`` - by `Peter Prettenhofer`_. - -- Added ``partial_fit`` (support for online/minibatch learning) and - warm_start to the :ref:`sgd` module by `Mathieu Blondel`_. - -- Dense and sparse implementations of :ref:`svm` classes and - :class:`linear_model.LogisticRegression` merged by `Lars Buitinck`_. - -- Regressors can now be used as base estimator in the :ref:`multiclass` - module by `Mathieu Blondel`_. - -- Added n_jobs option to :func:`metrics.pairwise.pairwise_distances` - and :func:`metrics.pairwise.pairwise_kernels` for parallel computation, - by `Mathieu Blondel`_. - -- :ref:`k_means` can now be run in parallel, using the ``n_jobs`` argument - to either :ref:`k_means` or :class:`KMeans`, by `Robert Layton`_. - -- Improved :ref:`cross_validation` and :ref:`grid_search` documentation - and introduced the new :func:`cross_validation.train_test_split` - helper function by `Olivier Grisel`_ - -- :class:`svm.SVC` members ``coef_`` and ``intercept_`` changed sign for - consistency with ``decision_function``; for ``kernel==linear``, - ``coef_`` was fixed in the one-vs-one case, by `Andreas Müller`_. - -- Performance improvements to efficient leave-one-out cross-validated - Ridge regression, esp. for the ``n_samples > n_features`` case, in - :class:`linear_model.RidgeCV`, by Reuben Fletcher-Costin. - -- Refactoring and simplification of the :ref:`text_feature_extraction` - API and fixed a bug that caused possible negative IDF, - by `Olivier Grisel`_. - -- Beam pruning option in :class:`_BaseHMM` module has been removed since it - is difficult to Cythonize. If you are interested in contributing a Cython - version, you can use the python version in the git history as a reference. - -- Classes in :ref:`neighbors` now support arbitrary Minkowski metric for - nearest neighbors searches. The metric can be specified by argument ``p``. - -API changes summary -------------------- - -- :class:`covariance.EllipticEnvelop` is now deprecated - Please use :class:`covariance.EllipticEnvelope` - instead. - -- ``NeighborsClassifier`` and ``NeighborsRegressor`` are gone in the module - :ref:`neighbors`. Use the classes :class:`KNeighborsClassifier`, - :class:`RadiusNeighborsClassifier`, :class:`KNeighborsRegressor` - and/or :class:`RadiusNeighborsRegressor` instead. - -- Sparse classes in the :ref:`sgd` module are now deprecated. - -- In :class:`mixture.GMM`, :class:`mixture.DPGMM` and :class:`mixture.VBGMM`, - parameters must be passed to an object when initialising it and not through - ``fit``. Now ``fit`` will only accept the data as an input parameter. - -- methods ``rvs`` and ``decode`` in :class:`GMM` module are now deprecated. - ``sample`` and ``score`` or ``predict`` should be used instead. - -- attribute ``_scores`` and ``_pvalues`` in univariate feature selection - objects are now deprecated. 
- ``scores_`` or ``pvalues_`` should be used instead. - -- In :class:`LogisticRegression`, :class:`LinearSVC`, :class:`SVC` and - :class:`NuSVC`, the ``class_weight`` parameter is now an initialization - parameter, not a parameter to fit. This makes grid searches - over this parameter possible. - -- LFW ``data`` is now always shape ``(n_samples, n_features)`` to be - consistent with the Olivetti faces dataset. Use the ``images`` and - ``pairs`` attributes to access the natural image shapes instead. - -- In :class:`svm.LinearSVC`, the meaning of the ``multi_class`` parameter - changed. Options now are ``'ovr'`` and ``'crammer_singer'``, with - ``'ovr'`` being the default. This does not change the default behavior - but hopefully is less confusing. - -- Class :class:`feature_extraction.text.Vectorizer` is deprecated and - replaced by :class:`feature_extraction.text.TfidfVectorizer`. - -- The preprocessor / analyzer nested structure for text feature - extraction has been removed. All those features are - now directly passed as flat constructor arguments - to :class:`feature_extraction.text.TfidfVectorizer` and - :class:`feature_extraction.text.CountVectorizer`, in particular the - following parameters are now used: - -- ``analyzer`` can be ``'word'`` or ``'char'`` to switch the default - analysis scheme, or use a specific Python callable (as previously). - -- ``tokenizer`` and ``preprocessor`` have been introduced to make it - still possible to customize those steps with the new API. - -- ``input`` explicitly controls how to interpret the sequence passed to - ``fit`` and ``predict``: filenames, file objects or direct (byte or - Unicode) strings. - -- charset decoding is explicit and strict by default. - -- the ``vocabulary``, fitted or not, is now stored in the - ``vocabulary_`` attribute to be consistent with the project - conventions. - -- Class :class:`feature_extraction.text.TfidfVectorizer` now derives directly - from :class:`feature_extraction.text.CountVectorizer` to make grid - search trivial. - -- Method ``rvs`` in the :class:`_BaseHMM` module is now deprecated. - ``sample`` should be used instead. - -- Beam pruning option in the :class:`_BaseHMM` module is removed since it is - difficult to Cythonize. If you are interested, you can find the original - implementation in the git history. - -- The SVMlight format loader now supports files with both zero-based and - one-based column indices, since both occur "in the wild". - -- Arguments in class :class:`ShuffleSplit` are now consistent with - :class:`StratifiedShuffleSplit`. Arguments ``test_fraction`` and - ``train_fraction`` are deprecated and renamed to ``test_size`` and - ``train_size`` and can accept both ``float`` and ``int``. - -- Arguments in class :class:`Bootstrap` are now consistent with - :class:`StratifiedShuffleSplit`. Arguments ``n_test`` and - ``n_train`` are deprecated and renamed to ``test_size`` and - ``train_size`` and can accept both ``float`` and ``int``. - -- Argument ``p`` added to classes in :ref:`neighbors` to specify an - arbitrary Minkowski metric for nearest neighbors searches (see the short - sketch after this list).
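For illustration, a minimal sketch of the ``p`` argument mentioned above (an editorial addition, not part of the original changelog; the data is synthetic, and ``p=1`` selects the Manhattan distance while ``p=2``, the default, is the Euclidean one)::

    import numpy as np
    from sklearn.neighbors import KNeighborsClassifier

    X = np.array([[0.0, 0.0], [1.0, 1.0], [4.0, 4.0], [5.0, 5.0]])
    y = np.array([0, 0, 1, 1])

    # p selects the Minkowski metric used for the neighbor search:
    # p=1 -> Manhattan, p=2 -> Euclidean (the default).
    clf = KNeighborsClassifier(n_neighbors=3, p=1).fit(X, y)
    print(clf.predict([[1.5, 1.5]]))  # [0]: two of the three nearest points are class 0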
- - -People ------- - * 282 `Andreas Müller`_ - * 239 `Peter Prettenhofer`_ - * 198 `Gael Varoquaux`_ - * 129 `Olivier Grisel`_ - * 114 `Mathieu Blondel`_ - * 103 Clay Woolam - * 96 `Lars Buitinck`_ - * 88 `Jaques Grobler`_ - * 82 `Alexandre Gramfort`_ - * 50 `Bertrand Thirion`_ - * 42 `Robert Layton`_ - * 28 flyingimmidev - * 26 `Jake Vanderplas`_ - * 26 Shiqiao Du - * 21 `Satrajit Ghosh`_ - * 17 `David Marek`_ - * 17 `Gilles Louppe`_ - * 14 `Vlad Niculae`_ - * 11 Yannick Schwartz - * 10 `Fabian Pedregosa`_ - * 9 fcostin - * 7 Nick Wilson - * 5 Adrien Gaidon - * 5 `Nicolas Pinto`_ - * 4 `David Warde-Farley`_ - * 5 Nelle Varoquaux - * 5 Emmanuelle Gouillart - * 3 Joonas Sillanpää - * 3 Paolo Losi - * 2 Charles McCarthy - * 2 Roy Hyunjin Han - * 2 Scott White - * 2 ibayer - * 1 Brandyn White - * 1 Carlos Scheidegger - * 1 Claire Revillet - * 1 Conrad Lee - * 1 `Edouard Duchesnay`_ - * 1 Jan Hendrik Metzen - * 1 Meng Xinfan - * 1 `Rob Zinkov`_ - * 1 Shiqiao - * 1 Udi Weinsberg - * 1 Virgile Fritsch - * 1 Xinfan Meng - * 1 Yaroslav Halchenko - * 1 jansoe - * 1 Leon Palafox - - -.. _changes_0_10: - -Version 0.10 -============ - -**January 11, 2012** - -Changelog ---------- - -- Python 2.5 compatibility was dropped; the minimum Python version needed - to use scikit-learn is now 2.6. - -- :ref:`sparse_inverse_covariance` estimation using the graph Lasso, with - associated cross-validated estimator, by `Gael Varoquaux`_ - -- New :ref:`Tree ` module by `Brian Holt`_, `Peter Prettenhofer`_, - `Satrajit Ghosh`_ and `Gilles Louppe`_. The module comes with complete - documentation and examples. - -- Fixed a bug in the RFE module by `Gilles Louppe`_ (issue #378). - -- Fixed a memory leak in :ref:`svm` module by `Brian Holt`_ (issue #367). - -- Faster tests by `Fabian Pedregosa`_ and others. - -- Silhouette Coefficient cluster analysis evaluation metric added as - :func:`sklearn.metrics.silhouette_score` by Robert Layton. - -- Fixed a bug in :ref:`k_means` in the handling of the ``n_init`` parameter: - the clustering algorithm used to be run ``n_init`` times but the last - solution was retained instead of the best solution by `Olivier Grisel`_. - -- Minor refactoring in :ref:`sgd` module; consolidated dense and sparse - predict methods; Enhanced test time performance by converting model - parameters to fortran-style arrays after fitting (only multi-class). - -- Adjusted Mutual Information metric added as - :func:`sklearn.metrics.adjusted_mutual_info_score` by Robert Layton. - -- Models like SVC/SVR/LinearSVC/LogisticRegression from libsvm/liblinear - now support scaling of C regularization parameter by the number of - samples by `Alexandre Gramfort`_. - -- New :ref:`Ensemble Methods ` module by `Gilles Louppe`_ and - `Brian Holt`_. The module comes with the random forest algorithm and the - extra-trees method, along with documentation and examples. - -- :ref:`outlier_detection`: outlier and novelty detection, by - :user:`Virgile Fritsch `. - -- :ref:`kernel_approximation`: a transform implementing kernel - approximation for fast SGD on non-linear kernels by - `Andreas Müller`_. - -- Fixed a bug due to atom swapping in :ref:`OMP` by `Vlad Niculae`_. - -- :ref:`SparseCoder` by `Vlad Niculae`_. - -- :ref:`mini_batch_kmeans` performance improvements by `Olivier Grisel`_. - -- :ref:`k_means` support for sparse matrices by `Mathieu Blondel`_. - -- Improved documentation for developers and for the :mod:`sklearn.utils` - module, by `Jake Vanderplas`_. 
- -- Vectorized 20newsgroups dataset loader - (:func:`sklearn.datasets.fetch_20newsgroups_vectorized`) by - `Mathieu Blondel`_. - -- :ref:`multiclass` by `Lars Buitinck`_. - -- Utilities for fast computation of mean and variance for sparse matrices - by `Mathieu Blondel`_. - -- Make :func:`sklearn.preprocessing.scale` and - :class:`sklearn.preprocessing.Scaler` work on sparse matrices by - `Olivier Grisel`_. - -- Feature importances using decision trees and/or forests of trees, - by `Gilles Louppe`_. - -- Parallel implementation of forests of randomized trees by - `Gilles Louppe`_. - -- :class:`sklearn.cross_validation.ShuffleSplit` can subsample the train - sets as well as the test sets by `Olivier Grisel`_. - -- Errors in the build of the documentation fixed by `Andreas Müller`_. - - -API changes summary ------------------- - -Here are the code migration instructions when upgrading from scikit-learn -version 0.9: - -- Some estimators that may overwrite their inputs to save memory previously - had ``overwrite_`` parameters; these have been replaced with ``copy_`` - parameters with exactly the opposite meaning. - - This particularly affects some of the estimators in :mod:`linear_model`. - The default behavior is still to copy everything passed in. - -- The SVMlight dataset loader :func:`sklearn.datasets.load_svmlight_file` no - longer supports loading two files at once; use ``load_svmlight_files`` - instead. Also, the (unused) ``buffer_mb`` parameter is gone. - -- Sparse estimators in the :ref:`sgd` module use the dense parameter vector - ``coef_`` instead of ``sparse_coef_``. This significantly improves - test time performance. - -- The :ref:`covariance` module now has a robust estimator of - covariance, the Minimum Covariance Determinant estimator. - -- Cluster evaluation metrics in :mod:`metrics.cluster` have been refactored - but the changes are backwards compatible. They have been moved to - :mod:`metrics.cluster.supervised`, along with - :mod:`metrics.cluster.unsupervised` which contains the Silhouette - Coefficient. - -- The ``permutation_test_score`` function now behaves the same way as - ``cross_val_score`` (i.e. it uses the mean score across the folds). - -- Cross Validation generators now use integer indices (``indices=True``) - by default instead of boolean masks. This makes it more intuitive to - use with sparse matrix data. - -- The functions used for sparse coding, ``sparse_encode`` and - ``sparse_encode_parallel``, have been combined into - :func:`sklearn.decomposition.sparse_encode`, and the shapes of the arrays - have been transposed for consistency with the matrix factorization setting, - as opposed to the regression setting. - -- Fixed an off-by-one error in the SVMlight/LibSVM file format handling; - files generated using :func:`sklearn.datasets.dump_svmlight_file` should be - re-generated. (They should continue to work, but accidentally had one - extra column of zeros prepended.) - -- ``BaseDictionaryLearning`` class replaced by ``SparseCodingMixin``. - -- :func:`sklearn.utils.extmath.fast_svd` has been renamed - :func:`sklearn.utils.extmath.randomized_svd` and the default - oversampling is now fixed to 10 additional random vectors instead - of doubling the number of components to extract. The new behavior - follows the reference paper (a short sketch follows below).
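As an editorial aside, a minimal sketch of the renamed helper (illustrative only, on synthetic data; it uses the default oversampling discussed in the entry above)::

    import numpy as np
    from sklearn.utils.extmath import randomized_svd

    rng = np.random.RandomState(0)
    M = rng.rand(100, 50)

    # The default oversampling is 10 extra random vectors
    # (n_oversamples=10), following the reference paper, rather than
    # doubling n_components as the old fast_svd did.
    U, s, Vt = randomized_svd(M, n_components=5)
    print(U.shape, s.shape, Vt.shape)  # (100, 5) (5,) (5, 50)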
- - -People ------- - -The following people contributed to scikit-learn since last release: - - * 246 `Andreas Müller`_ - * 242 `Olivier Grisel`_ - * 220 `Gilles Louppe`_ - * 183 `Brian Holt`_ - * 166 `Gael Varoquaux`_ - * 144 `Lars Buitinck`_ - * 73 `Vlad Niculae`_ - * 65 `Peter Prettenhofer`_ - * 64 `Fabian Pedregosa`_ - * 60 Robert Layton - * 55 `Mathieu Blondel`_ - * 52 `Jake Vanderplas`_ - * 44 Noel Dawe - * 38 `Alexandre Gramfort`_ - * 24 :user:`Virgile Fritsch ` - * 23 `Satrajit Ghosh`_ - * 3 Jan Hendrik Metzen - * 3 Kenneth C. Arnold - * 3 Shiqiao Du - * 3 Tim Sheerman-Chase - * 3 `Yaroslav Halchenko`_ - * 2 Bala Subrahmanyam Varanasi - * 2 DraXus - * 2 Michael Eickenberg - * 1 Bogdan Trach - * 1 Félix-Antoine Fortin - * 1 Juan Manuel Caicedo Carvajal - * 1 Nelle Varoquaux - * 1 `Nicolas Pinto`_ - * 1 Tiziano Zito - * 1 Xinfan Meng - - - -.. _changes_0_9: - -Version 0.9 -=========== - -**September 21, 2011** - -scikit-learn 0.9 was released on September 2011, three months after the 0.8 -release and includes the new modules :ref:`manifold`, :ref:`dirichlet_process` -as well as several new algorithms and documentation improvements. - -This release also includes the dictionary-learning work developed by -`Vlad Niculae`_ as part of the `Google Summer of Code -`_ program. - - - -.. |banner1| image:: ./auto_examples/manifold/images/thumb/sphx_glr_plot_compare_methods_thumb.png - :target: auto_examples/manifold/plot_compare_methods.html - -.. |banner2| image:: ./auto_examples/linear_model/images/thumb/sphx_glr_plot_omp_thumb.png - :target: auto_examples/linear_model/plot_omp.html - -.. |banner3| image:: ./auto_examples/decomposition/images/thumb/sphx_glr_plot_kernel_pca_thumb.png - :target: auto_examples/decomposition/plot_kernel_pca.html - -.. |center-div| raw:: html - -
- -.. |end-div| raw:: html - -
- - -|center-div| |banner2| |banner1| |banner3| |end-div| - -Changelog ---------- - -- New :ref:`manifold` module by `Jake Vanderplas`_ and - `Fabian Pedregosa`_. - -- New :ref:`Dirichlet Process ` Gaussian Mixture - Model by `Alexandre Passos`_ - -- :ref:`neighbors` module refactoring by `Jake Vanderplas`_ : - general refactoring, support for sparse matrices in input, speed and - documentation improvements. See the next section for a full list of API - changes. - -- Improvements on the :ref:`feature_selection` module by - `Gilles Louppe`_ : refactoring of the RFE classes, documentation - rewrite, increased efficiency and minor API changes. - -- :ref:`SparsePCA` by `Vlad Niculae`_, `Gael Varoquaux`_ and - `Alexandre Gramfort`_ - -- Printing an estimator now behaves independently of architectures - and Python version thanks to :user:`Jean Kossaifi `. - -- :ref:`Loader for libsvm/svmlight format ` by - `Mathieu Blondel`_ and `Lars Buitinck`_ - -- Documentation improvements: thumbnails in - example gallery by `Fabian Pedregosa`_. - -- Important bugfixes in :ref:`svm` module (segfaults, bad - performance) by `Fabian Pedregosa`_. - -- Added :ref:`multinomial_naive_bayes` and :ref:`bernoulli_naive_bayes` - by `Lars Buitinck`_ - -- Text feature extraction optimizations by Lars Buitinck - -- Chi-Square feature selection - (:func:`feature_selection.univariate_selection.chi2`) by `Lars Buitinck`_. - -- :ref:`sample_generators` module refactoring by `Gilles Louppe`_ - -- :ref:`multiclass` by `Mathieu Blondel`_ - -- Ball tree rewrite by `Jake Vanderplas`_ - -- Implementation of :ref:`dbscan` algorithm by Robert Layton - -- Kmeans predict and transform by Robert Layton - -- Preprocessing module refactoring by `Olivier Grisel`_ - -- Faster mean shift by Conrad Lee - -- New ``Bootstrap``, :ref:`ShuffleSplit` and various other - improvements in cross validation schemes by `Olivier Grisel`_ and - `Gael Varoquaux`_ - -- Adjusted Rand index and V-Measure clustering evaluation metrics by `Olivier Grisel`_ - -- Added :class:`Orthogonal Matching Pursuit ` by `Vlad Niculae`_ - -- Added 2D-patch extractor utilities in the :ref:`feature_extraction` module by `Vlad Niculae`_ - -- Implementation of :class:`linear_model.LassoLarsCV` - (cross-validated Lasso solver using the Lars algorithm) and - :class:`linear_model.LassoLarsIC` (BIC/AIC model - selection in Lars) by `Gael Varoquaux`_ - and `Alexandre Gramfort`_ - -- Scalability improvements to :func:`metrics.roc_curve` by Olivier Hervieu - -- Distance helper functions :func:`metrics.pairwise.pairwise_distances` - and :func:`metrics.pairwise.pairwise_kernels` by Robert Layton - -- :class:`Mini-Batch K-Means ` by Nelle Varoquaux and Peter Prettenhofer. - -- :ref:`mldata` utilities by Pietro Berkes. - -- :ref:`olivetti_faces` by `David Warde-Farley`_. - - -API changes summary -------------------- - -Here are the code migration instructions when upgrading from scikit-learn -version 0.8: - -- The ``scikits.learn`` package was renamed ``sklearn``. There is - still a ``scikits.learn`` package alias for backward compatibility. - - Third-party projects with a dependency on scikit-learn 0.9+ should - upgrade their codebase. 
  For instance, under Linux / MacOSX just run
  (make a backup first!)::

    find -name "*.py" | xargs sed -i 's/\bscikits.learn\b/sklearn/g'

- Estimators no longer accept model parameters as ``fit`` arguments:
  instead all parameters must only be passed as constructor
  arguments or using the now public ``set_params`` method inherited
  from :class:`base.BaseEstimator`.

  Some estimators can still accept keyword arguments on the ``fit``
  but this is restricted to data-dependent values (e.g. a Gram matrix
  or an affinity matrix that is precomputed from the ``X`` data matrix).

- The ``cross_val`` package has been renamed to ``cross_validation``
  although there is also a ``cross_val`` package alias in place for
  backward compatibility.

  Third-party projects with a dependency on scikit-learn 0.9+ should
  upgrade their codebase. For instance, under Linux / MacOSX just run
  (make a backup first!)::

    find -name "*.py" | xargs sed -i 's/\bcross_val\b/cross_validation/g'

- The ``score_func`` argument of the
  ``sklearn.cross_validation.cross_val_score`` function is now expected
  to accept ``y_test`` and ``y_predicted`` as its only arguments for
  classification and regression tasks, or ``X_test`` for unsupervised
  estimators.

- The ``gamma`` parameter for support vector machine algorithms is set
  to ``1 / n_features`` by default, instead of ``1 / n_samples``.

- The ``sklearn.hmm`` module has been marked as orphaned: it will be
  removed from scikit-learn in version 0.11 unless someone steps up to
  contribute documentation, examples and fix lurking numerical
  stability issues.

- ``sklearn.neighbors`` has been made into a submodule. The two previously
  available estimators, ``NeighborsClassifier`` and ``NeighborsRegressor``
  have been marked as deprecated. Their functionality has been divided
  among five new classes: ``NearestNeighbors`` for unsupervised neighbors
  searches, ``KNeighborsClassifier`` & ``RadiusNeighborsClassifier``
  for supervised classification problems, and ``KNeighborsRegressor``
  & ``RadiusNeighborsRegressor`` for supervised regression problems.

- ``sklearn.ball_tree.BallTree`` has been moved to
  ``sklearn.neighbors.BallTree``. Using the former will generate a warning.

- ``sklearn.linear_model.LARS()`` and related classes (LassoLARS,
  LassoLARSCV, etc.) have been renamed to
  ``sklearn.linear_model.Lars()``.

- All distance metrics and kernels in ``sklearn.metrics.pairwise`` now
  have a Y parameter, which by default is None. If not given, the result
  is the distance (or kernel similarity) between each pair of samples in
  X. If given, the result is the pairwise distance (or kernel similarity)
  between samples in X and Y.

- ``sklearn.metrics.pairwise.l1_distance`` is now called
  ``manhattan_distance``, and by default returns the pairwise distance.
  For the component wise distance, set the parameter
  ``sum_over_features`` to ``False``.

Backward compatibility package aliases and other deprecated classes and
functions will be removed in version 0.11.


People
------

38 people contributed to this release.
- 387 `Vlad Niculae`_
- 320 `Olivier Grisel`_
- 192 `Lars Buitinck`_
- 179 `Gael Varoquaux`_
- 168 `Fabian Pedregosa`_ (`INRIA`_, `Parietal Team`_)
- 127 `Jake Vanderplas`_
- 120 `Mathieu Blondel`_
- 85 `Alexandre Passos`_
- 67 `Alexandre Gramfort`_
- 57 `Peter Prettenhofer`_
- 56 `Gilles Louppe`_
- 42 Robert Layton
- 38 Nelle Varoquaux
- 32 :user:`Jean Kossaifi `
- 30 Conrad Lee
- 22 Pietro Berkes
- 18 andy
- 17 David Warde-Farley
- 12 Brian Holt
- 11 Robert
- 8 Amit Aides
- 8 :user:`Virgile Fritsch `
- 7 `Yaroslav Halchenko`_
- 6 Salvatore Masecchia
- 5 Paolo Losi
- 4 Vincent Schut
- 3 Alexis Metaireau
- 3 Bryan Silverthorn
- 3 `Andreas Müller`_
- 2 Minwoo Jake Lee
- 1 Emmanuelle Gouillart
- 1 Keith Goodman
- 1 Lucas Wiman
- 1 `Nicolas Pinto`_
- 1 Thouis (Ray) Jones
- 1 Tim Sheerman-Chase


.. _changes_0_8:

Version 0.8
===========

**May 11, 2011**

scikit-learn 0.8 was released in May 2011, one month after the first
"international" `scikit-learn coding sprint `_ and is
marked by the inclusion of important modules: :ref:`hierarchical_clustering`,
:ref:`cross_decomposition`, :ref:`NMF`, initial support for Python 3, and by
important enhancements and bug fixes.


Changelog
---------

Several new modules were introduced during this release:

- New :ref:`hierarchical_clustering` module by Vincent Michel,
  `Bertrand Thirion`_, `Alexandre Gramfort`_ and `Gael Varoquaux`_.

- :ref:`kernel_pca` implementation by `Mathieu Blondel`_

- :ref:`labeled_faces_in_the_wild` by `Olivier Grisel`_.

- New :ref:`cross_decomposition` module by `Edouard Duchesnay`_.

- :ref:`NMF` module by `Vlad Niculae`_

- Implementation of the :ref:`oracle_approximating_shrinkage` algorithm by
  :user:`Virgile Fritsch ` in the :ref:`covariance` module.


Some other modules benefited from significant improvements or cleanups:

- Initial support for Python 3: builds and imports cleanly,
  some modules are usable while others have failing tests, by
  `Fabian Pedregosa`_.

- :class:`decomposition.PCA` is now usable from the Pipeline object by
  `Olivier Grisel`_.

- Guide :ref:`performance-howto` by `Olivier Grisel`_.

- Fixes for memory leaks in libsvm bindings, 64-bit safer BallTree by
  Lars Buitinck.

- Bug and style fixes in the :ref:`k_means` algorithm by Jan Schlüter.

- Added the attribute ``converged`` to Gaussian Mixture Models by
  Vincent Schut.

- Implemented ``transform`` and ``predict_log_proba`` in
  :class:`discriminant_analysis.LinearDiscriminantAnalysis` by
  `Mathieu Blondel`_.

- Refactoring in the :ref:`svm` module and bug fixes by `Fabian Pedregosa`_,
  `Gael Varoquaux`_ and Amit Aides.

- Refactored SGD module (removed code duplication, better variable naming),
  added interface for sample weights by `Peter Prettenhofer`_.

- Wrapped BallTree with Cython by Thouis (Ray) Jones.

- Added function :func:`svm.l1_min_c` by Paolo Losi.

- Typos, doc style, etc. by `Yaroslav Halchenko`_, `Gael Varoquaux`_,
  `Olivier Grisel`_, Yann Malet, `Nicolas Pinto`_, Lars Buitinck and
  `Fabian Pedregosa`_.

People
------

People that made this release possible, preceded by number of commits:

- 159 `Olivier Grisel`_
- 96 `Gael Varoquaux`_
- 96 `Vlad Niculae`_
- 94 `Fabian Pedregosa`_
- 36 `Alexandre Gramfort`_
- 32 Paolo Losi
- 31 `Edouard Duchesnay`_
- 30 `Mathieu Blondel`_
- 25 `Peter Prettenhofer`_
- 22 `Nicolas Pinto`_
- 11 :user:`Virgile Fritsch `
- 7 Lars Buitinck
- 6 Vincent Michel
- 5 `Bertrand Thirion`_
- 4 Thouis (Ray) Jones
- 4 Vincent Schut
- 3 Jan Schlüter
- 2 Julien Miotte
- 2 `Matthieu Perrot`_
- 2 Yann Malet
- 2 `Yaroslav Halchenko`_
- 1 Amit Aides
- 1 `Andreas Müller`_
- 1 Feth Arezki
- 1 Meng Xinfan


.. _changes_0_7:

Version 0.7
===========

**March 2, 2011**

scikit-learn 0.7 was released in March 2011, roughly three months
after the 0.6 release. This release is marked by speed
improvements in existing algorithms like k-Nearest Neighbors and
K-Means, and by the inclusion of an efficient algorithm for
computing the Ridge Generalized Cross Validation solution. Unlike the
preceding release, no new modules were added to this release.

Changelog
---------

- Performance improvements for Gaussian Mixture Model sampling [Jan
  Schlüter].

- Implementation of efficient leave-one-out cross-validated Ridge in
  :class:`linear_model.RidgeCV` [`Mathieu Blondel`_]

- Better handling of collinearity and early stopping in
  :func:`linear_model.lars_path` [`Alexandre Gramfort`_ and `Fabian
  Pedregosa`_].

- Fixes for liblinear ordering of labels and sign of coefficients
  [Dan Yamins, Paolo Losi, `Mathieu Blondel`_ and `Fabian Pedregosa`_].

- Performance improvements for the Nearest Neighbors algorithm in
  high-dimensional spaces [`Fabian Pedregosa`_].

- Performance improvements for :class:`cluster.KMeans` [`Gael
  Varoquaux`_ and `James Bergstra`_].

- Sanity checks for SVM-based classes [`Mathieu Blondel`_].

- Refactoring of :class:`neighbors.NeighborsClassifier` and
  :func:`neighbors.kneighbors_graph`: added different algorithms for
  the k-Nearest Neighbor Search and implemented a more stable
  algorithm for finding barycenter weights. Also added some
  developer documentation for this module; see
  `notes_neighbors `_ for more information [`Fabian Pedregosa`_].

- Documentation improvements: added :class:`pca.RandomizedPCA` and
  :class:`linear_model.LogisticRegression` to the class
  reference. Also added references to the matrices used for clustering
  and other fixes [`Gael Varoquaux`_, `Fabian Pedregosa`_, `Mathieu
  Blondel`_, `Olivier Grisel`_, Virgile Fritsch, Emmanuelle
  Gouillart]

- Bound ``decision_function`` in classes that make use of liblinear_,
  dense and sparse variants, like :class:`svm.LinearSVC` or
  :class:`linear_model.LogisticRegression` [`Fabian Pedregosa`_].

- Performance and API improvements to
  :func:`metrics.euclidean_distances` and to
  :class:`pca.RandomizedPCA` [`James Bergstra`_].

- Fix compilation issues under NetBSD [Kamel Ibn Hassen Derouiche]

- Allow input sequences of different lengths in :class:`hmm.GaussianHMM`
  [`Ron Weiss`_].

- Fix bug in affinity propagation caused by incorrect indexing [Xinfan Meng]


People
------

People that made this release possible, preceded by number of commits:

- 85 `Fabian Pedregosa`_
- 67 `Mathieu Blondel`_
- 20 `Alexandre Gramfort`_
- 19 `James Bergstra`_
- 14 Dan Yamins
- 13 `Olivier Grisel`_
- 12 `Gael Varoquaux`_
- 4 `Edouard Duchesnay`_
- 4 `Ron Weiss`_
- 2 Satrajit Ghosh
- 2 Vincent Dubourg
- 1 Emmanuelle Gouillart
- 1 Kamel Ibn Hassen Derouiche
- 1 Paolo Losi
- 1 VirgileFritsch
- 1 `Yaroslav Halchenko`_
- 1 Xinfan Meng


.. _changes_0_6:

Version 0.6
===========

**December 21, 2010**

scikit-learn 0.6 was released in December 2010. It is marked by the
inclusion of several new modules and a general renaming of old
ones. It is also marked by the inclusion of new examples, including
applications to real-world datasets.


Changelog
---------

- New `stochastic gradient `_ descent
  module by Peter Prettenhofer. The module comes with complete
  documentation and examples.

- Improved svm module: memory consumption has been reduced by 50%,
  heuristic to automatically set class weights, possibility to
  assign weights to samples (see
  :ref:`sphx_glr_auto_examples_svm_plot_weighted_samples.py` for an example).

- New :ref:`gaussian_process` module by Vincent Dubourg. This module
  also has great documentation and some very neat examples. See
  example_gaussian_process_plot_gp_regression.py or
  example_gaussian_process_plot_gp_probabilistic_classification_after_regression.py
  for a taste of what can be done.

- It is now possible to use liblinear's multi-class SVC (option
  ``multi_class`` in :class:`svm.LinearSVC`)

- New features and performance improvements in text feature
  extraction.

- Improved sparse matrix support, both in main classes
  (:class:`grid_search.GridSearchCV`) and in the modules
  sklearn.svm.sparse and sklearn.linear_model.sparse.

- Lots of cool new examples and a new section that uses real-world
  datasets was created. These include:
  :ref:`sphx_glr_auto_examples_applications_plot_face_recognition.py`,
  :ref:`sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py`,
  :ref:`sphx_glr_auto_examples_applications_svm_gui.py`,
  :ref:`sphx_glr_auto_examples_applications_wikipedia_principal_eigenvector.py` and
  others.

- Faster :ref:`least_angle_regression` algorithm. It is now 2x
  faster than the R version in the worst case and up to 10x faster
  in some cases.

- Faster coordinate descent algorithm. In particular, the full path
  version of lasso (:func:`linear_model.lasso_path`) is more than
  200x faster than before.

- It is now possible to get probability estimates from a
  :class:`linear_model.LogisticRegression` model.

- Module renaming: the glm module has been renamed to linear_model,
  the gmm module has been included into the more general mixture
  model, and the sgd module has been included in linear_model.

- Lots of bug fixes and documentation improvements.


People
------

People that made this release possible, preceded by number of commits:

 * 207 `Olivier Grisel`_

 * 167 `Fabian Pedregosa`_

 * 97 `Peter Prettenhofer`_

 * 68 `Alexandre Gramfort`_

 * 59 `Mathieu Blondel`_

 * 55 `Gael Varoquaux`_

 * 33 Vincent Dubourg

 * 21 `Ron Weiss`_

 * 9 Bertrand Thirion

 * 3 `Alexandre Passos`_

 * 3 Anne-Laure Fouque

 * 2 Ronan Amicel

 * 1 `Christian Osendorfer`_

.. _changes_0_5:


Version 0.5
===========

**October 11, 2010**

Changelog
---------

New classes
-----------

- Support for sparse matrices in some classifiers of modules
  ``svm`` and ``linear_model`` (see :class:`svm.sparse.SVC`,
  :class:`svm.sparse.SVR`, :class:`svm.sparse.LinearSVC`,
  :class:`linear_model.sparse.Lasso`, :class:`linear_model.sparse.ElasticNet`)

- New :class:`pipeline.Pipeline` object to compose different estimators.

- Recursive Feature Elimination routines in module
  :ref:`feature_selection`.

- Addition of various classes capable of cross validation in the
  linear_model module (:class:`linear_model.LassoCV`,
  :class:`linear_model.ElasticNetCV`, etc.).

- New, more efficient LARS algorithm implementation. The Lasso
  variant of the algorithm is also implemented. See
  :class:`linear_model.lars_path`, :class:`linear_model.Lars` and
  :class:`linear_model.LassoLars`.

- New Hidden Markov Models module (see classes
  :class:`hmm.GaussianHMM`, :class:`hmm.MultinomialHMM`,
  :class:`hmm.GMMHMM`)

- New module feature_extraction (see :ref:`class reference
  `)

- New FastICA algorithm in module sklearn.fastica


Documentation
-------------

- Improved documentation for many modules, now separating
  narrative documentation from the class reference. As an example,
  see the `documentation for the SVM module
  `_ and the
  complete `class reference
  `_.

Fixes
-----

- API changes: variable names now adhere to PEP-8 and carry more
  meaningful names.

- Fixes for the svm module to run in a shared memory context
  (multiprocessing).

- It is again possible to generate latex (and thus PDF) from the
  sphinx docs.

Examples
--------

- New examples using some of the mlcomp datasets:
  ``sphx_glr_auto_examples_mlcomp_sparse_document_classification.py``
  (since removed) and
  :ref:`sphx_glr_auto_examples_text_document_classification_20newsgroups.py`

- Many more examples. `See here
  `_ for the full list of examples.


External dependencies
---------------------

- Joblib is now a dependency of this package, although a copy is
  shipped with scikit-learn (sklearn.externals.joblib).

Removed modules
---------------

- Module ann (Artificial Neural Networks) has been removed from
  the distribution. Users wanting this sort of algorithm should
  take a look at pybrain.

Misc
----

- New sphinx theme for the web page.


Authors
-------

The following is a list of authors for this release, preceded by
number of commits:

 * 262 Fabian Pedregosa
 * 240 Gael Varoquaux
 * 149 Alexandre Gramfort
 * 116 Olivier Grisel
 * 40 Vincent Michel
 * 38 Ron Weiss
 * 23 Matthieu Perrot
 * 10 Bertrand Thirion
 * 9 VirgileFritsch
 * 7 Yaroslav Halchenko
 * 6 Edouard Duchesnay
 * 4 Mathieu Blondel
 * 1 Ariel Rokem
 * 1 Matthieu Brucher

Version 0.4
===========

**August 26, 2010**

Changelog
---------

Major changes in this release include:

- Coordinate Descent algorithm (Lasso, ElasticNet) refactoring and
  speed improvements (roughly 100x faster).

- Coordinate Descent refactoring (and bug fixing) for consistency
  with R's GLMNET package.

- New metrics module.

- New GMM module contributed by Ron Weiss.

- Implementation of the LARS algorithm (without the Lasso variant for now).

- feature_selection module redesign.

- Migration to GIT as version control system.

- Removal of the obsolete attrselect module.

- Rename of private compiled extensions (added an underscore).

- Removal of legacy unmaintained code.

- Documentation improvements (both docstring and rst).

- Improvement of the build system to (optionally) link with MKL.
  Also, provide a lite BLAS implementation in case no system-wide BLAS is
  found.

- Lots of new examples.

- Many, many bug fixes ...


Authors
-------

The committer list for this release is the following (preceded by number
of commits):

 * 143 Fabian Pedregosa
 * 35 Alexandre Gramfort
 * 34 Olivier Grisel
 * 11 Gael Varoquaux
 * 5 Yaroslav Halchenko
 * 2 Vincent Michel
 * 1 Chris Filo Gorgolewski


Earlier versions
================

Earlier versions included contributions by Fred Mailhot, David Cooke,
David Huard, Dave Morrill, Ed Schofield, Travis Oliphant and Pearu Peterson.

.. _Olivier Grisel: https://twitter.com/ogrisel

.. _Gael Varoquaux: http://gael-varoquaux.info

.. _Alexandre Gramfort: http://alexandre.gramfort.net

.. _Fabian Pedregosa: http://fa.bianp.net

.. _Mathieu Blondel: http://www.mblondel.org

.. _James Bergstra: http://www-etud.iro.umontreal.ca/~bergstrj/

.. _liblinear: http://www.csie.ntu.edu.tw/~cjlin/liblinear/

.. _Yaroslav Halchenko: http://www.onerussian.com/

.. _Vlad Niculae: http://vene.ro

.. _Edouard Duchesnay: https://sites.google.com/site/duchesnay/home

.. _Peter Prettenhofer: https://sites.google.com/site/peterprettenhofer/

.. _Alexandre Passos: http://atpassos.me

.. _Nicolas Pinto: https://twitter.com/npinto

.. _Bertrand Thirion: https://team.inria.fr/parietal/bertrand-thirions-page

.. _Andreas Müller: http://peekaboo-vision.blogspot.com

.. _Matthieu Perrot: http://brainvisa.info/biblio/lnao/en/Author/PERROT-M.html

.. _Jake Vanderplas: http://staff.washington.edu/jakevdp/

.. _Gilles Louppe: http://www.montefiore.ulg.ac.be/~glouppe/

.. _INRIA: http://www.inria.fr

.. _Parietal Team: http://parietal.saclay.inria.fr/

.. _David Warde-Farley: http://www-etud.iro.umontreal.ca/~wardefar/

.. _Brian Holt: http://personal.ee.surrey.ac.uk/Personal/B.Holt

.. _Satrajit Ghosh: http://www.mit.edu/~satra/

.. _Robert Layton: https://twitter.com/robertlayton

.. _Scott White: https://twitter.com/scottblanc

.. _David Marek: http://www.davidmarek.cz/

.. _Christian Osendorfer: https://osdf.github.io

.. _Arnaud Joly: http://www.ajoly.org

.. _Rob Zinkov: http://zinkov.com

.. _Joel Nothman: http://joelnothman.com

.. _Nicolas Trésegnie: http://nicolastr.com/

.. _Kemal Eren: http://www.kemaleren.com

.. _Yann Dauphin: http://ynd.github.io/

.. _Yannick Schwartz: https://team.inria.fr/parietal/schwarty/

.. _Kyle Kastner: http://kastnerkyle.github.io

.. _Daniel Nouri: http://danielnouri.org

.. _Manoj Kumar: https://manojbits.wordpress.com

.. _Luis Pedro Coelho: http://luispedro.org

.. _Fares Hedyati: http://www.eecs.berkeley.edu/~fareshed

.. _Antony Lee: https://www.ocf.berkeley.edu/~antonyl/

.. _Martin Billinger: http://tnsre.embs.org/author/martinbillinger

.. _Matteo Visconti di Oleggio Castello: http://www.mvdoc.me

.. _Trevor Stephens: http://trevorstephens.com/

.. _Jan Hendrik Metzen: https://jmetzen.github.io/

.. _Will Dawson: http://www.dawsonresearch.com

.. _Andrew Tulloch: http://tullo.ch/

.. _Hanna Wallach: http://dirichlet.net/

.. _Yan Yi: http://seowyanyi.org

.. _Hervé Bredin: http://herve.niderb.fr/

.. _Eric Martin: http://www.ericmart.in

.. _Nicolas Goix: https://perso.telecom-paristech.fr/~goix/

.. _Sebastian Raschka: http://sebastianraschka.com
.. _Brian McFee: https://bmcfee.github.io

.. _Valentin Stolbunov: http://www.vstolbunov.com

.. _Jaques Grobler: https://github.com/jaquesgrobler

.. _Lars Buitinck: https://github.com/larsmans

.. _Loic Esteve: https://github.com/lesteve

.. _Noel Dawe: https://github.com/ndawe

.. _Raghav RV: https://github.com/raghavrv

.. _Tom Dupre la Tour: https://github.com/TomDLT

.. _Nelle Varoquaux: https://github.com/nellev

.. _Bing Tian Dai: https://github.com/btdai

.. _Dylan Werner-Meier: https://github.com/unautre

.. _Alyssa Batula: https://github.com/abatula

.. _Srivatsan Ramesh: https://github.com/srivatsan-ramesh

.. _Ron Weiss: http://www.ee.columbia.edu/~ronw

.. _Kathleen Chen: https://github.com/kchen17

.. _Vincent Pham: https://github.com/vincentpham1991

.. _Denis Engemann: http://denis-engemann.de

.. _Anish Shah: https://github.com/AnishShah

.. _Neeraj Gangwar: http://neerajgangwar.in

.. _Arthur Mensch: https://amensch.fr

.. _Ivan Nazarov: https://github.com/ivannz
diff --git a/doc/whats_new/_contributors.rst b/doc/whats_new/_contributors.rst
index ca0f8ede93afa..89a9dcf40a0e0 100644
--- a/doc/whats_new/_contributors.rst
+++ b/doc/whats_new/_contributors.rst
@@ -176,4 +176,6 @@
 
 .. _Nicolas Hug: https://github.com/NicolasHug
 
-.. _Guillaume Lemaitre: https://github.com/glemaitre
\ No newline at end of file
+.. _Guillaume Lemaitre: https://github.com/glemaitre
+
+.. _Ivan Nazarov: https://github.com/ivannz
diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst
index add4d97f6de09..21437674e0333 100644
--- a/doc/whats_new/v0.20.rst
+++ b/doc/whats_new/v0.20.rst
@@ -1486,6 +1486,10 @@ Support for Python 3.3 has been officially dropped.
   version 0.22 to account better for unscaled features. :issue:`8361` by
   :user:`Gaurav Dhingra ` and :user:`Ting Neo `.
 
+- |Feature| Added the :class:`svm.SVDD` class for novelty detection based on
+  a soft minimal-volume hypersphere around the sample data.
+  :user:`Ivan Nazarov `.
+
 :mod:`sklearn.tree`
 ...................

From 941ca4b199f75f5c0247838fa66fe4720bf14b4f Mon Sep 17 00:00:00 2001
From: Ivan
Date: Sat, 7 Oct 2017 01:47:06 +0300
Subject: [PATCH 06/41] review and sync with #9015

---
 doc/modules/svm.rst                   | 19 +++++----
 examples/svm/plot_oneclass_vs_svdd.py | 13 +++---
 sklearn/svm/_classes.py               | 60 +++++++++++++++++++--------
 3 files changed, 59 insertions(+), 33 deletions(-)

diff --git a/doc/modules/svm.rst b/doc/modules/svm.rst
index b2aa26d11bd3e..2a79b9393dee7 100644
--- a/doc/modules/svm.rst
+++ b/doc/modules/svm.rst
@@ -404,11 +404,11 @@ Tips on Practical Use
     function can be configured to be almost the same as the :class:`LinearSVC`
     model.
 
-  * **Kernel cache size**: For :class:`SVC`, :class:`SVR`, :class:`NuSVC` and
-    :class:`NuSVR`, the size of the kernel cache has a strong impact on run
-    times for larger problems. If you have enough RAM available, it is
-    recommended to set ``cache_size`` to a higher value than the default of
-    200(MB), such as 500(MB) or 1000(MB).
+  * **Kernel cache size**: For :class:`SVC`, :class:`SVR`, :class:`NuSVC`,
+    :class:`NuSVR`, :class:`OneClassSVM` and :class:`SVDD`, the size of the
+    kernel cache has a strong impact on run times for larger problems. If
+    you have enough RAM available, it is recommended to set ``cache_size``
+    to a higher value than the default of 200(MB), such as 500(MB) or 1000(MB).
 
   * **Setting C**: ``C`` is ``1`` by default and it's a reasonable default
@@ -458,9 +458,10 @@ Tips on Practical Use
    ``probability`` is set to ``True``).
This randomness can be controlled with the ``random_state`` parameter. If ``probability`` is set to ``False`` these estimators are not random and ``random_state`` has no effect on the - results. The underlying :class:`OneClassSVM` implementation is similar to - the ones of :class:`SVC` and :class:`NuSVC`. As no probability estimation - is provided for :class:`OneClassSVM`, it is not random. + results. The underlying :class:`OneClassSVM` and :class:`SVDD` + implementation is similar to the ones of :class:`SVC` and :class:`NuSVC`. + As no probability estimation is provided for :class:`OneClassSVM` and + :class:`SVDD`, they are not random. The underlying :class:`LinearSVC` implementation uses a random number generator to select features when fitting the model with a dual coordinate @@ -849,7 +850,7 @@ SVDD ---- Support Vector Data Description (SVDD), proposed by Tax and Duin (2004), -aims at finding a spherically shaped boundary around a data set. Specifially, +aims at finding a spherically shaped boundary around a data set. Specifically, it computes a minimum volume hypersphere containing the most of the data with the number of outliers controlled by the parameter of the model. diff --git a/examples/svm/plot_oneclass_vs_svdd.py b/examples/svm/plot_oneclass_vs_svdd.py index a2d20df63a72a..bc19ad53bdd10 100644 --- a/examples/svm/plot_oneclass_vs_svdd.py +++ b/examples/svm/plot_oneclass_vs_svdd.py @@ -17,9 +17,9 @@ decision functions for non-stationary kernels, e.g. polynomial. This example demonstrates this. -Note, that it is incorrect to say that the SVDD generalizes the One-Class -SVM: these are different models, which just happen to coincide for a -particular family of kernels. +Note that it is incorrect to say that the SVDD is equivalent to the +One-Class SVM: these are different models, which just happen to coincide +for a particular family of kernels. """ import numpy as np import matplotlib.pyplot as plt @@ -82,12 +82,13 @@ zorder=-97, label="learned frontier") s = 40 - b1 = ax.scatter(X_train[:, 0], X_train[:, 1], c='white', s=s) + b1 = ax.scatter(X_train[:, 0], X_train[:, 1], s=s, + c='white', edgecolors='k') b2 = ax.scatter(X_test[:, 0], X_test[:, 1], c='blueviolet', s=s) c = ax.scatter(X_outliers[:, 0], X_outliers[:, 1], c='gold', s=s) ax.axis('tight') - ax.set_xlim((-7, 7)) - ax.set_ylim((-7, 7)) + ax.set_xlim((-6, 6)) + ax.set_ylim((-6, 6)) ax.set_title("%s %s (%d/200, %d/40, %d/40)" % (model_name, kernel_name, n_error_train, diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py index 2abefe1dd8d7d..a15146ab7d28f 100644 --- a/sklearn/svm/_classes.py +++ b/sklearn/svm/_classes.py @@ -1527,7 +1527,7 @@ class OneClassSVM(OutlierMixin, BaseLibSVM): Estimate the support of a high-dimensional distribution by finding the maximum margin soft boundary hyperplane separating a data set from the - origin. At most the fraction ``nu`` (``0 < nu <= 1``) of the data + origin. At most a fraction ``nu`` (``0 < nu <= 1``) of the data are permitted to be outliers. The implementation is based on libsvm. @@ -1826,8 +1826,8 @@ class SVDD(BaseLibSVM): """Support Vector Data Description (SVDD) for Unsupervised Outlier Detection. Estimate the support of a high-dimensional distribution by finding the - tightest soft boundary hypersphere around a data set, which permits at - most the fraction ``nu`` (``0 < nu <= 1``) of the data as outliers. + tightest soft hypersphere around a data set, which permits at most a + fraction ``nu`` (``0 < nu <= 1``) of the data as outliers. 
The implementation is based on libsvm. @@ -1844,7 +1844,7 @@ class SVDD(BaseLibSVM): nu : float, optional An upper bound on the fraction of training errors and a lower bound - of the fraction of support vectors. Should be in the interval (0, 1]. + on the fraction of support vectors. Should be in the interval (0, 1]. By default 0.5 will be taken. degree : int, optional (default=3) @@ -1877,11 +1877,11 @@ class SVDD(BaseLibSVM): Hard limit on iterations within solver, or -1 for no limit. random_state : int, RandomState instance or None, optional (default=None) - The seed of the pseudo random number generator to use when shuffling - the data. If int, random_state is the seed used by the random number - generator; If RandomState instance, random_state is the random number - generator; If None, the random number generator is the RandomState - instance used by `np.random`. + Ignored. + + .. deprecated:: 0.20 + ``random_state`` has been deprecated in 0.20 and will be removed in + 0.22. Attributes ---------- @@ -1891,18 +1891,24 @@ class SVDD(BaseLibSVM): support_vectors_ : array-like, shape = [nSV, n_features] Support vectors. - dual_coef_ : array, shape = [n_classes-1, n_SV] + dual_coef_ : array, shape = [1, n_SV] Coefficients of the support vectors in the decision function. - coef_ : array, shape = [n_classes-1, n_features] + coef_ : array, shape = [1, n_features] Weights assigned to the features (coefficients in the primal problem). This is only available in the case of a linear kernel. `coef_` is readonly property derived from `dual_coef_` and `support_vectors_` - intercept_ : array, shape = [n_classes-1] - Constants in decision function. + intercept_ : array, shape = [1,] + The constant in the decision function. + + offset_ : float + Offset used to define the decision function from the raw scores. + We have the relation: decision_function = score_samples - offset_. + The offset is the opposite of intercept_ and is provided for + consistency with other outlier detection algorithms. References ---------- @@ -1918,13 +1924,15 @@ class SVDD(BaseLibSVM): def __init__(self, kernel='rbf', degree=3, gamma='auto', coef0=0.0, tol=1e-3, nu=0.5, shrinking=True, cache_size=200, verbose=False, max_iter=-1, random_state=None): + super(SVDD, self).__init__( - 'svdd_l1', kernel, degree, gamma, coef0, tol, 0., nu, 0., - shrinking, False, cache_size, None, verbose, max_iter, - random_state) + 'svdd_l1', kernel=kernel, degree=degree, gamma=gamma, coef0=coef0, + tol=tol, C=0., nu=nu, epsilon=0., shrinking=shrinking, + probability=False, cache_size=cache_size, class_weight=None, + verbose=verbose, max_iter=max_iter, random_state=random_state) def fit(self, X, y=None, sample_weight=None, **params): - """Detects the soft minimum volume hypersphere around the sample X. + """Learns the soft minimum volume hypersphere around the sample X. Parameters ---------- @@ -1948,6 +1956,7 @@ def fit(self, X, y=None, sample_weight=None, **params): """ super(SVDD, self).fit(X, np.ones(_num_samples(X)), sample_weight=sample_weight, **params) + self.offset_ = -self._intercept_ return self def decision_function(self, X): @@ -1964,9 +1973,24 @@ def decision_function(self, X): X : array-like, shape (n_samples,) Returns the decision function of the samples. """ - dec = self._decision_function(X) + dec = self._decision_function(X).ravel() return dec + def score_samples(self, X): + """Raw scoring function of the samples. 
+ + Parameters + ---------- + X : array-like, shape (n_samples, n_features) + + Returns + ------- + score_samples : array-like, shape (n_samples,) + Returns the (unshifted) scoring function of the samples. + """ + score_samples = self.decision_function(X) + self.offset_ + return score_samples + def predict(self, X): """ Perform classification on samples in X. From e8cd614a993676514cef839dc081f642354c2ec4 Mon Sep 17 00:00:00 2001 From: Ivan Date: Mon, 9 Oct 2017 12:32:04 +0300 Subject: [PATCH 07/41] temporary ocSVM-test patch --- sklearn/svm/tests/test_svm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index a19285e4b8728..6c3464aa44d11 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -447,7 +447,7 @@ def test_oneclass_and_svdd(): mesh = np.c_[xx.ravel(), yy.ravel()] svdd_df = svdd.decision_function(mesh) - ocsvm_df = ocsvm.decision_function(mesh) + ocsvm_df = ocsvm.decision_function(mesh).ravel() assert_array_almost_equal(svdd_df, ocsvm_df) From 543c4e68ad1f58d1e1d8aaf088ea182f4071707d Mon Sep 17 00:00:00 2001 From: Ivan Date: Tue, 10 Oct 2017 16:09:43 +0300 Subject: [PATCH 08/41] removed 'random_state' from SVDD --- sklearn/svm/_classes.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py index a15146ab7d28f..23f27a785de11 100644 --- a/sklearn/svm/_classes.py +++ b/sklearn/svm/_classes.py @@ -1876,13 +1876,6 @@ class SVDD(BaseLibSVM): max_iter : int, optional (default=-1) Hard limit on iterations within solver, or -1 for no limit. - random_state : int, RandomState instance or None, optional (default=None) - Ignored. - - .. deprecated:: 0.20 - ``random_state`` has been deprecated in 0.20 and will be removed in - 0.22. - Attributes ---------- support_ : array-like, shape = [n_SV] @@ -1923,13 +1916,13 @@ class SVDD(BaseLibSVM): """ def __init__(self, kernel='rbf', degree=3, gamma='auto', coef0=0.0, tol=1e-3, nu=0.5, shrinking=True, cache_size=200, - verbose=False, max_iter=-1, random_state=None): + verbose=False, max_iter=-1): super(SVDD, self).__init__( 'svdd_l1', kernel=kernel, degree=degree, gamma=gamma, coef0=coef0, tol=tol, C=0., nu=nu, epsilon=0., shrinking=shrinking, probability=False, cache_size=cache_size, class_weight=None, - verbose=verbose, max_iter=max_iter, random_state=random_state) + verbose=verbose, max_iter=max_iter, random_state=None) def fit(self, X, y=None, sample_weight=None, **params): """Learns the soft minimum volume hypersphere around the sample X. 
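
A minimal smoke-test sketch of the estimator as it stands at this point in
the series (an illustration, not part of any patch: the toy data and the
installed branch are assumptions, while the
``score_samples``/``decision_function``/``offset_`` identity is exactly the
relation introduced in the patches above)::

    import numpy as np
    from sklearn.svm import SVDD  # available on this branch only

    rng = np.random.RandomState(0)
    X_train = 0.3 * rng.randn(200, 2) + 2.0      # one dense cluster
    X_test = np.array([[2.0, 2.0], [8.0, 8.0]])  # likely inlier, likely outlier

    clf = SVDD(nu=0.1).fit(X_train)  # unsupervised: no y is required

    # predictions are +1 (inside the learned hypersphere) or -1 (outside)
    assert set(np.unique(clf.predict(X_test))) <= {-1, 1}

    # score_samples is the decision function shifted by offset_ = -intercept_
    np.testing.assert_allclose(
        clf.score_samples(X_test),
        clf.decision_function(X_test) + clf.offset_)
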
From d7511484f5f6b81cf6dccf8644a29acd7e70f392 Mon Sep 17 00:00:00 2001 From: Ivan Date: Tue, 10 Oct 2017 16:59:13 +0300 Subject: [PATCH 09/41] fixed sparse SVDD test --- sklearn/svm/tests/test_sparse.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/svm/tests/test_sparse.py b/sklearn/svm/tests/test_sparse.py index 5ffaf8f0af08c..d34275528d877 100644 --- a/sklearn/svm/tests/test_sparse.py +++ b/sklearn/svm/tests/test_sparse.py @@ -348,8 +348,8 @@ def test_sparse_svdd(): kernels = ["linear", "poly", "rbf", "sigmoid"] for dataset in datasets: for kernel in kernels: - clf = svm.SVDD(kernel=kernel, random_state=0) - sp_clf = svm.SVDD(kernel=kernel, random_state=0) + clf = svm.SVDD(kernel=kernel) + sp_clf = svm.SVDD(kernel=kernel) check_svm_model_equal(clf, sp_clf, *dataset) From ba1117366293fe72545f9c7239ac251a96b9f7e9 Mon Sep 17 00:00:00 2001 From: Ivan Date: Wed, 11 Oct 2017 02:33:50 +0300 Subject: [PATCH 10/41] score_samples() test for the SVDD --- sklearn/svm/tests/test_svm.py | 51 ++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index 6c3464aa44d11..a11d31cc4d7e5 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -18,7 +18,7 @@ from sklearn.model_selection import train_test_split from sklearn.datasets import make_classification, make_blobs from sklearn.metrics import f1_score -from sklearn.metrics.pairwise import rbf_kernel +from sklearn.metrics.pairwise import rbf_kernel, polynomial_kernel from sklearn.utils import check_random_state from sklearn.utils._testing import ignore_warnings from sklearn.utils.validation import _num_samples @@ -413,6 +413,55 @@ def test_svdd_decision_function(): assert_array_equal((dec_func_outliers > 0).ravel(), y_pred_outliers == 1) +def test_svdd_score_samples(): + # Test the raw sample scores of the SVDD + # Background: the theoretical decision function score of the SVDD is + # d(x) = R - \|\phi(x) - a\|^2 + # = R - \alpha^T Q \alpha / (\nu W)^2 - K(x, x) + # + 2 / (\nu W) \sum_i \alpha_i K(z_i, x) + # = 2 / (\nu W) (-\rho + \sum_i \alpha_i (K(z_i, x) - 0.5 K(x, x))) + # where \rho = 0.5 \nu W (\alpha^T Q \alpha / (\nu W)^2 - R), W is the + # sum of sample weights and \sum_i \alpha_i = \nu W since \alpha is + # feasible. + # In contrast, the current implementation returns a scaled score: + # d(x) = 0.5 (\nu W) (R - \|\phi(x) - a\|^2) + # = -\rho + \sum_i \alpha_i (K(z_i, x) - 0.5 K(x, x)) + # Implicit scaling makes the raw decision function scores of the ocSVM + # and SVDD identical when the models coincide (stationary kernel). 
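+    # Concretely (an illustrative consequence of the identity above,
+    # assuming uniform unit sample weights): the training set built below
+    # has n = 200 samples, so W = 200 and, with nu = 0.1, the implemented
+    # score is a constant 0.5 * nu * W = 10 times the theoretical d(x).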
+ + # Generate train data + rnd = check_random_state(2) + X = 0.3 * rnd.randn(100, 2) + X_train = np.r_[X + 2, X - 2] + + # Evaluate the scores on a small uniform 2-d mesh + xx, yy = np.meshgrid(np.linspace(-5, 5, num=26), + np.linspace(-5, 5, num=26)) + X_test = np.c_[xx.ravel(), yy.ravel()] + + # Fit the model for at least 10% support vectors + clf = svm.SVDD(nu=0.1, kernel="poly", degree=2, coef0=1.0) + clf.fit(X_train) + + # Check score_samples() implementation + assert_array_almost_equal(clf.score_samples(X_test), + clf.decision_function(X_test) + clf.offset_) + + # Compute the kernel matrices + k_zx = polynomial_kernel(X_train[clf.support_], X_test, + degree=clf.degree, coef0=clf.coef0) + k_xx = polynomial_kernel(X_test, + degree=clf.degree, coef0=clf.coef0).diagonal() + + # Compute the sample scores = decision scores without `-\rho` + scores_ = np.dot(clf.dual_coef_, k_zx - k_xx[np.newaxis] / 2).ravel() + assert_array_almost_equal(clf.score_samples(X_test), scores_) + + # Get the decision function scores + decision_ = scores_ + clf.intercept_ # intercept_ = - \rho + assert_array_almost_equal(clf.decision_function(X_test), decision_) + + def test_oneclass_and_svdd(): # Generate a sample: two symmetrically placed clusters rnd = check_random_state(2) From 1a7083ab8ac86f866aa078e3c8d34248bf32f3b8 Mon Sep 17 00:00:00 2001 From: Ivan Date: Fri, 23 Feb 2018 23:47:42 +0300 Subject: [PATCH 11/41] BaseLibSVM interface update --- sklearn/svm/_classes.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py index 23f27a785de11..0d99497ec5058 100644 --- a/sklearn/svm/_classes.py +++ b/sklearn/svm/_classes.py @@ -1822,7 +1822,7 @@ def _more_tags(self): } -class SVDD(BaseLibSVM): +class SVDD(BaseLibSVM, OutlierMixin): """Support Vector Data Description (SVDD) for Unsupervised Outlier Detection. Estimate the support of a high-dimensional distribution by finding the @@ -1914,12 +1914,15 @@ class SVDD(BaseLibSVM): Report, Department of Computer Science, National Taiwan University. """ + + _impl = 'svdd_l1' + def __init__(self, kernel='rbf', degree=3, gamma='auto', coef0=0.0, tol=1e-3, nu=0.5, shrinking=True, cache_size=200, verbose=False, max_iter=-1): super(SVDD, self).__init__( - 'svdd_l1', kernel=kernel, degree=degree, gamma=gamma, coef0=coef0, + kernel=kernel, degree=degree, gamma=gamma, coef0=coef0, tol=tol, C=0., nu=nu, epsilon=0., shrinking=shrinking, probability=False, cache_size=cache_size, class_weight=None, verbose=verbose, max_iter=max_iter, random_state=None) From 54725055e98c537c49d2440f68a0ff1f36355444 Mon Sep 17 00:00:00 2001 From: ivannz Date: Sun, 29 Jul 2018 10:50:46 +0300 Subject: [PATCH 12/41] FIX: Updated the default gamma to reflect #10331 and tests, fixed the docstring parameter order --- sklearn/svm/_classes.py | 22 ++++++++++++++-------- sklearn/svm/tests/test_sparse.py | 4 ++-- sklearn/svm/tests/test_svm.py | 24 +++++++++++++++--------- 3 files changed, 31 insertions(+), 19 deletions(-) diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py index 0d99497ec5058..891b7ac57bb29 100644 --- a/sklearn/svm/_classes.py +++ b/sklearn/svm/_classes.py @@ -1842,18 +1842,19 @@ class SVDD(BaseLibSVM, OutlierMixin): If none is given, 'rbf' will be used. If a callable is given it is used to precompute the kernel matrix. - nu : float, optional - An upper bound on the fraction of training errors and a lower bound - on the fraction of support vectors. Should be in the interval (0, 1]. - By default 0.5 will be taken. 
- degree : int, optional (default=3) Degree of the polynomial kernel function ('poly'). Ignored by all other kernels. gamma : float, optional (default='auto') Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. - If gamma is 'auto' then 1/n_features will be used instead. + + Current default is 'auto' which uses 1 / n_features, + if ``gamma='scale'`` is passed then it uses 1 / (n_features * X.std()) + as value of gamma. The current default of gamma, 'auto', will change + to 'scale' in version 0.22. 'auto_deprecated', a deprecated version of + 'auto' is used as a default indicating that no explicit value of gamma + was passed. coef0 : float, optional (default=0.0) Independent term in kernel function. @@ -1862,6 +1863,11 @@ class SVDD(BaseLibSVM, OutlierMixin): tol : float, optional Tolerance for stopping criterion. + nu : float, optional + An upper bound on the fraction of training errors and a lower bound + on the fraction of support vectors. Should be in the interval (0, 1]. + By default 0.5 will be taken. + shrinking : boolean, optional Whether to use the shrinking heuristic. @@ -1917,8 +1923,8 @@ class SVDD(BaseLibSVM, OutlierMixin): _impl = 'svdd_l1' - def __init__(self, kernel='rbf', degree=3, gamma='auto', coef0=0.0, - tol=1e-3, nu=0.5, shrinking=True, cache_size=200, + def __init__(self, kernel='rbf', degree=3, gamma='auto_deprecated', + coef0=0.0, tol=1e-3, nu=0.5, shrinking=True, cache_size=200, verbose=False, max_iter=-1): super(SVDD, self).__init__( diff --git a/sklearn/svm/tests/test_sparse.py b/sklearn/svm/tests/test_sparse.py index d34275528d877..893cddff71b6c 100644 --- a/sklearn/svm/tests/test_sparse.py +++ b/sklearn/svm/tests/test_sparse.py @@ -348,8 +348,8 @@ def test_sparse_svdd(): kernels = ["linear", "poly", "rbf", "sigmoid"] for dataset in datasets: for kernel in kernels: - clf = svm.SVDD(kernel=kernel) - sp_clf = svm.SVDD(kernel=kernel) + clf = svm.SVDD(gamma='scale', kernel=kernel) + sp_clf = svm.SVDD(gamma='scale', kernel=kernel) check_svm_model_equal(clf, sp_clf, *dataset) diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index a11d31cc4d7e5..709b892b10420 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -364,15 +364,15 @@ def test_oneclass_fit_params_is_deprecated(): def test_svdd(): # Test the output of libsvm for the SVDD problem with default parameters - clf = svm.SVDD() + clf = svm.SVDD(gamma='scale') clf.fit(X) pred = clf.predict(T) assert_array_equal(pred, [-1, -1, -1]) assert_equal(pred.dtype, np.dtype('intp')) - assert_array_almost_equal(clf.intercept_, [0.491], decimal=3) + assert_array_almost_equal(clf.intercept_, [0.383], decimal=3) assert_array_almost_equal(clf.dual_coef_, - [[0.632, 0.233, 0.633, 0.234, 0.632, 0.633]], + [[0.681, 0.139, 0.680, 0.140, 0.680, 0.680]], decimal=3) assert_false(hasattr(clf, "coef_")) @@ -397,7 +397,8 @@ def test_svdd_decision_function(): X_outliers = rnd.uniform(low=-4, high=4, size=(20, 2)) # fit the model - clf = svm.SVDD(nu=0.1, kernel="poly", degree=2, coef0=1.0).fit(X_train) + clf = svm.SVDD(gamma='scale', nu=0.1, + kernel="poly", degree=2, coef0=1.0).fit(X_train) # predict and validate things y_pred_test = clf.predict(X_test) @@ -440,17 +441,22 @@ def test_svdd_score_samples(): X_test = np.c_[xx.ravel(), yy.ravel()] # Fit the model for at least 10% support vectors - clf = svm.SVDD(nu=0.1, kernel="poly", degree=2, coef0=1.0) + clf = svm.SVDD(nu=0.1, kernel="poly", gamma='scale', degree=2, coef0=1.0) clf.fit(X_train) # Check score_samples() implementation 
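+    # (score_samples should differ from decision_function by the constant
+    # offset_, where offset_ = -intercept_ = rho; the assertion below
+    # checks exactly this identity)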
assert_array_almost_equal(clf.score_samples(X_test), clf.decision_function(X_test) + clf.offset_) + # Test the gamma="scale" + gamma = 1.0 / (X.shape[1] * X_train.std()) + + assert_almost_equal(clf._gamma, gamma) + # Compute the kernel matrices k_zx = polynomial_kernel(X_train[clf.support_], X_test, - degree=clf.degree, coef0=clf.coef0) - k_xx = polynomial_kernel(X_test, + gamma=gamma, degree=clf.degree, coef0=clf.coef0) + k_xx = polynomial_kernel(X_test, gamma=gamma, degree=clf.degree, coef0=clf.coef0).diagonal() # Compute the sample scores = decision scores without `-\rho` @@ -472,10 +478,10 @@ def test_oneclass_and_svdd(): # Test the output of libsvm for the SVDD and the One-Class SVM nu = 0.15 - svdd = svm.SVDD(nu=nu, kernel="rbf") + svdd = svm.SVDD(nu=nu, kernel="rbf", gamma="scale") svdd.fit(X_train) - ocsvm = svm.OneClassSVM(nu=nu, kernel="rbf") + ocsvm = svm.OneClassSVM(nu=nu, kernel="rbf", gamma="scale") ocsvm.fit(X_train) # The intercept of the SVDD differs from that of the One-Class SVM: From a959082cd074dcdbd98d11979183d937fcb21ef9 Mon Sep 17 00:00:00 2001 From: ivannz Date: Fri, 7 Dec 2018 10:54:08 +0300 Subject: [PATCH 13/41] TEST: fixed assertion in test_svm.py to reflect #12717 --- sklearn/svm/tests/test_svm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index 709b892b10420..29afb9e69458c 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -374,7 +374,7 @@ def test_svdd(): assert_array_almost_equal(clf.dual_coef_, [[0.681, 0.139, 0.680, 0.140, 0.680, 0.680]], decimal=3) - assert_false(hasattr(clf, "coef_")) + assert not hasattr(clf, "coef_") def test_svdd_decision_function(): From 355c548c7536db2cd12178d9fb56c51c639d3971 Mon Sep 17 00:00:00 2001 From: ivannz Date: Tue, 19 Mar 2019 12:54:58 +0300 Subject: [PATCH 14/41] TST Fixed SVDD tests affected by scale redefinition in #13221 --- sklearn/svm/tests/test_svm.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index 29afb9e69458c..5698332dada8c 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -368,11 +368,11 @@ def test_svdd(): clf.fit(X) pred = clf.predict(T) - assert_array_equal(pred, [-1, -1, -1]) + assert_array_equal(pred, [+1, -1, -1]) assert_equal(pred.dtype, np.dtype('intp')) - assert_array_almost_equal(clf.intercept_, [0.383], decimal=3) + assert_array_almost_equal(clf.intercept_, [0.2817], decimal=3) assert_array_almost_equal(clf.dual_coef_, - [[0.681, 0.139, 0.680, 0.140, 0.680, 0.680]], + [[0.7500, 0.7499, 0.7499, 0.7500]], decimal=3) assert not hasattr(clf, "coef_") @@ -405,7 +405,7 @@ def test_svdd_decision_function(): assert_greater(np.mean(y_pred_test == 1), .9) y_pred_outliers = clf.predict(X_outliers) - assert_greater(np.mean(y_pred_outliers == -1), .8) + assert_greater(np.mean(y_pred_outliers == -1), .65) dec_func_test = clf.decision_function(X_test) assert_array_equal((dec_func_test > 0).ravel(), y_pred_test == 1) @@ -448,8 +448,8 @@ def test_svdd_score_samples(): assert_array_almost_equal(clf.score_samples(X_test), clf.decision_function(X_test) + clf.offset_) - # Test the gamma="scale" - gamma = 1.0 / (X.shape[1] * X_train.std()) + # Test the gamma="scale": use .var() for scaling (c.f. 
issue #12741) + gamma = 1.0 / (X.shape[1] * X_train.var()) assert_almost_equal(clf._gamma, gamma) From d765a406eed4f1bc05fa288dc72279b9be55fb78 Mon Sep 17 00:00:00 2001 From: Ivan Date: Thu, 25 Jul 2019 11:37:44 +0300 Subject: [PATCH 15/41] updated docstrings and default parameters --- sklearn/svm/_classes.py | 43 ++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py index 891b7ac57bb29..82f422ce4a5db 100644 --- a/sklearn/svm/_classes.py +++ b/sklearn/svm/_classes.py @@ -1837,8 +1837,8 @@ class SVDD(BaseLibSVM, OutlierMixin): ---------- kernel : string, optional (default='rbf') Specifies the kernel type to be used in the algorithm. - It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' - or a callable. + It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or + a callable. If none is given, 'rbf' will be used. If a callable is given it is used to precompute the kernel matrix. @@ -1846,15 +1846,15 @@ class SVDD(BaseLibSVM, OutlierMixin): Degree of the polynomial kernel function ('poly'). Ignored by all other kernels. - gamma : float, optional (default='auto') + gamma : {'scale', 'auto'} or float, optional (default='scale') Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. - Current default is 'auto' which uses 1 / n_features, - if ``gamma='scale'`` is passed then it uses 1 / (n_features * X.std()) - as value of gamma. The current default of gamma, 'auto', will change - to 'scale' in version 0.22. 'auto_deprecated', a deprecated version of - 'auto' is used as a default indicating that no explicit value of gamma - was passed. + - if ``gamma='scale'`` (default) is passed then it uses + 1 / (n_features * X.var()) as value of gamma, + - if 'auto', uses 1 / n_features. + + .. versionchanged:: 0.22 + The default value of ``gamma`` changed from 'auto' to 'scale'. coef0 : float, optional (default=0.0) Independent term in kernel function. @@ -1864,9 +1864,10 @@ class SVDD(BaseLibSVM, OutlierMixin): Tolerance for stopping criterion. nu : float, optional - An upper bound on the fraction of training errors and a lower bound - on the fraction of support vectors. Should be in the interval (0, 1]. - By default 0.5 will be taken. + An upper bound on the fraction of training + errors and a lower bound of the fraction of support + vectors. Should be in the interval (0, 1]. By default 0.5 + will be taken. shrinking : boolean, optional Whether to use the shrinking heuristic. @@ -1905,10 +1906,20 @@ class SVDD(BaseLibSVM, OutlierMixin): offset_ : float Offset used to define the decision function from the raw scores. - We have the relation: decision_function = score_samples - offset_. - The offset is the opposite of intercept_ and is provided for + We have the relation: decision_function = score_samples - `offset_`. + The offset is the opposite of `intercept_` and is provided for consistency with other outlier detection algorithms. + Examples + -------- + >>> from sklearn.svm import SVDD + >>> X = [[0], [0.44], [0.45], [0.46], [1]] + >>> clf = OneClassSVM(gamma='auto').fit(X) + >>> clf.predict(X) + array([-1, 1, 1, 1, -1]) + >>> clf.score_samples(X) # doctest: +ELLIPSIS + array([1.7798..., 2.0547..., 2.0556..., 2.0561..., 1.7332...]) + References ---------- .. [1] Tax, D.M. and Duin, R.P., 2004. 
"Support vector data @@ -1923,11 +1934,11 @@ class SVDD(BaseLibSVM, OutlierMixin): _impl = 'svdd_l1' - def __init__(self, kernel='rbf', degree=3, gamma='auto_deprecated', + def __init__(self, kernel='rbf', degree=3, gamma='scale', coef0=0.0, tol=1e-3, nu=0.5, shrinking=True, cache_size=200, verbose=False, max_iter=-1): - super(SVDD, self).__init__( + super().__init__( kernel=kernel, degree=degree, gamma=gamma, coef0=coef0, tol=tol, C=0., nu=nu, epsilon=0., shrinking=shrinking, probability=False, cache_size=cache_size, class_weight=None, From bba31b608902937228d19662cd3a3a7625311263 Mon Sep 17 00:00:00 2001 From: Ivan Date: Thu, 25 Jul 2019 12:01:50 +0300 Subject: [PATCH 16/41] TST fixed legacy asserts _eq and _gt in SVDD realted tests --- sklearn/svm/tests/test_svm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index 5698332dada8c..111ae55532913 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -369,7 +369,7 @@ def test_svdd(): pred = clf.predict(T) assert_array_equal(pred, [+1, -1, -1]) - assert_equal(pred.dtype, np.dtype('intp')) + assert pred.dtype == np.dtype('intp') assert_array_almost_equal(clf.intercept_, [0.2817], decimal=3) assert_array_almost_equal(clf.dual_coef_, [[0.7500, 0.7499, 0.7499, 0.7500]], @@ -402,10 +402,10 @@ def test_svdd_decision_function(): # predict and validate things y_pred_test = clf.predict(X_test) - assert_greater(np.mean(y_pred_test == 1), .9) + assert np.mean(y_pred_test == 1) > .9 y_pred_outliers = clf.predict(X_outliers) - assert_greater(np.mean(y_pred_outliers == -1), .65) + assert np.mean(y_pred_outliers == -1) > .65 dec_func_test = clf.decision_function(X_test) assert_array_equal((dec_func_test > 0).ravel(), y_pred_test == 1) From 71ecce1da9728b998973596a8eac1b94d006ed6b Mon Sep 17 00:00:00 2001 From: Ivan Nazarov Date: Sat, 5 Oct 2019 21:07:55 +0300 Subject: [PATCH 17/41] Update MRO in SVDD to satisfy #14884 --- sklearn/svm/_classes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py index 82f422ce4a5db..35292c0871433 100644 --- a/sklearn/svm/_classes.py +++ b/sklearn/svm/_classes.py @@ -1822,7 +1822,7 @@ def _more_tags(self): } -class SVDD(BaseLibSVM, OutlierMixin): +class SVDD(OutlierMixin, BaseLibSVM): """Support Vector Data Description (SVDD) for Unsupervised Outlier Detection. Estimate the support of a high-dimensional distribution by finding the From 8c60b69adc235efea6665bd4936f37cf04c6582e Mon Sep 17 00:00:00 2001 From: Ivan Nazarov Date: Fri, 1 Nov 2019 12:43:29 +0300 Subject: [PATCH 18/41] Simplified super() calls according to #12812 --- sklearn/svm/_classes.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py index 35292c0871433..548d39d54ea94 100644 --- a/sklearn/svm/_classes.py +++ b/sklearn/svm/_classes.py @@ -1967,8 +1967,8 @@ def fit(self, X, y=None, sample_weight=None, **params): If X is not a C-ordered contiguous array it is copied. """ - super(SVDD, self).fit(X, np.ones(_num_samples(X)), - sample_weight=sample_weight, **params) + super().fit(X, np.ones(_num_samples(X)), + sample_weight=sample_weight, **params) self.offset_ = -self._intercept_ return self @@ -2021,5 +2021,5 @@ def predict(self, X): y_pred : array, shape (n_samples,) Class labels for samples in X. 
""" - y = super(SVDD, self).predict(X) + y = super().predict(X) return np.asarray(y, dtype=np.intp) From fab45389c0550cbd133cf3e8f7b328422ba16097 Mon Sep 17 00:00:00 2001 From: ivannz Date: Fri, 10 Jan 2020 11:41:46 +0300 Subject: [PATCH 19/41] DOC new docstring guidelines in svdd (according to #16060) --- sklearn/svm/_classes.py | 69 +++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 31 deletions(-) diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py index 548d39d54ea94..bbab979848852 100644 --- a/sklearn/svm/_classes.py +++ b/sklearn/svm/_classes.py @@ -1823,7 +1823,7 @@ def _more_tags(self): class SVDD(OutlierMixin, BaseLibSVM): - """Support Vector Data Description (SVDD) for Unsupervised Outlier Detection. + """Support Vector Data Description for Unsupervised Outlier Detection. Estimate the support of a high-dimensional distribution by finding the tightest soft hypersphere around a data set, which permits at most a @@ -1835,18 +1835,18 @@ class SVDD(OutlierMixin, BaseLibSVM): Parameters ---------- - kernel : string, optional (default='rbf') + kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf' Specifies the kernel type to be used in the algorithm. It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or a callable. If none is given, 'rbf' will be used. If a callable is given it is used to precompute the kernel matrix. - degree : int, optional (default=3) + degree : int, default=3 Degree of the polynomial kernel function ('poly'). Ignored by all other kernels. - gamma : {'scale', 'auto'} or float, optional (default='scale') + gamma : {'scale', 'auto'} or float, default='scale' Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. - if ``gamma='scale'`` (default) is passed then it uses @@ -1856,52 +1856,52 @@ class SVDD(OutlierMixin, BaseLibSVM): .. versionchanged:: 0.22 The default value of ``gamma`` changed from 'auto' to 'scale'. - coef0 : float, optional (default=0.0) + coef0 : float, default=0.0 Independent term in kernel function. It is only significant in 'poly' and 'sigmoid'. - tol : float, optional + tol : float, default=1e-3 Tolerance for stopping criterion. - nu : float, optional + nu : float, default=0.5 An upper bound on the fraction of training errors and a lower bound of the fraction of support vectors. Should be in the interval (0, 1]. By default 0.5 will be taken. - shrinking : boolean, optional + shrinking : bool, default=True Whether to use the shrinking heuristic. - cache_size : float, optional + cache_size : float, default=200 Specify the size of the kernel cache (in MB). - verbose : bool, default: False + verbose : bool, default=False Enable verbose output. Note that this setting takes advantage of a per-process runtime setting in libsvm that, if enabled, may not work properly in a multithreaded context. - max_iter : int, optional (default=-1) + max_iter : int, default=-1 Hard limit on iterations within solver, or -1 for no limit. Attributes ---------- - support_ : array-like, shape = [n_SV] + support_ : ndarray of shape (n_SV,) Indices of support vectors. - support_vectors_ : array-like, shape = [nSV, n_features] + support_vectors_ : ndarray of shape (n_SV, n_features) Support vectors. - dual_coef_ : array, shape = [1, n_SV] + dual_coef_ : ndarray of shape (1, n_SV) Coefficients of the support vectors in the decision function. - coef_ : array, shape = [1, n_features] + coef_ : ndarray of shape (1, n_features) Weights assigned to the features (coefficients in the primal problem). 
This is only available in the case of a linear kernel. `coef_` is readonly property derived from `dual_coef_` and `support_vectors_` - intercept_ : array, shape = [1,] + intercept_ : ndarray of shape (1,) The constant in the decision function. offset_ : float @@ -1910,6 +1910,9 @@ class SVDD(OutlierMixin, BaseLibSVM): The offset is the opposite of `intercept_` and is provided for consistency with other outlier detection algorithms. + fit_status_ : int + 0 if correctly fitted, 1 otherwise (will raise warning) + Examples -------- >>> from sklearn.svm import SVDD @@ -1949,18 +1952,20 @@ def fit(self, X, y=None, sample_weight=None, **params): Parameters ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) + X : {array-like, sparse matrix} of shape (n_samples, n_features) Set of samples, where n_samples is the number of samples and n_features is the number of features. - sample_weight : array-like, shape (n_samples,) - Per-sample weights. Higher weights force the novelty detector - to put more emphasis on these points. + sample_weight : array-like of shape (n_samples,), default=None + Per-sample weights. Rescale C per sample. Higher weights + force the classifier to put more emphasis on these points. + + y : Ignored + not used, present for API consistency by convention. Returns ------- self : object - Returns self. Notes ----- @@ -1979,11 +1984,12 @@ def decision_function(self, X): Parameters ---------- - X : array-like, shape (n_samples, n_features) + X : array-like of shape (n_samples, n_features) + The data matrix. Returns ------- - X : array-like, shape (n_samples,) + dec : ndarray of shape (n_samples,) Returns the decision function of the samples. """ dec = self._decision_function(X).ravel() @@ -1994,31 +2000,32 @@ def score_samples(self, X): Parameters ---------- - X : array-like, shape (n_samples, n_features) + X : array-like of shape (n_samples, n_features) + The data matrix. Returns ------- - score_samples : array-like, shape (n_samples,) + score_samples : ndarray of shape (n_samples,) Returns the (unshifted) scoring function of the samples. """ score_samples = self.decision_function(X) + self.offset_ return score_samples def predict(self, X): - """ - Perform classification on samples in X. + """Perform classification on samples in X. - For an one-class model, +1 or -1 is returned. + For a one-class model, +1 or -1 is returned. Parameters ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) + X : {array-like, sparse matrix} of shape (n_samples, n_features) or \ + (n_samples_test, n_samples_train) For kernel="precomputed", the expected shape of X is - [n_samples_test, n_samples_train] + (n_samples_test, n_samples_train). Returns ------- - y_pred : array, shape (n_samples,) + y_pred : ndarray of shape (n_samples,) Class labels for samples in X. 
""" y = super().predict(X) From 7082a56b73b8e0d289d09b28f7fa3953db90690a Mon Sep 17 00:00:00 2001 From: ivannz Date: Fri, 10 Jan 2020 11:51:09 +0300 Subject: [PATCH 20/41] DOCTEST fixed object and corrected reference scores --- sklearn/svm/_classes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py index bbab979848852..a75482a7eb380 100644 --- a/sklearn/svm/_classes.py +++ b/sklearn/svm/_classes.py @@ -1917,11 +1917,11 @@ class SVDD(OutlierMixin, BaseLibSVM): -------- >>> from sklearn.svm import SVDD >>> X = [[0], [0.44], [0.45], [0.46], [1]] - >>> clf = OneClassSVM(gamma='auto').fit(X) + >>> clf = SVDD(gamma='auto').fit(X) >>> clf.predict(X) array([-1, 1, 1, 1, -1]) >>> clf.score_samples(X) # doctest: +ELLIPSIS - array([1.7798..., 2.0547..., 2.0556..., 2.0561..., 1.7332...]) + array([0.5298..., 0.8047..., 0.8056..., 0.8061..., 0.4832...]) References ---------- From dbbc90bd6eccd7e01e9e1b65aae43662044866c3 Mon Sep 17 00:00:00 2001 From: ivannz Date: Fri, 10 Jan 2020 13:38:21 +0300 Subject: [PATCH 21/41] fixed sphinx warnings due to bad indentation for circleci --- doc/modules/svm.rst | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/doc/modules/svm.rst b/doc/modules/svm.rst index 2a79b9393dee7..2f2ef8474a776 100644 --- a/doc/modules/svm.rst +++ b/doc/modules/svm.rst @@ -831,17 +831,17 @@ where :math:`+1` indicates an inliner and :math:`-1` an outlier. The parameter :math:`\nu\in(0,1]` determines the fraction of outliers in the training dataset. More technically :math:`\nu` is: - * an upper bound on the fraction of the training points lying outside - the estimated region; - * a lower bound on the fraction of support vectors. + - an upper bound on the fraction of the training points lying outside + the estimated region; + - a lower bound on the fraction of support vectors. .. topic:: References: - * `Estimating the support of a high-dimensional distribution - `_ Schölkopf, - Bernhard, et al. Neural computation 13.7 (2001): 1443-1471. - doi:10.1162/089976601750264965 + * `Estimating the support of a high-dimensional distribution + `_ Schölkopf, + Bernhard, et al. Neural computation 13.7 (2001): 1443-1471. + doi:10.1162/089976601750264965 .. _svm_svdd: @@ -945,14 +945,14 @@ for a particular family of kernels (see :ref:`outlier_detection_ocsvm_vs_svdd`). .. topic:: References: - * `Support vector data description - `_ - Tax, and Duin. Machine learning, 54(1) (2004), pp.45-66. + * `Support vector data description + `_ + Tax, and Duin. Machine learning, 54(1) (2004), pp.45-66. - * `A revisit to support vector data description (SVDD). - `_ Chang, Lee, - and Lin. Technical Report (2013), Dept. of Computer Science, - National Taiwan University. + * `A revisit to support vector data description (SVDD). + `_ Chang, Lee, + and Lin. Technical Report (2013), Dept. of Computer Science, + National Taiwan University. .. 
_svm_implementation_details: From c13582dce61c1b08059862815f9b7f44ff00bd8e Mon Sep 17 00:00:00 2001 From: ivannz Date: Thu, 30 Jan 2020 23:00:32 +0300 Subject: [PATCH 22/41] fixed unresolved conflict --- sklearn/svm/_libsvm.pyx | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/sklearn/svm/_libsvm.pyx b/sklearn/svm/_libsvm.pyx index 4ca510c9dcdf3..a2b0b7d0a82ff 100644 --- a/sklearn/svm/_libsvm.pyx +++ b/sklearn/svm/_libsvm.pyx @@ -74,15 +74,9 @@ def fit( Y : array, dtype=float64 of shape (n_samples,) target vector -<<<<<<< HEAD - svm_type : {0, 1, 2, 3, 4}, default=0 - Type of SVM: C_SVC, NuSVC, OneClassSVM, EpsilonSVR or NuSVR - respectively. -======= svm_type : {0, 1, 2, 3, 4, 5}, optional Type of SVM: C_SVC, NuSVC, OneClassSVM, EpsilonSVR, NuSVR, or SVDD-L1 respectively. 0 by default. ->>>>>>> ENH: nu-SVDD with sample weights, based on Chang, Lee, Lin (2013) kernel : {'linear', 'rbf', 'poly', 'sigmoid', 'precomputed'}, default="rbf" Kernel to use in the model: linear, polynomial, RBF, sigmoid @@ -614,18 +608,12 @@ def cross_validation( Y : array, dtype=float of shape (n_samples,) target vector -<<<<<<< HEAD n_fold : int32 Number of folds for cross validation. -======= + svm_type : {0, 1, 2, 3, 4, 5} Type of SVM: C SVC, nu SVC, one class, epsilon SVR, nu SVR, or SVDD-L1. ->>>>>>> ENH: nu-SVDD with sample weights, based on Chang, Lee, Lin (2013) - - svm_type : {0, 1, 2, 3, 4}, default=0 - Type of SVM: C_SVC, NuSVC, OneClassSVM, EpsilonSVR or NuSVR - respectively. kernel : {'linear', 'rbf', 'poly', 'sigmoid', 'precomputed'}, default='rbf' Kernel to use in the model: linear, polynomial, RBF, sigmoid From 90085e045e4280f86c53bca22a31fac4bf408192 Mon Sep 17 00:00:00 2001 From: ivannz Date: Thu, 30 Jan 2020 23:01:24 +0300 Subject: [PATCH 23/41] fixed unused kwarg warning --- examples/svm/plot_oneclass_vs_svdd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/svm/plot_oneclass_vs_svdd.py b/examples/svm/plot_oneclass_vs_svdd.py index bc19ad53bdd10..9f9b31e16fc71 100644 --- a/examples/svm/plot_oneclass_vs_svdd.py +++ b/examples/svm/plot_oneclass_vs_svdd.py @@ -79,7 +79,7 @@ ax.contourf(xx, yy, Z, levels=[0, Z.max()], colors='palevioletred', zorder=-98) a = ax.contour(xx, yy, Z, levels=[0], linewidths=2, colors='darkred', - zorder=-97, label="learned frontier") + zorder=-97) s = 40 b1 = ax.scatter(X_train[:, 0], X_train[:, 1], s=s, From 84a1dcbda838168acd0ca0471be6bc375d1835b9 Mon Sep 17 00:00:00 2001 From: ivannz Date: Thu, 30 Jan 2020 23:08:36 +0300 Subject: [PATCH 24/41] some oneliners --- sklearn/svm/_classes.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py index a75482a7eb380..9b7aa17d68ace 100644 --- a/sklearn/svm/_classes.py +++ b/sklearn/svm/_classes.py @@ -1992,8 +1992,7 @@ def decision_function(self, X): dec : ndarray of shape (n_samples,) Returns the decision function of the samples. """ - dec = self._decision_function(X).ravel() - return dec + return self._decision_function(X).ravel() def score_samples(self, X): """Raw scoring function of the samples. @@ -2008,8 +2007,7 @@ def score_samples(self, X): score_samples : ndarray of shape (n_samples,) Returns the (unshifted) scoring function of the samples. """ - score_samples = self.decision_function(X) + self.offset_ - return score_samples + return self.decision_function(X) + self.offset_ def predict(self, X): """Perform classification on samples in X. 
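
A note on the one-liner refactoring above: it leans on the invariant that `score_samples` is simply the decision function shifted back by `offset_`, and that `offset_` is the negated `intercept_` (set at the end of `fit`). A minimal sketch of these invariants, assuming the `SVDD` estimator from this branch is importable (it is not part of any released scikit-learn)::

    import numpy as np
    from sklearn.svm import SVDD  # available on this branch only

    rng = np.random.RandomState(0)
    X = rng.randn(100, 2)

    clf = SVDD(kernel="rbf", gamma=0.5, nu=0.1).fit(X)

    # score_samples is the raw score; decision_function subtracts offset_
    assert np.allclose(clf.score_samples(X),
                       clf.decision_function(X) + clf.offset_)

    # offset_ mirrors OneClassSVM's convention: the negated intercept_
    assert np.allclose(clf.offset_, -clf.intercept_)
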
From 3d39584240c978c4fe1d809ae2000fdaaa5b0d53 Mon Sep 17 00:00:00 2001
From: ivannz
Date: Thu, 30 Jan 2020 23:27:12 +0300
Subject: [PATCH 25/41] side-by-side comparison of ocsvm with svdd (stationary
 kernel)

---
 examples/miscellaneous/plot_anomaly_comparison.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/miscellaneous/plot_anomaly_comparison.py b/examples/miscellaneous/plot_anomaly_comparison.py
index efb4f6d86edfc..3f42cd8e54c2c 100644
--- a/examples/miscellaneous/plot_anomaly_comparison.py
+++ b/examples/miscellaneous/plot_anomaly_comparison.py
@@ -108,6 +108,7 @@
             ),
         ),
     ),
+    ("SVDD", svm.SVDD(nu=outliers_fraction, kernel="rbf", gamma=0.1)),
     (
         "Isolation Forest",
         IsolationForest(contamination=outliers_fraction, random_state=42),

From 4d7217d64c23c5cf7760e29e80e58cf0bf8fbf4a Mon Sep 17 00:00:00 2001
From: ivannz
Date: Fri, 31 Jan 2020 00:04:09 +0300
Subject: [PATCH 26/41] moved SVDD announcement from v0.20 to v0.23

---
 doc/whats_new/v0.20.rst | 4 ----
 doc/whats_new/v0.23.rst | 4 ++++
 sklearn/svm/_classes.py | 5 ++---
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst
index 21437674e0333..add4d97f6de09 100644
--- a/doc/whats_new/v0.20.rst
+++ b/doc/whats_new/v0.20.rst
@@ -1486,10 +1486,6 @@ Support for Python 3.3 has been officially dropped.
    version 0.22 to account better for unscaled features. :issue:`8361` by
    :user:`Gaurav Dhingra ` and :user:`Ting Neo `.
 
-- |Feature| Added the :class:`svm.SVDD` class for novelty detection based on
-  soft minimal volume hypersphere around the sample data.
-  :user:`Ivan Nazarov `.
-
 :mod:`sklearn.tree`
 ...................
 
diff --git a/doc/whats_new/v0.23.rst b/doc/whats_new/v0.23.rst
index ebf63eac5b8a3..4fef8e3e0a90c 100644
--- a/doc/whats_new/v0.23.rst
+++ b/doc/whats_new/v0.23.rst
@@ -721,6 +721,10 @@ Changelog
   `probB_`, are now deprecated as they were not useful. :pr:`15558`
   by `Thomas Fan`_.
 
+- |Feature| Added the :class:`svm.SVDD` class for novelty detection based on
+  soft minimal volume hypersphere around the sample data.
+  :user:`Ivan Nazarov `.
+
 :mod:`sklearn.tree`
 ...................
 
diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py
index 9b7aa17d68ace..afe29f23ba5b0 100644
--- a/sklearn/svm/_classes.py
+++ b/sklearn/svm/_classes.py
@@ -1833,6 +1833,8 @@ class SVDD(OutlierMixin, BaseLibSVM):
 
     Read more in the :ref:`User Guide `.
 
+    ..versionadded: 0.23
+
     Parameters
     ----------
     kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'
@@ -1853,9 +1855,6 @@ class SVDD(OutlierMixin, BaseLibSVM):
         1 / (n_features * X.var()) as value of gamma,
         - if 'auto', uses 1 / n_features.
 
-        .. versionchanged:: 0.22
-           The default value of ``gamma`` changed from 'auto' to 'scale'.
-
     coef0 : float, default=0.0
         Independent term in kernel function.
        It is only significant in 'poly' and 'sigmoid'.
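
The comparison entry added in patch 25 above sets ``nu`` to the expected fraction of outliers. A self-contained sketch of the same pattern follows; the blob layout, the sample sizes and ``gamma=0.1`` are illustrative assumptions, not values taken from the example script::

    import numpy as np
    from sklearn import svm
    from sklearn.datasets import make_blobs

    outliers_fraction = 0.15
    rng = np.random.RandomState(42)

    # 85 inliers drawn from a blob plus 15 uniform outliers
    X_inliers, _ = make_blobs(n_samples=85, centers=[[0, 0]],
                              cluster_std=0.5, random_state=42)
    X_outliers = rng.uniform(low=-4, high=4, size=(15, 2))
    X = np.vstack([X_inliers, X_outliers])

    for name, model in [
        ("OCSVM", svm.OneClassSVM(nu=outliers_fraction, kernel="rbf", gamma=0.1)),
        ("SVDD", svm.SVDD(nu=outliers_fraction, kernel="rbf", gamma=0.1)),
    ]:
        y_pred = model.fit(X).predict(X)  # +1 for inliers, -1 for outliers
        print(name, "flagged", int((y_pred == -1).sum()), "of", len(X), "samples")
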
From 2654479e215ac15b44fd59181c4d8626c8b432b9 Mon Sep 17 00:00:00 2001
From: ivannz
Date: Fri, 31 Jan 2020 00:30:05 +0300
Subject: [PATCH 27/41] removed hardcoded sample sizes

---
 examples/svm/plot_oneclass_vs_svdd.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/examples/svm/plot_oneclass_vs_svdd.py b/examples/svm/plot_oneclass_vs_svdd.py
index 9f9b31e16fc71..7353a1d09aae3 100644
--- a/examples/svm/plot_oneclass_vs_svdd.py
+++ b/examples/svm/plot_oneclass_vs_svdd.py
@@ -90,9 +90,11 @@
     ax.set_xlim((-6, 6))
     ax.set_ylim((-6, 6))
 
-    ax.set_title("%s %s (%d/200, %d/40, %d/40)"
-                 % (model_name, kernel_name, n_error_train,
-                    n_error_test, n_error_outliers))
+    ax.set_title("%s %s (%d/%d, %d/%d, %d/%d)"
+                 % (model_name, kernel_name,
+                    n_error_train, len(X_train),
+                    n_error_test, len(X_test),
+                    n_error_outliers, len(X_outliers)))
 
     ax.legend([a.collections[0], b1, b2, c],
               ["learned frontier", "training observations",

From 1e54626decf7e541532596ec3e57c099ab69ab3c Mon Sep 17 00:00:00 2001
From: ivannz
Date: Sat, 30 May 2020 18:29:25 +0300
Subject: [PATCH 28/41] patches to svdd-l1 reflecting #14286, #16530, #16992
 and #16973

---
 sklearn/svm/_classes.py        |  4 ++--
 sklearn/svm/src/libsvm/svm.cpp | 11 ++++++-----
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py
index afe29f23ba5b0..ea452574a3f7a 100644
--- a/sklearn/svm/_classes.py
+++ b/sklearn/svm/_classes.py
@@ -1919,7 +1919,7 @@ class SVDD(OutlierMixin, BaseLibSVM):
     >>> clf = SVDD(gamma='auto').fit(X)
     >>> clf.predict(X)
     array([-1,  1,  1,  1, -1])
-    >>> clf.score_samples(X)  # doctest: +ELLIPSIS
+    >>> clf.score_samples(X)
     array([0.5298..., 0.8047..., 0.8056..., 0.8061..., 0.4832...])
 
     References
@@ -1936,7 +1936,7 @@ class SVDD(OutlierMixin, BaseLibSVM):
 
     _impl = 'svdd_l1'
 
-    def __init__(self, kernel='rbf', degree=3, gamma='scale',
+    def __init__(self, *, kernel='rbf', degree=3, gamma='scale',
                  coef0=0.0, tol=1e-3, nu=0.5, shrinking=True,
                  cache_size=200, verbose=False, max_iter=-1):
 
diff --git a/sklearn/svm/src/libsvm/svm.cpp b/sklearn/svm/src/libsvm/svm.cpp
index 8becae88ece14..21bec8bd93d7d 100644
--- a/sklearn/svm/src/libsvm/svm.cpp
+++ b/sklearn/svm/src/libsvm/svm.cpp
@@ -1840,14 +1840,14 @@ static void solve_nu_svr(
 
 static void solve_svdd_l1(
 	const PREFIX(problem) *prob, const svm_parameter *param,
-	double *alpha, Solver::SolutionInfo* si)
+	double *alpha, Solver::SolutionInfo* si, BlasFunctions *blas_functions)
 {
 	int l = prob->l;
 	int i, j;
 	double r_square;
 
-	ONE_CLASS_Q Q = ONE_CLASS_Q(*prob, *param);
+	ONE_CLASS_Q Q = ONE_CLASS_Q(*prob, *param, blas_functions);
 
 	if(param->nu < 1)
 	{
 		// case \nu < 1: the dual problem is
@@ -1990,7 +1990,7 @@ static decision_function svm_train_one(
 			break;
 		case SVDD_L1:
 			si.upper_bound = Malloc(double,prob->l);
-			solve_svdd_l1(prob,param,alpha,&si);
+			solve_svdd_l1(prob,param,alpha,&si,blas_functions);
 			break;
 	}
 
@@ -2945,7 +2945,7 @@ double PREFIX(predict_values)(const PREFIX(model) *model, const PREFIX(node) *x,
 
 	if(model->param.svm_type == SVDD_L1)
 	{
-		double K_xx = NAMESPACE::Kernel::k_function(x,x,model->param) / 2;
+		double K_xx = NAMESPACE::Kernel::k_function(x,x,model->param,blas_functions) / 2;
 		for(int i=0;i<model->l;i++)
 			sum -= sv_coef[i] * K_xx;
 	}
@@ -3266,7 +3266,8 @@ const char *PREFIX(check_parameter)(const PREFIX(problem) *prob, const svm_param
 	if(svm_type == C_SVC ||
 	   svm_type == EPSILON_SVR ||
 	   svm_type == NU_SVR ||
-	   svm_type == ONE_CLASS)
+	   svm_type == ONE_CLASS ||
+	   svm_type == SVDD_L1)
 	{
 		PREFIX(problem) newprob;
 		// filter samples
with negative and null weights

From 7a0ede0df3fc7919cbd40842798288394df5833d Mon Sep 17 00:00:00 2001
From: ivannz
Date: Sun, 5 Jul 2020 14:39:44 +0300
Subject: [PATCH 29/41] reflect #17176: zero weight in SV models means that a
 sample is never a support vector

---
 sklearn/svm/_classes.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py
index ea452574a3f7a..e6413a45100f3 100644
--- a/sklearn/svm/_classes.py
+++ b/sklearn/svm/_classes.py
@@ -2027,3 +2027,11 @@ def predict(self, X):
         """
         y = super().predict(X)
         return np.asarray(y, dtype=np.intp)
+
+    def _more_tags(self):
+        return {
+            '_xfail_checks': {
+                'check_sample_weights_invariance(kind=zeros)':
+                'zero sample_weight is not equivalent to removing samples',
+            }
+        }

From 6bf0fb549e33b40433ffe2e95c8c0b3612454c89 Mon Sep 17 00:00:00 2001
From: ivannz
Date: Sun, 5 Jul 2020 14:42:34 +0300
Subject: [PATCH 30/41] reflect #15521: document attributes inherited by SVDD
 from BaseLibSVM

---
 sklearn/svm/_classes.py | 33 ++++++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py
index e6413a45100f3..b87bb35b5fd88 100644
--- a/sklearn/svm/_classes.py
+++ b/sklearn/svm/_classes.py
@@ -1870,6 +1870,7 @@ class SVDD(OutlierMixin, BaseLibSVM):
 
     shrinking : bool, default=True
         Whether to use the shrinking heuristic.
+        See the :ref:`User Guide `.
 
     cache_size : float, default=200
         Specify the size of the kernel cache (in MB).
@@ -1884,33 +1885,43 @@ class SVDD(OutlierMixin, BaseLibSVM):
 
     Attributes
     ----------
-    support_ : ndarray of shape (n_SV,)
-        Indices of support vectors.
-
-    support_vectors_ : ndarray of shape (n_SV, n_features)
-        Support vectors.
-
-    dual_coef_ : ndarray of shape (1, n_SV)
-        Coefficients of the support vectors in the decision function.
+    class_weight_ : ndarray of shape (n_classes,)
+        Multipliers of parameter C for each class.
+        Computed based on the ``class_weight`` parameter.
 
     coef_ : ndarray of shape (1, n_features)
         Weights assigned to the features (coefficients in the primal
         problem). This is only available in the case of a linear kernel.
 
         `coef_` is readonly property derived from `dual_coef_` and
-        `support_vectors_`
+        `support_vectors_`.
+
+    dual_coef_ : ndarray of shape (1, n_SV)
+        Coefficients of the support vectors in the decision function.
+
+    fit_status_ : int
+        0 if correctly fitted, 1 otherwise (will raise warning)
 
     intercept_ : ndarray of shape (1,)
         The constant in the decision function.
 
+    n_support_ : ndarray of shape (n_classes,), dtype=int32
+        Number of support vectors for each class.
+
     offset_ : float
         Offset used to define the decision function from the raw scores.
         We have the relation: decision_function = score_samples - `offset_`.
         The offset is the opposite of `intercept_` and is provided for
         consistency with other outlier detection algorithms.
 
-    fit_status_ : int
-        0 if correctly fitted, 1 otherwise (will raise warning)
+    shape_fit_ : tuple of int of shape (n_dimensions_of_X,)
+        Array dimensions of training vector ``X``.
+
+    support_ : ndarray of shape (n_SV,)
+        Indices of support vectors.
+
+    support_vectors_ : ndarray of shape (n_SV, n_features)
+        Support vectors.
     Examples
     --------

From 00f32796881b3205d78f234054a3020d205ceac7 Mon Sep 17 00:00:00 2001
From: ivannz
Date: Sun, 5 Jul 2020 14:52:19 +0300
Subject: [PATCH 31/41] reflect #14286: test for negative or null
 sample_weights in SVDD

---
 sklearn/svm/tests/test_svm.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py
index 111ae55532913..30a453ef8f4ca 100644
--- a/sklearn/svm/tests/test_svm.py
+++ b/sklearn/svm/tests/test_svm.py
@@ -715,8 +715,9 @@ def test_svm_equivalence_sample_weight_C():
         (svm.SVR, "Invalid input - all samples have zero or negative weights."),
         (svm.NuSVR, "Invalid input - all samples have zero or negative weights."),
         (svm.OneClassSVM, "Invalid input - all samples have zero or negative weights."),
+        (svm.SVDD, "Invalid input - all samples have zero or negative weights."),
     ],
-    ids=["SVC", "NuSVC", "SVR", "NuSVR", "OneClassSVM"],
+    ids=["SVC", "NuSVC", "SVR", "NuSVR", "OneClassSVM", "SVDD"],
 )
 @pytest.mark.parametrize(
     "sample_weight",

From 0e015e0d8aa9c703dc0980fd78fb2aa00b758188 Mon Sep 17 00:00:00 2001
From: ivannz
Date: Thu, 15 Oct 2020 15:16:12 +0300
Subject: [PATCH 32/41] update mode in more-tags (reflecting #17361) see-also
 cross-reference in ocSVM and SVDD (reflecting #18332)

---
 sklearn/svm/_classes.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py
index b87bb35b5fd88..3908d67356fb5 100644
--- a/sklearn/svm/_classes.py
+++ b/sklearn/svm/_classes.py
@@ -1654,6 +1654,10 @@ class OneClassSVM(OutlierMixin, BaseLibSVM):
         Local Outlier Factor (LOF).
     sklearn.ensemble.IsolationForest : Isolation Forest Algorithm.
 
+    sklearn.svm.SVDD : Support vector method for outlier detection via
+        a separating soft-margin hypersphere implemented with libsvm with
+        a parameter to control the number of support vectors.
+
     Examples
     --------
     >>> from sklearn.svm import OneClassSVM
@@ -1933,6 +1937,12 @@ class SVDD(OutlierMixin, BaseLibSVM):
     >>> clf.score_samples(X)
     array([0.5298..., 0.8047..., 0.8056..., 0.8061..., 0.4832...])
 
+    See Also
+    --------
+    OneClassSVM : Support vector method for outlier detection via a separating
+        soft-margin hyperplane implemented with libsvm with a parameter to
+        control the number of support vectors.
+
     References
     ----------
     .. [1] Tax, D.M. and Duin, R.P., 2004. "Support vector data
@@ -2042,7 +2052,7 @@ def predict(self, X):
     def _more_tags(self):
         return {
             '_xfail_checks': {
-                'check_sample_weights_invariance(kind=zeros)':
+                'check_sample_weights_invariance':
                 'zero sample_weight is not equivalent to removing samples',
             }
         }

From 6bb003fb171f1c36275f1d9650d1a2069e8eac81 Mon Sep 17 00:00:00 2001
From: ivannz
Date: Thu, 25 Feb 2021 23:55:41 +0300
Subject: [PATCH 33/41] moved SVDD announcement from v0.23 to v1.0

---
 doc/whats_new/v0.23.rst | 4 ----
 doc/whats_new/v1.0.rst  | 7 +++++++
 sklearn/svm/_classes.py | 2 +-
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/doc/whats_new/v0.23.rst b/doc/whats_new/v0.23.rst
index 4fef8e3e0a90c..ebf63eac5b8a3 100644
--- a/doc/whats_new/v0.23.rst
+++ b/doc/whats_new/v0.23.rst
@@ -721,10 +721,6 @@ Changelog
   `probB_`, are now deprecated as they were not useful. :pr:`15558`
   by `Thomas Fan`_.
 
-- |Feature| Added the :class:`svm.SVDD` class for novelty detection based on
-  soft minimal volume hypersphere around the sample data.
-  :user:`Ivan Nazarov `.
-
 :mod:`sklearn.tree`
 ...................
 
diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 6ece2f16b6e93..b47ebc9123c23 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -1213,6 +1213,13 @@ Changelog now deprecated. Use `scipy.sparse.csgraph.shortest_path` instead. :pr:`20531` by `Tom Dupre la Tour`_. +:mod:`sklearn.svm` +.................. + +- |Feature| Added the :class:`svm.SVDD` class for novelty detection based + on soft minimal volume hypersphere around the sample data. :pr:`7910` + by :user:`Ivan Nazarov `. + Code and Documentation Contributors ----------------------------------- diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py index 3908d67356fb5..988d01d542a71 100644 --- a/sklearn/svm/_classes.py +++ b/sklearn/svm/_classes.py @@ -1837,7 +1837,7 @@ class SVDD(OutlierMixin, BaseLibSVM): Read more in the :ref:`User Guide `. - ..versionadded: 0.23 + ..versionadded: 1.0 Parameters ---------- From fd436052a15437e6ea2936e123faf22c76780a3e Mon Sep 17 00:00:00 2001 From: ivannz Date: Tue, 15 Jun 2021 14:48:46 +0300 Subject: [PATCH 34/41] docfix in SVDD related to #20236 --- sklearn/svm/_classes.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py index 988d01d542a71..a114899f7693e 100644 --- a/sklearn/svm/_classes.py +++ b/sklearn/svm/_classes.py @@ -1909,6 +1909,11 @@ class SVDD(OutlierMixin, BaseLibSVM): intercept_ : ndarray of shape (1,) The constant in the decision function. + n_features_in_ : int + Number of features seen during :term:`fit`. + + .. versionadded:: 0.24 + n_support_ : ndarray of shape (n_classes,), dtype=int32 Number of support vectors for each class. From b0f4926d40aa43b1663b7333cb616a022ca7e7b2 Mon Sep 17 00:00:00 2001 From: ivannz Date: Fri, 23 Jul 2021 12:49:26 +0300 Subject: [PATCH 35/41] migrate svdd code style to Black (#18948) ensure SVDD passes numpydoc validation (#20463) check for svdd in `test_sparse.py:check_svm_model_equal` to avoid calling `.predict_proba` --- sklearn/svm/__init__.py | 3 +- sklearn/svm/_base.py | 3 +- sklearn/svm/_classes.py | 80 +++++++++++++++++++++----------- sklearn/svm/tests/test_sparse.py | 18 ++++--- sklearn/svm/tests/test_svm.py | 43 ++++++++--------- 5 files changed, 88 insertions(+), 59 deletions(-) diff --git a/sklearn/svm/__init__.py b/sklearn/svm/__init__.py index 34976e71e797a..fad79458656d1 100644 --- a/sklearn/svm/__init__.py +++ b/sklearn/svm/__init__.py @@ -10,8 +10,7 @@ # of their respective owners. # License: BSD 3 clause (C) INRIA 2010 -from ._classes import SVC, NuSVC, SVR, NuSVR, OneClassSVM, LinearSVC, \ - LinearSVR, SVDD +from ._classes import SVC, NuSVC, SVR, NuSVR, OneClassSVM, LinearSVC, LinearSVR, SVDD from ._bounds import l1_min_c __all__ = [ diff --git a/sklearn/svm/_base.py b/sklearn/svm/_base.py index c18589d9a14bc..52ac82797afb9 100644 --- a/sklearn/svm/_base.py +++ b/sklearn/svm/_base.py @@ -27,8 +27,7 @@ from ..exceptions import NotFittedError -LIBSVM_IMPL = ['c_svc', 'nu_svc', 'one_class', 'epsilon_svr', 'nu_svr', - 'svdd_l1'] +LIBSVM_IMPL = ["c_svc", "nu_svc", "one_class", "epsilon_svr", "nu_svr", "svdd_l1"] def _one_vs_one_coef(dual_coef, n_support, support_vectors): diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py index a114899f7693e..4747854001999 100644 --- a/sklearn/svm/_classes.py +++ b/sklearn/svm/_classes.py @@ -1932,16 +1932,6 @@ class SVDD(OutlierMixin, BaseLibSVM): support_vectors_ : ndarray of shape (n_SV, n_features) Support vectors. 
- Examples - -------- - >>> from sklearn.svm import SVDD - >>> X = [[0], [0.44], [0.45], [0.46], [1]] - >>> clf = SVDD(gamma='auto').fit(X) - >>> clf.predict(X) - array([-1, 1, 1, 1, -1]) - >>> clf.score_samples(X) - array([0.5298..., 0.8047..., 0.8056..., 0.8061..., 0.4832...]) - See Also -------- OneClassSVM : Support vector method for outlier detection via a separating @@ -1958,22 +1948,55 @@ class SVDD(OutlierMixin, BaseLibSVM): to support vector data description (SVDD)." Technical Report, Department of Computer Science, National Taiwan University. + + Examples + -------- + >>> from sklearn.svm import SVDD + >>> X = [[0], [0.44], [0.45], [0.46], [1]] + >>> clf = SVDD(gamma='auto').fit(X) + >>> clf.predict(X) + array([-1, 1, 1, 1, -1]) + >>> clf.score_samples(X) + array([0.5298..., 0.8047..., 0.8056..., 0.8061..., 0.4832...]) """ - _impl = 'svdd_l1' + _impl = "svdd_l1" - def __init__(self, *, kernel='rbf', degree=3, gamma='scale', - coef0=0.0, tol=1e-3, nu=0.5, shrinking=True, cache_size=200, - verbose=False, max_iter=-1): + def __init__( + self, + *, + kernel="rbf", + degree=3, + gamma="scale", + coef0=0.0, + tol=1e-3, + nu=0.5, + shrinking=True, + cache_size=200, + verbose=False, + max_iter=-1, + ): super().__init__( - kernel=kernel, degree=degree, gamma=gamma, coef0=coef0, - tol=tol, C=0., nu=nu, epsilon=0., shrinking=shrinking, - probability=False, cache_size=cache_size, class_weight=None, - verbose=verbose, max_iter=max_iter, random_state=None) + kernel=kernel, + degree=degree, + gamma=gamma, + coef0=coef0, + tol=tol, + C=0.0, + nu=nu, + epsilon=0.0, + shrinking=shrinking, + probability=False, + cache_size=cache_size, + class_weight=None, + verbose=verbose, + max_iter=max_iter, + random_state=None, + ) def fit(self, X, y=None, sample_weight=None, **params): - """Learns the soft minimum volume hypersphere around the sample X. + """Learn a soft minimum-volume hypersphere around the sample X. Parameters ---------- @@ -1981,24 +2004,26 @@ def fit(self, X, y=None, sample_weight=None, **params): Set of samples, where n_samples is the number of samples and n_features is the number of features. + y : Ignored + Not used, present for API consistency by convention. + sample_weight : array-like of shape (n_samples,), default=None Per-sample weights. Rescale C per sample. Higher weights force the classifier to put more emphasis on these points. - y : Ignored - not used, present for API consistency by convention. + **params : dict + Additional fit parameters. Returns ------- self : object + Fitted estimator. Notes ----- If X is not a C-ordered contiguous array it is copied. 
- """ - super().fit(X, np.ones(_num_samples(X)), - sample_weight=sample_weight, **params) + super().fit(X, np.ones(_num_samples(X)), sample_weight=sample_weight, **params) self.offset_ = -self._intercept_ return self @@ -2056,8 +2081,9 @@ def predict(self, X): def _more_tags(self): return { - '_xfail_checks': { - 'check_sample_weights_invariance': - 'zero sample_weight is not equivalent to removing samples', + "_xfail_checks": { + "check_sample_weights_invariance": ( + "zero sample_weight is not equivalent to removing samples" + ), } } diff --git a/sklearn/svm/tests/test_sparse.py b/sklearn/svm/tests/test_sparse.py index 893cddff71b6c..0ab99d557125c 100644 --- a/sklearn/svm/tests/test_sparse.py +++ b/sklearn/svm/tests/test_sparse.py @@ -74,6 +74,8 @@ def check_svm_model_equal(dense_svm, sparse_svm, X_train, y_train, X_test): ) if isinstance(dense_svm, svm.OneClassSVM): msg = "cannot use sparse input in 'OneClassSVM' trained on dense data" + elif isinstance(dense_svm, svm.SVDD): + msg = "cannot use sparse input in 'SVDD' trained on dense data" else: assert_array_almost_equal( dense_svm.predict_proba(X_test_dense), sparse_svm.predict_proba(X_test), 4 @@ -336,20 +338,22 @@ def test_sparse_oneclasssvm(datasets_index, kernel): def test_sparse_svdd(): - """Check that sparse SVDD gives the same result as dense SVDD - """ + """Check that sparse SVDD gives the same result as dense SVDD""" # many class dataset: X_blobs, _ = make_blobs(n_samples=100, centers=10, random_state=0) X_blobs = sparse.csr_matrix(X_blobs) - datasets = [[X_sp, None, T], [X2_sp, None, T2], - [X_blobs[:80], None, X_blobs[80:]], - [iris.data, None, iris.data]] + datasets = [ + [X_sp, None, T], + [X2_sp, None, T2], + [X_blobs[:80], None, X_blobs[80:]], + [iris.data, None, iris.data], + ] kernels = ["linear", "poly", "rbf", "sigmoid"] for dataset in datasets: for kernel in kernels: - clf = svm.SVDD(gamma='scale', kernel=kernel) - sp_clf = svm.SVDD(gamma='scale', kernel=kernel) + clf = svm.SVDD(gamma="scale", kernel=kernel) + sp_clf = svm.SVDD(gamma="scale", kernel=kernel) check_svm_model_equal(clf, sp_clf, *dataset) diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index 30a453ef8f4ca..83fc4bf379e01 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -364,16 +364,16 @@ def test_oneclass_fit_params_is_deprecated(): def test_svdd(): # Test the output of libsvm for the SVDD problem with default parameters - clf = svm.SVDD(gamma='scale') + clf = svm.SVDD(gamma="scale") clf.fit(X) pred = clf.predict(T) assert_array_equal(pred, [+1, -1, -1]) - assert pred.dtype == np.dtype('intp') + assert pred.dtype == np.dtype("intp") assert_array_almost_equal(clf.intercept_, [0.2817], decimal=3) - assert_array_almost_equal(clf.dual_coef_, - [[0.7500, 0.7499, 0.7499, 0.7500]], - decimal=3) + assert_array_almost_equal( + clf.dual_coef_, [[0.7500, 0.7499, 0.7499, 0.7500]], decimal=3 + ) assert not hasattr(clf, "coef_") @@ -397,15 +397,15 @@ def test_svdd_decision_function(): X_outliers = rnd.uniform(low=-4, high=4, size=(20, 2)) # fit the model - clf = svm.SVDD(gamma='scale', nu=0.1, - kernel="poly", degree=2, coef0=1.0).fit(X_train) + clf = svm.SVDD(gamma="scale", nu=0.1, kernel="poly", degree=2, coef0=1.0) + clf.fit(X_train) # predict and validate things y_pred_test = clf.predict(X_test) - assert np.mean(y_pred_test == 1) > .9 + assert np.mean(y_pred_test == 1) > 0.9 y_pred_outliers = clf.predict(X_outliers) - assert np.mean(y_pred_outliers == -1) > .65 + assert np.mean(y_pred_outliers == -1) > 
0.65 dec_func_test = clf.decision_function(X_test) assert_array_equal((dec_func_test > 0).ravel(), y_pred_test == 1) @@ -436,17 +436,17 @@ def test_svdd_score_samples(): X_train = np.r_[X + 2, X - 2] # Evaluate the scores on a small uniform 2-d mesh - xx, yy = np.meshgrid(np.linspace(-5, 5, num=26), - np.linspace(-5, 5, num=26)) + xx, yy = np.meshgrid(np.linspace(-5, 5, num=26), np.linspace(-5, 5, num=26)) X_test = np.c_[xx.ravel(), yy.ravel()] # Fit the model for at least 10% support vectors - clf = svm.SVDD(nu=0.1, kernel="poly", gamma='scale', degree=2, coef0=1.0) + clf = svm.SVDD(nu=0.1, kernel="poly", gamma="scale", degree=2, coef0=1.0) clf.fit(X_train) # Check score_samples() implementation - assert_array_almost_equal(clf.score_samples(X_test), - clf.decision_function(X_test) + clf.offset_) + assert_array_almost_equal( + clf.score_samples(X_test), clf.decision_function(X_test) + clf.offset_ + ) # Test the gamma="scale": use .var() for scaling (c.f. issue #12741) gamma = 1.0 / (X.shape[1] * X_train.var()) @@ -454,10 +454,12 @@ def test_svdd_score_samples(): assert_almost_equal(clf._gamma, gamma) # Compute the kernel matrices - k_zx = polynomial_kernel(X_train[clf.support_], X_test, - gamma=gamma, degree=clf.degree, coef0=clf.coef0) - k_xx = polynomial_kernel(X_test, gamma=gamma, - degree=clf.degree, coef0=clf.coef0).diagonal() + k_zx = polynomial_kernel( + X_train[clf.support_], X_test, gamma=gamma, degree=clf.degree, coef0=clf.coef0 + ) + k_xx = polynomial_kernel( + X_test, gamma=gamma, degree=clf.degree, coef0=clf.coef0 + ).diagonal() # Compute the sample scores = decision scores without `-\rho` scores_ = np.dot(clf.dual_coef_, k_zx - k_xx[np.newaxis] / 2).ravel() @@ -497,8 +499,7 @@ def test_oneclass_and_svdd(): assert_array_almost_equal(svdd.intercept_, svdd_intercept, decimal=3) # Evaluate the decision function on a uniformly spaced 2-d mesh - xx, yy = np.meshgrid(np.linspace(-5, 5, num=101), - np.linspace(-5, 5, num=101)) + xx, yy = np.meshgrid(np.linspace(-5, 5, num=101), np.linspace(-5, 5, num=101)) mesh = np.c_[xx.ravel(), yy.ravel()] svdd_df = svdd.decision_function(mesh) @@ -1114,7 +1115,7 @@ def test_immutable_coef_property(): svm.SVR(kernel="linear").fit(iris.data, iris.target), svm.NuSVR(kernel="linear").fit(iris.data, iris.target), svm.OneClassSVM(kernel="linear").fit(iris.data), - svm.SVDD(kernel='linear').fit(iris.data), + svm.SVDD(kernel="linear").fit(iris.data), ] for clf in svms: with pytest.raises(AttributeError): From 742954a50185fd8102fc6c2591c7c219b709003b Mon Sep 17 00:00:00 2001 From: Ivan Nazarov Date: Wed, 10 Nov 2021 16:48:21 +0300 Subject: [PATCH 36/41] update version in svdd docs to 1.1, relocate from 1.0 to 1.1 in whats_new add backticks (#20914), deprecate **params in fit (#20843), add feature_names_in_ (#20787) uncompromisingly reformat plot_oneclass_vs_svdd with black --- doc/whats_new/v1.0.rst | 7 --- doc/whats_new/v1.1.rst | 4 ++ examples/svm/plot_oneclass_vs_svdd.py | 80 ++++++++++++++++----------- sklearn/svm/_classes.py | 15 +++-- 4 files changed, 64 insertions(+), 42 deletions(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index b47ebc9123c23..6ece2f16b6e93 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -1213,13 +1213,6 @@ Changelog now deprecated. Use `scipy.sparse.csgraph.shortest_path` instead. :pr:`20531` by `Tom Dupre la Tour`_. -:mod:`sklearn.svm` -.................. 
- -- |Feature| Added the :class:`svm.SVDD` class for novelty detection based - on soft minimal volume hypersphere around the sample data. :pr:`7910` - by :user:`Ivan Nazarov `. - Code and Documentation Contributors ----------------------------------- diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index 952d2867360a3..d4f18f0c06c26 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -1234,6 +1234,10 @@ Changelog parameters in `fit` instead of `__init__`. :pr:`21436` by :user:`Haidar Almubarak `. +- |Feature| Added the :class:`svm.SVDD` class for novelty detection based + on soft minimal volume hypersphere around the sample data. :pr:`7910` + by :user:`Ivan Nazarov `. + :mod:`sklearn.tree` ................... diff --git a/examples/svm/plot_oneclass_vs_svdd.py b/examples/svm/plot_oneclass_vs_svdd.py index 7353a1d09aae3..6c57b018b27eb 100644 --- a/examples/svm/plot_oneclass_vs_svdd.py +++ b/examples/svm/plot_oneclass_vs_svdd.py @@ -41,22 +41,26 @@ X_outliers = random_state.uniform(low=-4, high=4, size=(20, 2)) # Define the models -nu = .1 -kernels = [("RBF", dict(kernel="rbf", gamma=0.1)), - ("Poly", dict(kernel="poly", degree=2, coef0=1.0)), - ] +nu = 0.1 +kernels = [ + ("RBF", dict(kernel="rbf", gamma=0.1)), + ("Poly", dict(kernel="poly", degree=2, coef0=1.0)), +] for kernel_name, kernel in kernels: # Use low tolerance to ensure better precision of the SVM # optimization procedure. - classifiers = [("OCSVM", svm.OneClassSVM(nu=nu, tol=1e-8, **kernel)), - ("SVDD", svm.SVDD(nu=nu, tol=1e-8, **kernel)), - ] + classifiers = [ + ("OCSVM", svm.OneClassSVM(nu=nu, tol=1e-8, **kernel)), + ("SVDD", svm.SVDD(nu=nu, tol=1e-8, **kernel)), + ] fig = plt.figure(figsize=(12, 5)) - fig.suptitle("One-Class SVM versus SVDD " - "(error train, error novel regular, error novel abnormal)") + fig.suptitle( + "One-Class SVM versus SVDD " + "(error train, error novel regular, error novel abnormal)" + ) for i, (model_name, clf) in enumerate(classifiers): clf.fit(X_train) @@ -74,32 +78,46 @@ Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]) Z = Z.reshape(xx.shape) - ax.contourf(xx, yy, Z, levels=np.linspace(Z.min(), 0, 7), - cmap=plt.cm.PuBu, zorder=-99) - ax.contourf(xx, yy, Z, levels=[0, Z.max()], colors='palevioletred', - zorder=-98) - a = ax.contour(xx, yy, Z, levels=[0], linewidths=2, colors='darkred', - zorder=-97) + ax.contourf( + xx, yy, Z, levels=np.linspace(Z.min(), 0, 7), cmap=plt.cm.PuBu, zorder=-99 + ) + ax.contourf(xx, yy, Z, levels=[0, Z.max()], colors="palevioletred", zorder=-98) + a = ax.contour( + xx, yy, Z, levels=[0], linewidths=2, colors="darkred", zorder=-97 + ) s = 40 - b1 = ax.scatter(X_train[:, 0], X_train[:, 1], s=s, - c='white', edgecolors='k') - b2 = ax.scatter(X_test[:, 0], X_test[:, 1], c='blueviolet', s=s) - c = ax.scatter(X_outliers[:, 0], X_outliers[:, 1], c='gold', s=s) - ax.axis('tight') + b1 = ax.scatter(X_train[:, 0], X_train[:, 1], s=s, c="white", edgecolors="k") + b2 = ax.scatter(X_test[:, 0], X_test[:, 1], c="blueviolet", s=s) + c = ax.scatter(X_outliers[:, 0], X_outliers[:, 1], c="gold", s=s) + ax.axis("tight") ax.set_xlim((-6, 6)) ax.set_ylim((-6, 6)) - ax.set_title("%s %s (%d/%d, %d/%d, %d/%d)" - % (model_name, kernel_name, - n_error_train, len(X_train), - n_error_test, len(X_test), - n_error_outliers, len(X_outliers))) - - ax.legend([a.collections[0], b1, b2, c], - ["learned frontier", "training observations", - "new regular observations", "new abnormal observations"], - loc="lower right", - 
prop=matplotlib.font_manager.FontProperties(size=10)) + ax.set_title( + "%s %s (%d/%d, %d/%d, %d/%d)" + % ( + model_name, + kernel_name, + n_error_train, + len(X_train), + n_error_test, + len(X_test), + n_error_outliers, + len(X_outliers), + ) + ) + + ax.legend( + [a.collections[0], b1, b2, c], + [ + "learned frontier", + "training observations", + "new regular observations", + "new abnormal observations", + ], + loc="lower right", + prop=matplotlib.font_manager.FontProperties(size=10), + ) plt.show() diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py index 4747854001999..3143c5aa76e79 100644 --- a/sklearn/svm/_classes.py +++ b/sklearn/svm/_classes.py @@ -1837,7 +1837,7 @@ class SVDD(OutlierMixin, BaseLibSVM): Read more in the :ref:`User Guide `. - ..versionadded: 1.0 + ..versionadded: 1.1 Parameters ---------- @@ -1912,7 +1912,9 @@ class SVDD(OutlierMixin, BaseLibSVM): n_features_in_ : int Number of features seen during :term:`fit`. - .. versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. n_support_ : ndarray of shape (n_classes,), dtype=int32 Number of support vectors for each class. @@ -2001,8 +2003,8 @@ def fit(self, X, y=None, sample_weight=None, **params): Parameters ---------- X : {array-like, sparse matrix} of shape (n_samples, n_features) - Set of samples, where n_samples is the number of samples and - n_features is the number of features. + Set of samples, where `n_samples` is the number of samples and + `n_features` is the number of features. y : Ignored Not used, present for API consistency by convention. @@ -2014,6 +2016,11 @@ def fit(self, X, y=None, sample_weight=None, **params): **params : dict Additional fit parameters. + .. deprecated:: 1.0 + The `fit` method will not longer accept extra keyword + parameters in 1.2. These keyword parameters were + already discarded. + Returns ------- self : object From 9c95eeab8ca1d12b6fc6a1c13ccad461fbc54f9e Mon Sep 17 00:00:00 2001 From: ivannz Date: Sun, 15 May 2022 11:07:47 +0300 Subject: [PATCH 37/41] move feature announcement from 1.1 to 1.2 --- doc/whats_new/v1.1.rst | 4 ---- doc/whats_new/v1.2.rst | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index d4f18f0c06c26..952d2867360a3 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -1234,10 +1234,6 @@ Changelog parameters in `fit` instead of `__init__`. :pr:`21436` by :user:`Haidar Almubarak `. -- |Feature| Added the :class:`svm.SVDD` class for novelty detection based - on soft minimal volume hypersphere around the sample data. :pr:`7910` - by :user:`Ivan Nazarov `. - :mod:`sklearn.tree` ................... diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index d1ab9c8ed1b36..a2bd9f73422f2 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -327,6 +327,10 @@ Changelog :class:`svm.NuSVR`, :class:`svm.SVR`, :class:`svm.OneClassSVM`. :pr:`22898` by :user:`Meekail Zain `. +- |Feature| Added the :class:`svm.SVDD` class for novelty detection based + on soft minimal volume hypersphere around the sample data. :pr:`7910` + by :user:`Ivan Nazarov `. + :mod:`sklearn.tree` ................... 
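
While the announcement keeps moving between releases, the meaning of ``nu`` documented in the class is unchanged: an upper bound on the fraction of training errors and a lower bound on the fraction of support vectors. A rough empirical check of that property, assuming this branch's `SVDD`; the random data and the tight ``tol`` are arbitrary choices, and the bounds hold only approximately at finite tolerance::

    import numpy as np
    from sklearn.svm import SVDD  # this branch only

    rng = np.random.RandomState(0)
    X = rng.randn(500, 2)

    for nu in (0.05, 0.2, 0.5):
        clf = SVDD(kernel="rbf", gamma=0.5, nu=nu, tol=1e-8).fit(X)
        frac_sv = clf.support_.size / len(X)      # roughly >= nu
        frac_err = (clf.predict(X) == -1).mean()  # roughly <= nu
        print(f"nu={nu:.2f}  support fraction={frac_sv:.2f}  "
              f"train error={frac_err:.2f}")
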
From 36778b4e9042ae6c7bdd4d5e66d2de39fc851d7d Mon Sep 17 00:00:00 2001
From: ivannz
Date: Sun, 15 May 2022 11:25:17 +0300
Subject: [PATCH 38/41] fixed user guide ref in SVDD docstring, copied kernel
 parameter docs from ocSVM, and bumped versionadded; added SVDD to tests which
 involved ocSVM

---
 sklearn/svm/_classes.py       | 47 ++++++++++++++++++-----------------
 sklearn/svm/tests/test_svm.py | 11 ++++----
 2 files changed, 30 insertions(+), 28 deletions(-)

diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py
index 3143c5aa76e79..3a24d17cd63a4 100644
--- a/sklearn/svm/_classes.py
+++ b/sklearn/svm/_classes.py
@@ -1653,7 +1653,6 @@ class OneClassSVM(OutlierMixin, BaseLibSVM):
     sklearn.neighbors.LocalOutlierFactor : Unsupervised Outlier Detection
         using Local Outlier Factor (LOF).
     sklearn.ensemble.IsolationForest : Isolation Forest Algorithm.
-
     sklearn.svm.SVDD : Support vector method for outlier detection via
         a separating soft-margin hypersphere implemented with libsvm with
         a parameter to control the number of support vectors.
@@ -1830,21 +1829,20 @@ class SVDD(OutlierMixin, BaseLibSVM):
     """Support Vector Data Description for Unsupervised Outlier Detection.
 
     Estimate the support of a high-dimensional distribution by finding the
-    tightest soft hypersphere around a data set, which permits at most a
-    fraction ``nu`` (``0 < nu <= 1``) of the data as outliers.
+    tightest soft boundary hypersphere around a data set, which permits at
+    most a fraction ``nu`` (``0 < nu <= 1``) of the data as outliers.
 
     The implementation is based on libsvm.
 
-    Read more in the :ref:`User Guide `.
+    Read more in the :ref:`User Guide `.
 
-    ..versionadded: 1.1
+    .. versionadded:: 1.2
 
     Parameters
     ----------
-    kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'
+    kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, \
+        default='rbf'
         Specifies the kernel type to be used in the algorithm.
-        It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or
-        a callable.
         If none is given, 'rbf' will be used. If a callable is given it is
         used to precompute the kernel matrix.
 
@@ -1916,6 +1914,9 @@ class SVDD(OutlierMixin, BaseLibSVM):
         Names of features seen during :term:`fit`. Defined only when `X`
         has feature names that are all strings.
 
+    n_iter_ : int
+        Number of iterations run by the optimization routine to fit the model.
+
     n_support_ : ndarray of shape (n_classes,), dtype=int32
         Number of support vectors for each class.
 
@@ -1980,20 +1981,20 @@ def __init__(
     ):
 
         super().__init__(
-            kernel=kernel,
-            degree=degree,
...
+            kernel,
+            degree,
+            gamma,
+            coef0,
+            tol,
+            0.0,
+            nu,
+            0.0,
+            shrinking,
+            False,
+            cache_size,
+            None,
+            verbose,
+            max_iter,
             random_state=None,
         )
 
@@ -2030,7 +2031,7 @@ def fit(self, X, y=None, sample_weight=None, **params):
         -----
         If X is not a C-ordered contiguous array it is copied.
""" - super().fit(X, np.ones(_num_samples(X)), sample_weight=sample_weight, **params) + super().fit(X, np.ones(_num_samples(X)), sample_weight=sample_weight) self.offset_ = -self._intercept_ return self diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index 83fc4bf379e01..d8a760bedc3ed 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -14,7 +14,7 @@ from numpy.testing import assert_allclose from scipy import sparse from sklearn import svm, linear_model, datasets, metrics, base -from sklearn.svm import LinearSVC, OneClassSVM, SVR, NuSVR, LinearSVR +from sklearn.svm import LinearSVC, OneClassSVM, SVR, NuSVR, LinearSVR, SVDD from sklearn.model_selection import train_test_split from sklearn.datasets import make_classification, make_blobs from sklearn.metrics import f1_score @@ -1441,9 +1441,9 @@ def test_linearsvm_liblinear_sample_weight(SVM, params): assert_allclose(X_est_no_weight, X_est_with_weight) -@pytest.mark.parametrize("Klass", (OneClassSVM, SVR, NuSVR)) +@pytest.mark.parametrize("Klass", (OneClassSVM, SVR, NuSVR, SVDD)) def test_n_support(Klass): - # Make n_support is correct for oneclass and SVR (used to be + # Make sure n_support is correct for oneclass, SVDD and SVR (used to be # non-initialized) # this is a non regression test for issue #14774 X = np.array([[0], [0.44], [0.45], [0.46], [1]]) @@ -1514,6 +1514,7 @@ def test_svc_raises_error_internal_representation(): (svm.SVR, int), (svm.NuSVR, int), (svm.OneClassSVM, int), + (svm.SVDD, int), ], ) @pytest.mark.parametrize( @@ -1527,8 +1528,8 @@ def test_svc_raises_error_internal_representation(): def test_n_iter_libsvm(estimator, expected_n_iter_type, dataset): # Check that the type of n_iter_ is correct for the classes that inherit # from BaseSVC. - # Note that for SVC, and NuSVC this is an ndarray; while for SVR, NuSVR, and - # OneClassSVM, it is an int. + # Note that for SVC, and NuSVC this is an ndarray; while for SVR, NuSVR, + # SVDD and OneClassSVM, it is an int. # For SVC and NuSVC also check the shape of n_iter_. X, y = dataset n_iter = estimator(kernel="linear").fit(X, y).n_iter_ From 4e5ca417b0b2b58f478e989a5392c10f28a24426 Mon Sep 17 00:00:00 2001 From: ivannz Date: Tue, 14 Jun 2022 20:13:52 +0300 Subject: [PATCH 39/41] Removed deprecated `class_weight_` from the docs of SVDD (related to #22898) --- sklearn/svm/_classes.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py index 3a24d17cd63a4..6ed99418b9d46 100644 --- a/sklearn/svm/_classes.py +++ b/sklearn/svm/_classes.py @@ -1887,10 +1887,6 @@ class SVDD(OutlierMixin, BaseLibSVM): Attributes ---------- - class_weight_ : ndarray of shape (n_classes,) - Multipliers of parameter C for each class. - Computed based on the ``class_weight`` parameter. - coef_ : ndarray of shape (1, n_features) Weights assigned to the features (coefficients in the primal problem). This is only available in the case of a linear kernel. 
From 3cc3610d2d1600ee44ba61feafaebece5c22ebc4 Mon Sep 17 00:00:00 2001 From: ivannz Date: Tue, 30 Aug 2022 00:16:55 +0300 Subject: [PATCH 40/41] add parameter validation to SVDD and update dunder-docs (similar to ocSVM #24001) finish v1.2 deprecation of params kwargs in `.fit` of SVDD (similar to ocSVM #20843) TST ensure SVDD passes param-validation test_common.py due to #23462 (#22722) --- sklearn/svm/_classes.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py index 6ed99418b9d46..420c932edd419 100644 --- a/sklearn/svm/_classes.py +++ b/sklearn/svm/_classes.py @@ -1848,7 +1848,7 @@ class SVDD(OutlierMixin, BaseLibSVM): degree : int, default=3 Degree of the polynomial kernel function ('poly'). - Ignored by all other kernels. + Must be non-negative. Ignored by all other kernels. gamma : {'scale', 'auto'} or float, default='scale' Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. @@ -1856,6 +1856,7 @@ class SVDD(OutlierMixin, BaseLibSVM): - if ``gamma='scale'`` (default) is passed then it uses 1 / (n_features * X.var()) as value of gamma, - if 'auto', uses 1 / n_features. + - if float, must be non-negative. coef0 : float, default=0.0 Independent term in kernel function. @@ -1933,9 +1934,9 @@ class SVDD(OutlierMixin, BaseLibSVM): See Also -------- - OneClassSVM : Support vector method for outlier detection via a separating - soft-margin hyperplane implemented with libsvm with a parameter to - control the number of support vectors. + sklearn.svm.OneClassSVM : Support vector method for outlier detection via + a separating soft-margin hyperplane implemented with libsvm with + a parameter to control the number of support vectors. References ---------- @@ -1961,6 +1962,10 @@ class SVDD(OutlierMixin, BaseLibSVM): _impl = "svdd_l1" + _parameter_constraints = {**BaseLibSVM._parameter_constraints} # type: ignore + for unused_param in ["C", "class_weight", "epsilon", "probability", "random_state"]: + _parameter_constraints.pop(unused_param) + def __init__( self, *, @@ -1994,7 +1999,7 @@ def __init__( random_state=None, ) - def fit(self, X, y=None, sample_weight=None, **params): + def fit(self, X, y=None, sample_weight=None): """Learn a soft minimum-volume hypersphere around the sample X. Parameters @@ -2010,14 +2015,6 @@ def fit(self, X, y=None, sample_weight=None, **params): Per-sample weights. Rescale C per sample. Higher weights force the classifier to put more emphasis on these points. - **params : dict - Additional fit parameters. - - .. deprecated:: 1.0 - The `fit` method will not longer accept extra keyword - parameters in 1.2. These keyword parameters were - already discarded. - Returns ------- self : object From 80a172529f7f7a1596a18c8cee1ceab741e0f8d7 Mon Sep 17 00:00:00 2001 From: ivannz Date: Sun, 4 Sep 2022 13:40:02 +0300 Subject: [PATCH 41/41] clarify the parent space of the SVDD hypersphere (#r374672496) add SVDD announcement to svm.cpp, fix stray trailing spaces (#r374671161) --- doc/modules/svm.rst | 7 +- sklearn/svm/src/libsvm/svm.cpp | 199 +++++++++++++++++---------------- 2 files changed, 108 insertions(+), 98 deletions(-) diff --git a/doc/modules/svm.rst b/doc/modules/svm.rst index 2f2ef8474a776..9203f44abfc10 100644 --- a/doc/modules/svm.rst +++ b/doc/modules/svm.rst @@ -851,10 +851,11 @@ SVDD Support Vector Data Description (SVDD), proposed by Tax and Duin (2004), aims at finding a spherically shaped boundary around a data set. 
Specifically, -it computes a minimum volume hypersphere containing the most of the data with -the number of outliers controlled by the parameter of the model. +it computes a minimum volume hypersphere (in the feature space induced by the +kernel) containing the most of the data with the number of outliers controlled +by the parameter of the model. -The original formulation suffered from non-convexity issues related to optimality of +The original formulation suffered from non-convexity issues related to optimality of the attained solution for certain values of the regularization parameter :math:`C`. Chang, Lee, and Lin (2013) suggested a reformulation of the SVDD model which had a well-defined and provably unique global solution for any :math:`C>0`. diff --git a/sklearn/svm/src/libsvm/svm.cpp b/sklearn/svm/src/libsvm/svm.cpp index 21bec8bd93d7d..5d04e735a002e 100644 --- a/sklearn/svm/src/libsvm/svm.cpp +++ b/sklearn/svm/src/libsvm/svm.cpp @@ -31,7 +31,7 @@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* +/* Modified 2010: - Support for dense data by Ming-Fang Weng @@ -59,6 +59,15 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - Exposed number of iterations run in optimization, Juan Martín Loyola. See + + Modified 2022: + + - Implemented the Support Vector Data Description based on the works + by Tax and Duin (2004) and Chang, Lee, and Lin (2013). The model was + extended to support weighted observations and reparameterized to the + fraction of outliers (nu). + Nazarov Ivan + See */ #include @@ -129,7 +138,7 @@ static void info(const char *fmt,...) and dense versions of this library */ #ifdef _DENSE_REP #ifdef PREFIX - #undef PREFIX + #undef PREFIX #endif #ifdef NAMESPACE #undef NAMESPACE @@ -140,7 +149,7 @@ and dense versions of this library */ #else /* sparse representation */ #ifdef PREFIX - #undef PREFIX + #undef PREFIX #endif #ifdef NAMESPACE #undef NAMESPACE @@ -167,7 +176,7 @@ class Cache // return some position p where [p,len) need to be filled // (p >= len if nothing needs to be filled) int get_data(const int index, Qfloat **data, int len); - void swap_index(int i, int j); + void swap_index(int i, int j); private: int l; long int size; @@ -443,7 +452,7 @@ double Kernel::dot(const PREFIX(node) *px, const PREFIX(node) *py, BlasFunctions ++py; else ++px; - } + } } return sum; } @@ -487,7 +496,7 @@ double Kernel::k_function(const PREFIX(node) *x, const PREFIX(node) *y, else { if(x->index > y->index) - { + { sum += y->value * y->value; ++y; } @@ -524,7 +533,7 @@ double Kernel::k_function(const PREFIX(node) *x, const PREFIX(node) *y, #endif } default: - return 0; // Unreachable + return 0; // Unreachable } } // An SMO algorithm in Fan et al., JMLR 6(2005), p. 
1889--1918 @@ -602,7 +611,7 @@ class Solver { virtual double calculate_rho(); virtual void do_shrinking(); private: - bool be_shrunk(int i, double Gmax1, double Gmax2); + bool be_shrunk(int i, double Gmax1, double Gmax2); }; void Solver::swap_index(int i, int j) @@ -750,11 +759,11 @@ void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_, else counter = 1; // do shrinking next iteration } - + ++iter; // update alpha[i] and alpha[j], handle bounds carefully - + const Qfloat *Q_i = Q.get_Q(i,active_size); const Qfloat *Q_j = Q.get_Q(j,active_size); @@ -773,7 +782,7 @@ void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_, double diff = alpha[i] - alpha[j]; alpha[i] += delta; alpha[j] += delta; - + if(diff > 0) { if(alpha[j] < 0) @@ -855,7 +864,7 @@ void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_, double delta_alpha_i = alpha[i] - old_alpha_i; double delta_alpha_j = alpha[j] - old_alpha_j; - + for(int k=0;k= Gmax) @@ -990,7 +999,7 @@ int Solver::select_working_set(int &out_i, int &out_j) Gmax2 = G[j]; if (grad_diff > 0) { - double obj_diff; + double obj_diff; double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j]; if (quad_coef > 0) obj_diff = -(grad_diff*grad_diff)/quad_coef; @@ -1014,7 +1023,7 @@ int Solver::select_working_set(int &out_i, int &out_j) Gmax2 = -G[j]; if (grad_diff > 0) { - double obj_diff; + double obj_diff; double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j]; if (quad_coef > 0) obj_diff = -(grad_diff*grad_diff)/quad_coef; @@ -1052,7 +1061,7 @@ bool Solver::be_shrunk(int i, double Gmax1, double Gmax2) { if(y[i]==+1) return(G[i] > Gmax2); - else + else return(G[i] > Gmax1); } else @@ -1068,27 +1077,27 @@ void Solver::do_shrinking() // find maximal violating pair first for(i=0;i= Gmax1) Gmax1 = -G[i]; } - if(!is_lower_bound(i)) + if(!is_lower_bound(i)) { if(G[i] >= Gmax2) Gmax2 = G[i]; } } - else + else { - if(!is_upper_bound(i)) + if(!is_upper_bound(i)) { if(-G[i] >= Gmax2) Gmax2 = -G[i]; } - if(!is_lower_bound(i)) + if(!is_lower_bound(i)) { if(G[i] >= Gmax1) Gmax1 = G[i]; @@ -1096,7 +1105,7 @@ void Solver::do_shrinking() } } - if(unshrink == false && Gmax1 + Gmax2 <= eps*10) + if(unshrink == false && Gmax1 + Gmax2 <= eps*10) { unshrink = true; reconstruct_gradient(); @@ -1235,14 +1244,14 @@ int Solver_NU::select_working_set(int &out_i, int &out_j) { if(y[j]==+1) { - if (!is_lower_bound(j)) + if (!is_lower_bound(j)) { double grad_diff=Gmaxp+G[j]; if (G[j] >= Gmaxp2) Gmaxp2 = G[j]; if (grad_diff > 0) { - double obj_diff; + double obj_diff; double quad_coef = QD[ip]+QD[j]-2*Q_ip[j]; if (quad_coef > 0) obj_diff = -(grad_diff*grad_diff)/quad_coef; @@ -1266,7 +1275,7 @@ int Solver_NU::select_working_set(int &out_i, int &out_j) Gmaxn2 = -G[j]; if (grad_diff > 0) { - double obj_diff; + double obj_diff; double quad_coef = QD[in]+QD[j]-2*Q_in[j]; if (quad_coef > 0) obj_diff = -(grad_diff*grad_diff)/quad_coef; @@ -1301,14 +1310,14 @@ bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, doubl { if(y[i]==+1) return(-G[i] > Gmax1); - else + else return(-G[i] > Gmax4); } else if(is_lower_bound(i)) { if(y[i]==+1) return(G[i] > Gmax2); - else + else return(G[i] > Gmax3); } else @@ -1337,14 +1346,14 @@ void Solver_NU::do_shrinking() if(!is_lower_bound(i)) { if(y[i]==+1) - { + { if(G[i] > Gmax2) Gmax2 = G[i]; } else if(G[i] > Gmax3) Gmax3 = G[i]; } } - if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10) + if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10) { unshrink = true; 
reconstruct_gradient(); @@ -1407,12 +1416,12 @@ double Solver_NU::calculate_rho() r1 = sum_free1/nr_free1; else r1 = (ub1+lb1)/2; - + if(nr_free2 > 0) r2 = sum_free2/nr_free2; else r2 = (ub2+lb2)/2; - + si->r = (r1+r2)/2; return (r1-r2)/2; } @@ -1421,7 +1430,7 @@ double Solver_NU::calculate_rho() // Q matrices for various formulations // class SVC_Q: public Kernel -{ +{ public: SVC_Q(const PREFIX(problem)& prob, const svm_parameter& param, const schar *y_, BlasFunctions *blas_functions) :Kernel(prob.l, prob.x, param, blas_functions) @@ -1432,7 +1441,7 @@ class SVC_Q: public Kernel for(int i=0;i*kernel_function)(i,i); } - + Qfloat *get_Q(int i, int len) const { Qfloat *data; @@ -1481,7 +1490,7 @@ class ONE_CLASS_Q: public Kernel for(int i=0;i*kernel_function)(i,i); } - + Qfloat *get_Q(int i, int len) const { Qfloat *data; @@ -1517,7 +1526,7 @@ class ONE_CLASS_Q: public Kernel }; class SVR_Q: public Kernel -{ +{ public: SVR_Q(const PREFIX(problem)& prob, const svm_parameter& param, BlasFunctions *blas_functions) :Kernel(prob.l, prob.x, param, blas_functions) @@ -1547,7 +1556,7 @@ class SVR_Q: public Kernel swap(index[i],index[j]); swap(QD[i],QD[j]); } - + Qfloat *get_Q(int i, int len) const { Qfloat *data; @@ -1663,7 +1672,7 @@ static void solve_nu_svc( C[i] = prob->W[i]; } - + double nu_l = 0; for(i=0;iupper_bound[i] /= r; + si->upper_bound[i] /= r; } si->rho /= r; @@ -1956,7 +1965,7 @@ static void solve_svdd_l1( struct decision_function { double *alpha; - double rho; + double rho; int n_iter; }; @@ -1969,23 +1978,23 @@ static decision_function svm_train_one( switch(param->svm_type) { case C_SVC: - si.upper_bound = Malloc(double,prob->l); + si.upper_bound = Malloc(double,prob->l); solve_c_svc(prob,param,alpha,&si,Cp,Cn,blas_functions); break; case NU_SVC: - si.upper_bound = Malloc(double,prob->l); + si.upper_bound = Malloc(double,prob->l); solve_nu_svc(prob,param,alpha,&si,blas_functions); break; case ONE_CLASS: - si.upper_bound = Malloc(double,prob->l); + si.upper_bound = Malloc(double,prob->l); solve_one_class(prob,param,alpha,&si,blas_functions); break; case EPSILON_SVR: - si.upper_bound = Malloc(double,2*prob->l); + si.upper_bound = Malloc(double,2*prob->l); solve_epsilon_svr(prob,param,alpha,&si,blas_functions); break; case NU_SVR: - si.upper_bound = Malloc(double,2*prob->l); + si.upper_bound = Malloc(double,2*prob->l); solve_nu_svr(prob,param,alpha,&si,blas_functions); break; case SVDD_L1: @@ -2033,7 +2042,7 @@ static decision_function svm_train_one( // Platt's binary SVM Probabilistic Output: an improvement from Lin et al. 
 static void sigmoid_train(
-	int l, const double *dec_values, const double *labels, 
+	int l, const double *dec_values, const double *labels,
 	double& A, double& B)
 {
 	double prior1=0, prior0 = 0;
@@ -2042,7 +2051,7 @@ static void sigmoid_train(
 	for (i=0;i<l;i++)
 		if (labels[i] > 0) prior1+=1;
 		else prior0+=1;
-	
+
 	int max_iter=100;	// Maximal number of iterations
 	double min_step=1e-10;	// Minimal step taken in line search
 	double sigma=1e-12;	// For numerically strict PD of Hessian
@@ -2052,8 +2061,8 @@ static void sigmoid_train(
 	double *t=Malloc(double,l);
 	double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
 	double newA,newB,newf,d1,d2;
-	int iter; 
-	
+	int iter;
+
 	// Initial Point and Initial Fun Value
 	A=0.0; B=log((prior0+1.0)/(prior1+1.0));
 	double fval = 0.0;
@@ -2163,7 +2172,7 @@ static void multiclass_probability(int k, double **r, double *p)
 	double **Q=Malloc(double *,k);
 	double *Qp=Malloc(double,k);
 	double pQp, eps=0.005/k;
-	for (t=0;t<k;t++) 
+	for (t=0;t<k;t++)
 	{
 		p[t]=1.0/k;  // Valid if k = 1
 		Q[t]=Malloc(double,k);
 #ifdef _DENSE_REP
-				PREFIX(predict_values)(submodel,(prob->x+perm[j]),&(dec_values[perm[j]]), blas_functions); 
+				PREFIX(predict_values)(submodel,(prob->x+perm[j]),&(dec_values[perm[j]]), blas_functions);
 #else
-				PREFIX(predict_values)(submodel,prob->x[perm[j]],&(dec_values[perm[j]]), blas_functions); 
+				PREFIX(predict_values)(submodel,prob->x[perm[j]],&(dec_values[perm[j]]), blas_functions);
 #endif
 				// ensure +1 -1 order; reason not using CV subroutine
 				dec_values[perm[j]] *= submodel->label[0];
-			} 
+			}
 			PREFIX(free_and_destroy_model)(&submodel);
 			PREFIX(destroy_param)(&subparam);
 		}
 		free(subprob.x);
 		free(subprob.y);
 		free(subprob.W);
-	} 
+	}
 	sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
 	free(dec_values);
 	free(perm);
 }
 
-// Return parameter of a Laplace distribution 
+// Return parameter of a Laplace distribution
 static double svm_svr_probability(
 	const PREFIX(problem) *prob, const svm_parameter *param, BlasFunctions *blas_functions)
 {
@@ -2336,15 +2345,15 @@ static double svm_svr_probability(
 	{
 		ymv[i]=prob->y[i]-ymv[i];
 		mae += fabs(ymv[i]);
-	} 
+	}
 	mae /= prob->l;
 	double std=sqrt(2*mae*mae);
 	int count=0;
 	mae=0;
 	for(i=0;i<prob->l;i++)
-		if (fabs(ymv[i]) > 5*std) 
+		if (fabs(ymv[i]) > 5*std)
 			count=count+1;
-		else 
+		else
 			mae+=fabs(ymv[i]);
 	mae /= (prob->l-count);
 	info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
@@ -2363,7 +2372,7 @@ static void svm_group_classes(const PREFIX(problem) *prob, int *nr_class_ret, in
 	int nr_class = 0;
 	int *label = Malloc(int,max_nr_class);
 	int *count = Malloc(int,max_nr_class);
-	int *data_label = Malloc(int,l);	
+	int *data_label = Malloc(int,l);
 	int i, j, this_label, this_count;
 
 	for(i=0;i<l;i++)
 
 //
 // Remove zero weighed data as libsvm and some liblinear solvers require C > 0.
 //
-static void remove_zero_weight(PREFIX(problem) *newprob, const PREFIX(problem) *prob) 
+static void remove_zero_weight(PREFIX(problem) *newprob, const PREFIX(problem) *prob)
 {
 	int i;
 	int l = 0;
@@ -2503,7 +2512,7 @@ PREFIX(model) *PREFIX(train)(const PREFIX(problem) *prob, const svm_parameter *p
 		model->probA = NULL;
 		model->probB = NULL;
 		model->sv_coef = Malloc(double *,1);
-		if(param->probability && 
+		if(param->probability &&
 		   (param->svm_type == EPSILON_SVR ||
 		    param->svm_type == NU_SVR))
 		{
@@ -2537,7 +2546,7 @@ PREFIX(model) *PREFIX(train)(const PREFIX(problem) *prob, const svm_parameter *p
 				model->sv_ind[j] = i;
 				model->sv_coef[0][j] = f.alpha[i];
 				++j;
-			} 
+			}
 
 		free(f.alpha);
 	}
@@ -2552,7 +2561,7 @@ PREFIX(model) *PREFIX(train)(const PREFIX(problem) *prob, const svm_parameter *p
 		int *perm = Malloc(int,l);
 
 		// group training data of the same class
-		NAMESPACE::svm_group_classes(prob,&nr_class,&label,&start,&count,perm);	
+		NAMESPACE::svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
 #ifdef _DENSE_REP
 		PREFIX(node) *x = Malloc(PREFIX(node),l);
 #else
@@ -2573,7 +2582,7 @@ PREFIX(model) *PREFIX(train)(const PREFIX(problem) *prob, const svm_parameter *p
 		for(i=0;i<nr_class;i++)
 			weighted_C[i] = param->C;
 		for(i=0;i<param->nr_weight;i++)
-		{	
+		{
 			int j;
 			for(j=0;j<nr_class;j++)
 				if(param->weight_label[i] == label[j])
@@ -2585,7 +2594,7 @@ PREFIX(model) *PREFIX(train)(const PREFIX(problem) *prob, const svm_parameter *p
 		}
 
 		// train k*(k-1)/2 models
-		
+
 		bool *nonzero = Malloc(bool,l);
 		for(i=0;i<l;i++)
 		model->nr_class = nr_class;
-		
+
 		model->label = Malloc(int,nr_class);
 		for(i=0;i<nr_class;i++)
 			model->label[i] = label[i];
-		
+
 		model->rho = Malloc(double,nr_class*(nr_class-1)/2);
 		model->n_iter = Malloc(int,nr_class*(nr_class-1)/2);
 		for(i=0;i<nr_class*(nr_class-1)/2;i++)
 				model->SV[p] = x[i];
 				model->sv_ind[p] = perm[i];
 				++p;
@@ -2730,7 +2739,7 @@ PREFIX(model) *PREFIX(train)(const PREFIX(problem) *prob, const svm_parameter *p
 			int sj = start[j];
 			int ci = count[i];
 			int cj = count[j];
-			
+
 			int q = nz_start[i];
 			int k;
 			for(k=0;k<ci;k++)
 					model->sv_coef[i][q++] = f[p].alpha[ci+k];
 				++p;
 			}
-		
+
 		free(label);
 		free(probA);
 		free(probB);
@@ -2794,7 +2803,7 @@ void PREFIX(cross_validation)(const PREFIX(problem) *prob, const svm_parameter *
 	int *index = Malloc(int,l);
 	for(i=0;i<l;i++)
-			if(param->probability && 
+			if(param->probability &&
 			   (param->svm_type == C_SVC || param->svm_type == NU_SVC))
 			{
 				double *prob_estimates=Malloc(double, PREFIX(get_nr_class)(submodel));
@@ -2884,7 +2893,7 @@ void PREFIX(cross_validation)(const PREFIX(problem) *prob, const svm_parameter *
 #else
 				target[perm[j]] = PREFIX(predict_probability)(submodel,prob->x[perm[j]],prob_estimates, blas_functions);
 #endif
-				free(prob_estimates); 
+				free(prob_estimates);
 			}
 			else
 				for(j=begin;j<end;j++)
 		int nr_class = model->nr_class;
 		int l = model->l;
-		
+
 		double *kvalue = Malloc(double,l);
 		for(i=0;i<l;i++)
 				int ci = model->nSV[i];
 				int cj = model->nSV[j];
-				
+
 				int k;
 				double *coef1 = model->sv_coef[j-1];
 				double *coef2 = model->sv_coef[i];
@@ -3035,7 +3044,7 @@ double PREFIX(predict)(const PREFIX(model) *model, const PREFIX(node) *x, BlasFu
 	   model->param.svm_type == NU_SVR ||
 	   model->param.svm_type == SVDD_L1)
 		dec_values = Malloc(double, 1);
-	else 
+	else
 		dec_values = Malloc(double, nr_class*(nr_class-1)/2);
 	double pred_result = PREFIX(predict_values)(model, x, dec_values, blas_functions);
 	free(dec_values);
@@ -3074,10 +3083,10 @@ double PREFIX(predict_probability)(
 		for(i=0;i<nr_class;i++)
 			free(pairwise_prob[i]);
 		free(pairwise_prob);
 		return model->label[prob_max_idx];
 	}
-	else 
+	else
 		return PREFIX(predict)(model, x, blas_functions);
 }
@@ -3154,9 +3163,9 @@ const char *PREFIX(check_parameter)(const PREFIX(problem) *prob, const svm_param
 	   svm_type != NU_SVR &&
 	   svm_type != SVDD_L1)
 		return "unknown svm type";
-	
+
 	// kernel_type, degree
-	
+
 	int kernel_type = param->kernel_type;
 	if(kernel_type != LINEAR &&
 	   kernel_type != POLY &&
@@ -3210,7 +3219,7 @@ const char *PREFIX(check_parameter)(const PREFIX(problem) *prob, const svm_param
 
 	// check whether nu-svc is feasible
-	
+
 	if(svm_type == NU_SVC)
 	{
 		int l = prob->l;
@@ -3244,7 +3253,7 @@ const char *PREFIX(check_parameter)(const PREFIX(problem) *prob, const svm_param
 				++nr_class;
 			}
 		}
-	
+
 		for(i=0;i<nr_class;i++)
-	else if(prob->l != newprob.l && 
+	else if(prob->l != newprob.l &&
 	        svm_type == C_SVC)
 	{
 		bool only_one_label = true;
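For reference, a minimal usage sketch of the estimator that the `SVDD_L1` solver above is wired to (not part of the patch itself). It assumes a build of scikit-learn with this patch applied, where ``sklearn.svm.SVDD`` mirrors the ``OneClassSVM`` constructor (``kernel``, ``gamma``, ``nu``) and accepts ``sample_weight`` in ``fit``; stock scikit-learn has no ``SVDD`` class::

    import numpy as np
    from sklearn.svm import OneClassSVM, SVDD  # SVDD exists only with this patch

    rng = np.random.RandomState(0)
    X = 0.3 * rng.randn(100, 2)                         # dense cluster near the origin
    X_test = np.r_[X + 2, rng.uniform(-4, 4, (20, 2))]  # shifted points plus noise

    # With a stationary (RBF) kernel the One-Class SVM and the reparametrized
    # SVDD-L1 model are expected to produce matching decision rules.
    ocsvm = OneClassSVM(kernel="rbf", gamma=0.1, nu=0.1).fit(X)
    svdd = SVDD(kernel="rbf", gamma=0.1, nu=0.1).fit(X)
    print("label agreement:", np.mean(ocsvm.predict(X_test) == svdd.predict(X_test)))

    # sample_weight rescales the per-sample box constraint in the SVDD_L1 solver
    # (C * sample_weight[i]), so down-weighted points are cheaper to leave outside.
    w = rng.uniform(0.5, 2.0, size=X.shape[0])
    svdd_weighted = SVDD(kernel="rbf", gamma=0.1, nu=0.1).fit(X, sample_weight=w)

With a non-stationary kernel (e.g. ``kernel="poly"``) the two decision functions are not expected to agree; the ``plot_oneclass_vs_svdd.py`` example added by this patch illustrates both cases.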