[MRG+1] Uncontroversial fixes from estimator tags branch #8086

Merged Jun 6, 2017 (24 commits)

Commits
7ef1deb
some bug fixes.
amueller Dec 19, 2016
c99b9ec
minor fixes to whatsnew
amueller Dec 19, 2016
534b0c5
typo in whatsnew
amueller Dec 19, 2016
c7cd00d
add test for n_components = 1 transform in dict learning
amueller Dec 19, 2016
91559ce
feature extraction doc fix
amueller Dec 20, 2016
6ee218d
fix broken test
amueller Feb 25, 2017
27775e9
revert aggressive input validation changes
amueller May 15, 2017
f4c9d60
in SelectFromModel, don't store threshold_ in transform. If we called…
amueller May 15, 2017
0555e22
Merge branch 'master' into tags_backup
amueller May 15, 2017
30bdd04
move score from EllipticEnvelope to OutlierDetectionMixin
amueller May 15, 2017
5ed1174
revert changes to Tfidf documentation
amueller May 15, 2017
adee7a3
remove dummy input validation from whatsnew
amueller May 15, 2017
a83697f
fix text feature tests
amueller May 15, 2017
9ce4747
rewrite from_model threshold again...
amueller May 15, 2017
f727e89
remove stray condition
amueller May 16, 2017
8bbb742
fix self.estimator -> estimator, slightly more interesting test
amueller May 17, 2017
746ccdb
typo in comment
amueller May 17, 2017
e2d0464
Merge branch 'master' into tags_backup
amueller Jun 3, 2017
9564e0f
Fix issues in SparseEncoder, add tests.
amueller Jun 5, 2017
8f4ec6a
Merge branch 'master' into tags_backup
amueller Jun 5, 2017
c712ca3
Merge branch 'tags_backup' of github.com:amueller/scikit-learn into t…
amueller Jun 5, 2017
bb7f085
minor fixes in whats_new.rst
amueller Jun 6, 2017
c18d646
slightly more consistency with tuples for shapes
amueller Jun 6, 2017
8a3ea13
not longer typo
amueller Jun 6, 2017
2 changes: 1 addition & 1 deletion doc/modules/model_evaluation.rst
@@ -173,7 +173,7 @@ Here is an example of building custom scorers, and of using the
>>> # and predictions defined below.
>>> loss = make_scorer(my_custom_loss_func, greater_is_better=False)
>>> score = make_scorer(my_custom_loss_func, greater_is_better=True)
>>> ground_truth = [[1, 1]]
>>> ground_truth = [[1], [1]]
>>> predictions = [0, 1]
>>> from sklearn.dummy import DummyClassifier
>>> clf = DummyClassifier(strategy='most_frequent', random_state=0)
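The corrected doctest pairs with the check_consistent_length(X, y) call added to sklearn/dummy.py further down in this PR: [[1, 1]] is a single sample with two features, which no longer matches the two labels in predictions. A minimal sketch of the corrected usage:

from sklearn.dummy import DummyClassifier

ground_truth = [[1], [1]]   # two samples, one feature each
predictions = [0, 1]        # two labels: lengths are now consistent
clf = DummyClassifier(strategy='most_frequent', random_state=0)
clf = clf.fit(ground_truth, predictions)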
37 changes: 33 additions & 4 deletions doc/whats_new.rst
@@ -213,7 +213,8 @@ Bug fixes

- Fixed a bug where :class:`sklearn.linear_model.LassoLars` does not give
the same result as the LassoLars implementation available
in R (lars library). :issue:`7849` by :user:`Jair Montoya Martinez <jmontoyam>`
in R (lars library). :issue:`7849` by :user:`Jair Montoya Martinez <jmontoyam>`.

- Some ``fetch_`` functions in `sklearn.datasets` were ignoring the
``download_if_missing`` keyword. This was fixed in :issue:`7944` by
:user:`Ralf Gommers <rgommers>`.
@@ -223,9 +224,9 @@ Bug fixes
where a float being compared to ``0.0`` using ``==`` caused a divide by zero
error. This was fixed in :issue:`7970` by :user:`He Chen <chenhe95>`.

- Fix a bug regarding fitting :class:`sklearn.cluster.KMeans` with a
sparse array X and initial centroids, where X's means were unnecessarily
being subtracted from the centroids. :issue:`7872` by `Josh Karnofsky <https://github.com/jkarno>`_.
- Fix a bug regarding fitting :class:`sklearn.cluster.KMeans` with a sparse
array X and initial centroids, where X's means were unnecessarily being
subtracted from the centroids. :issue:`7872` by :user:`Josh Karnofsky <jkarno>`.

- Fix estimators to accept a ``sample_weight`` parameter of type
``pandas.Series`` in their ``fit`` function. :issue:`7825` by
@@ -249,6 +250,20 @@ Bug fixes
``min_impurity_split`` parameter.
:issue:`8006` by :user:`Sebastian Pölsterl <sebp>`.

- Fixes to the input validation in
:class:`sklearn.covariance.EllipticEnvelope`.
:issue:`8086` by `Andreas Müller`_.

- Fix output shape and bugs with ``n_jobs > 1`` in
:class:`sklearn.decomposition.SparseCoder` transform and
:func:`sklearn.decomposition.sparse_encode`
for one-dimensional data and one component.
This also impacts the output shape of :class:`sklearn.decomposition.DictionaryLearning`.
:issue:`8086` by `Andreas Müller`_.

- Several fixes to input validation in
:class:`multiclass.OutputCodeClassifier`.
:issue:`8086` by `Andreas Müller`_.

- Fix a bug where
:class:`sklearn.ensemble.gradient_boosting.QuantileLossFunction` computed
negative errors for negative values of ``ytrue - ypred`` leading to
@@ -336,6 +351,20 @@ API changes summary
:func:`sklearn.model_selection.cross_val_predict`.
:issue:`2879` by :user:`Stephen Hoover <stephen-hoover>`.


- Gradient boosting base models are no longer estimators. By `Andreas Müller`_.

- :class:`feature_selection.SelectFromModel` now validates the ``threshold``
parameter and sets the ``threshold_`` attribute during the call to
``fit``, and no longer during the call to ``transform``, by `Andreas
Müller`_.

- :class:`feature_selection.SelectFromModel` now has a ``partial_fit``
method only if the underlying estimator does. By `Andreas Müller`_.

- :class:`multiclass.OneVsRestClassifier` now has a ``partial_fit`` method
only if the underlying estimator does. By `Andreas Müller`_.

- Estimators with both methods ``decision_function`` and ``predict_proba``
are now required to have a monotonic relation between them. The
method ``check_decision_proba_consistency`` has been added in
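To illustrate the SelectFromModel entry above: after this change, threshold_ can be inspected as soon as fit returns, rather than appearing only after the first transform. A hedged sketch (the dataset and estimator choices are arbitrary, not from the PR):

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel

X, y = make_classification(n_samples=100, n_features=10, random_state=0)
sfm = SelectFromModel(RandomForestClassifier(n_estimators=10, random_state=0),
                      threshold="mean").fit(X, y)
print(sfm.threshold_)          # set during fit, before any transform
print(sfm.transform(X).shape)  # (100, n_selected_features)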
2 changes: 1 addition & 1 deletion sklearn/cluster/tests/test_k_means.py
@@ -404,7 +404,7 @@ def test_minibatch_sensible_reassign_partial_fit():
def test_minibatch_reassign():
# Give a perfect initialization, but a large reassignment_ratio,
# as a result all the centers should be reassigned and the model
# should not longer be good
# should no longer be good
for this_X in (X, X_csr):
mb_k_means = MiniBatchKMeans(n_clusters=n_clusters, batch_size=100,
random_state=42)
35 changes: 31 additions & 4 deletions sklearn/covariance/outlier_detection.py
@@ -15,8 +15,8 @@
import numpy as np
import scipy as sp
from . import MinCovDet
from ..base import ClassifierMixin
from ..utils.validation import check_is_fitted
from ..utils.validation import check_is_fitted, check_array
from ..metrics import accuracy_score


class OutlierDetectionMixin(object):
@@ -63,11 +63,11 @@ def decision_function(self, X, raw_values=False):

"""
check_is_fitted(self, 'threshold_')
X = check_array(X)
mahal_dist = self.mahalanobis(X)
if raw_values:
decision = mahal_dist
else:
check_is_fitted(self, 'threshold_')
transformed_mahal_dist = mahal_dist ** 0.33
decision = self.threshold_ ** 0.33 - transformed_mahal_dist

@@ -91,6 +91,7 @@ def predict(self, X):

"""
check_is_fitted(self, 'threshold_')
X = check_array(X)
is_inlier = -np.ones(X.shape[0], dtype=int)
if self.contamination is not None:
values = self.decision_function(X, raw_values=True)
@@ -100,8 +101,34 @@ def predict(self, X):

return is_inlier

def score(self, X, y, sample_weight=None):
"""Returns the mean accuracy on the given test data and labels.

class EllipticEnvelope(ClassifierMixin, OutlierDetectionMixin, MinCovDet):
In multi-label classification, this is the subset accuracy
which is a harsh metric since you require for each sample that
each label set be correctly predicted.

Parameters
----------
X : array-like, shape = (n_samples, n_features)
Test samples.

y : array-like, shape = (n_samples,) or (n_samples, n_outputs)
True labels for X.

sample_weight : array-like, shape = (n_samples,), optional
Sample weights.

Returns
-------
score : float
Mean accuracy of self.predict(X) wrt. y.

"""
return accuracy_score(y, self.predict(X), sample_weight=sample_weight)


class EllipticEnvelope(OutlierDetectionMixin, MinCovDet):
"""An object for detecting outliers in a Gaussian distributed dataset.

Read more in the :ref:`User Guide <outlier_detection>`.
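A small usage sketch of the relocated score method (synthetic data; the exact accuracy depends on the fit): predict labels inliers 1 and outliers -1, and score compares those labels to ground truth via accuracy_score.

import numpy as np
from sklearn.covariance import EllipticEnvelope

rng = np.random.RandomState(0)
X = np.r_[rng.randn(95, 2), rng.randn(5, 2) + 10]   # five obvious outliers
y_true = np.r_[np.ones(95), -np.ones(5)]
env = EllipticEnvelope(contamination=0.05).fit(X)
print(env.score(X, y_true))   # mean accuracy of predict(X) w.r.t. y_true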
17 changes: 10 additions & 7 deletions sklearn/decomposition/dict_learning.py
@@ -94,6 +94,11 @@ def _sparse_encode(X, dictionary, gram, cov=None, algorithm='lasso_lars',
if X.ndim == 1:
X = X[:, np.newaxis]
n_samples, n_features = X.shape
n_components = dictionary.shape[0]
if dictionary.shape[1] != X.shape[1]:
raise ValueError("Dictionary and X have different numbers of features:"
"dictionary.shape: {} X.shape{}".format(
dictionary.shape, X.shape))
Review comment (Member): could use check_consistent_length(X.T, dictionary.T).
You could argue that's less clear though.

Reply (Author): It'll say "different number of samples" in the error, right?
if cov is None and algorithm != 'lasso_cd':
# overwriting cov is safe
copy_cov = False
@@ -157,6 +162,8 @@ def _sparse_encode(X, dictionary, gram, cov=None, algorithm='lasso_lars',
raise ValueError('Sparse coding method must be "lasso_lars" '
'"lasso_cd", "lasso", "threshold" or "omp", got %s.'
% algorithm)
if new_code.ndim != 2:
return new_code.reshape(n_samples, n_components)
return new_code


@@ -281,10 +288,6 @@ def sparse_encode(X, dictionary, gram=None, cov=None, algorithm='lasso_lars',
max_iter=max_iter,
check_input=False,
verbose=verbose)
# This ensure that dimensionality of code is always 2,
# consistant with the case n_jobs > 1
if code.ndim == 1:
code = code[np.newaxis, :]
return code

# Enter parallel code block
@@ -731,8 +734,8 @@ def dict_learning_online(X, n_components=2, alpha=1, n_iter=100,
sys.stdout.flush()
elif verbose:
if verbose > 10 or ii % ceil(100. / verbose) == 0:
print ("Iteration % 3i (elapsed time: % 3is, % 4.1fmn)"
% (ii, dt, dt / 60))
print("Iteration % 3i (elapsed time: % 3is, % 4.1fmn)"
% (ii, dt, dt / 60))

this_code = sparse_encode(this_X, dictionary.T, algorithm=method,
alpha=alpha, n_jobs=n_jobs).T
@@ -820,7 +823,6 @@ def transform(self, X, y=None):
"""
check_is_fitted(self, 'components_')

# XXX : kwargs is not documented
X = check_array(X)
n_samples, n_features = X.shape

@@ -906,6 +908,7 @@ class SparseCoder(BaseEstimator, SparseCodingMixin):
MiniBatchSparsePCA
sparse_encode
"""
_required_parameters = ["dictionary"]

def __init__(self, dictionary, transform_algorithm='omp',
transform_n_nonzero_coefs=None, transform_alpha=None,
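A direct check of the behavior fixed in this file, as a hedged sketch: sparse_encode should return an (n_samples, n_components) array even with a single dictionary atom, matching the regression test added below.

import numpy as np
from sklearn.decomposition import sparse_encode

rng = np.random.RandomState(0)
X = rng.randn(9, 8)
dictionary = rng.randn(1, 8)    # a single atom
code = sparse_encode(X, dictionary, algorithm='omp')
print(code.shape)               # (9, 1) after the fix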
20 changes: 19 additions & 1 deletion sklearn/decomposition/tests/test_dict_learning.py
@@ -1,4 +1,5 @@
import numpy as np
import itertools

from sklearn.exceptions import ConvergenceWarning

@@ -25,10 +26,27 @@
X = rng_global.randn(n_samples, n_features)


def test_sparse_encode_shapes_omp():
rng = np.random.RandomState(0)
algorithms = ['omp', 'lasso_lars', 'lasso_cd', 'lars', 'threshold']
for n_components, n_samples in itertools.product([1, 5], [1, 9]):
X_ = rng.randn(n_samples, n_features)
dictionary = rng.randn(n_components, n_features)
for algorithm, n_jobs in itertools.product(algorithms, [1, 3]):
code = sparse_encode(X_, dictionary, algorithm=algorithm,
n_jobs=n_jobs)
assert_equal(code.shape, (n_samples, n_components))


def test_dict_learning_shapes():
n_components = 5
dico = DictionaryLearning(n_components, random_state=0).fit(X)
assert_true(dico.components_.shape == (n_components, n_features))
assert_equal(dico.components_.shape, (n_components, n_features))

n_components = 1
Review comment (Author): @GaelVaroquaux this is the regression test for the SparseEncode change. I can add a more direct test on SparseEncode, though.

Reply (Member): No, I think that this is good.

Reply (Author): Added more tests now: one testing the core issue in sparse_encode, the other the resulting bug in DictionaryLearning.

dico = DictionaryLearning(n_components, random_state=0).fit(X)
assert_equal(dico.components_.shape, (n_components, n_features))
assert_equal(dico.transform(X).shape, (X.shape[0], n_components))


def test_dict_learning_overcomplete():
8 changes: 2 additions & 6 deletions sklearn/decomposition/truncated_svd.py
@@ -11,7 +11,7 @@
from scipy.sparse.linalg import svds

from ..base import BaseEstimator, TransformerMixin
from ..utils import check_array, as_float_array, check_random_state
from ..utils import check_array, check_random_state
from ..utils.extmath import randomized_svd, safe_sparse_dot, svd_flip
from ..utils.sparsefuncs import mean_variance_axis

@@ -153,13 +153,9 @@ def fit_transform(self, X, y=None):
X_new : array, shape (n_samples, n_components)
Reduced version of X. This will always be a dense array.
"""
X = as_float_array(X, copy=False)
X = check_array(X, accept_sparse=['csr', 'csc'])
random_state = check_random_state(self.random_state)

# If sparse and not csr or csc, convert to csr
if sp.issparse(X) and X.getformat() not in ["csr", "csc"]:
X = X.tocsr()

if self.algorithm == "arpack":
U, Sigma, VT = svds(X, k=self.n_components, tol=self.tol)
# svds doesn't abide by scipy.linalg.svd/randomized_svd
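What the consolidated validation above does in one call, sketched: check_array with accept_sparse=['csr', 'csc'] converts any other sparse format (e.g. COO) to CSR, replacing the manual format check that was removed.

import scipy.sparse as sp
from sklearn.utils import check_array

X = sp.coo_matrix([[1.0, 0.0], [0.0, 2.0]])
X_checked = check_array(X, accept_sparse=['csr', 'csc'])
print(X_checked.format)   # 'csr': COO input was converted in one step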
7 changes: 5 additions & 2 deletions sklearn/dummy.py
@@ -120,6 +120,8 @@ def fit(self, X, y, sample_weight=None):

self.sparse_output_ = sp.issparse(y)

check_consistent_length(X, y)

if not self.sparse_output_:
y = np.atleast_1d(y)

@@ -184,7 +186,7 @@ def predict(self, X):
classes_ = self.classes_
class_prior_ = self.class_prior_
constant = self.constant
if self.n_outputs_ == 1:
if self.n_outputs_ == 1 and not self.output_2d_:
# Get same type even for self.n_outputs_ == 1
n_classes_ = [n_classes_]
classes_ = [classes_]
@@ -193,7 +195,7 @@ def predict(self, X):
# Compute probability only once
if self.strategy == "stratified":
proba = self.predict_proba(X)
if self.n_outputs_ == 1:
if self.n_outputs_ == 1 and not self.output_2d_:
proba = [proba]

if self.sparse_output_:
@@ -399,6 +401,7 @@ def fit(self, X, y, sample_weight=None):
% self.strategy)

y = check_array(y, ensure_2d=False)

if len(y) == 0:
raise ValueError("y must not be empty.")

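The output_2d_ conditions above are meant to preserve the shape of a 2-d single-output y end to end. A sketch of the intended round-trip (behavior as of this fix; illustrative data):

import numpy as np
from sklearn.dummy import DummyClassifier

X = np.zeros((4, 1))
y = np.array([[1], [1], [0], [1]])   # shape (4, 1): 2-d, but one output
clf = DummyClassifier(strategy='most_frequent').fit(X, y)
print(clf.predict(X).shape)          # (4, 1), matching y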
6 changes: 5 additions & 1 deletion sklearn/ensemble/base.py
@@ -12,6 +12,8 @@
from ..base import BaseEstimator
from ..base import MetaEstimatorMixin
from ..utils import _get_n_jobs, check_random_state
from ..externals import six
from abc import ABCMeta, abstractmethod

MAX_RAND_SEED = np.iinfo(np.int32).max

@@ -55,7 +57,8 @@ def _set_random_states(estimator, random_state=None):
estimator.set_params(**to_set)


class BaseEnsemble(BaseEstimator, MetaEstimatorMixin):
class BaseEnsemble(six.with_metaclass(ABCMeta, BaseEstimator,
MetaEstimatorMixin)):
"""Base class for all ensemble classes.

Warning: This class should not be used directly. Use derived classes
@@ -82,6 +85,7 @@ class BaseEnsemble(BaseEstimator, MetaEstimatorMixin):
The collection of fitted base estimators.
"""

@abstractmethod
def __init__(self, base_estimator, n_estimators=10,
estimator_params=tuple()):
# Set parameters
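The effect of the new metaclass in miniature: with ABCMeta and an abstract __init__, the base class itself can no longer be instantiated, so it is skipped by machinery that tries to construct every concrete estimator. A sketch using the standalone six package (scikit-learn vendors it as externals.six):

from abc import ABCMeta, abstractmethod
import six

class Base(six.with_metaclass(ABCMeta, object)):
    @abstractmethod
    def __init__(self):
        pass

class Concrete(Base):
    def __init__(self):
        pass

Concrete()   # fine: the concrete subclass overrides __init__
try:
    Base()   # TypeError: can't instantiate abstract class
except TypeError as e:
    print(e)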
10 changes: 5 additions & 5 deletions sklearn/ensemble/gradient_boosting.py
@@ -64,7 +64,7 @@
from ..exceptions import NotFittedError


class QuantileEstimator(BaseEstimator):
class QuantileEstimator(object):
Review comment (Member): Why this change? (there is probably a good reason, just asking)

Reply (Author): These are not scikit-learn estimators: they don't fulfill the sklearn estimator API, and the inheritance doesn't provide any functionality, so having them inherit is rather confusing. The more direct reason is that I don't want them to be discovered by the common tests, as they are not sklearn estimators and will definitely fail the tests.

Reply (Member): They are also not in a position for get_params/set_params to be used.

"""An estimator predicting the alpha-quantile of the training targets."""
def __init__(self, alpha=0.9):
if not 0 < alpha < 1.0:
@@ -86,7 +86,7 @@ def predict(self, X):
return y


class MeanEstimator(BaseEstimator):
class MeanEstimator(object):
"""An estimator predicting the mean of the training targets."""
def fit(self, X, y, sample_weight=None):
if sample_weight is None:
@@ -102,7 +102,7 @@ def predict(self, X):
return y


class LogOddsEstimator(BaseEstimator):
class LogOddsEstimator(object):
"""An estimator predicting the log odds ratio."""
scale = 1.0

@@ -132,7 +132,7 @@ class ScaledLogOddsEstimator(LogOddsEstimator):
scale = 0.5


class PriorProbabilityEstimator(BaseEstimator):
class PriorProbabilityEstimator(object):
"""An estimator predicting the probability of each
class in the training data.
"""
@@ -150,7 +150,7 @@ def predict(self, X):
return y


class ZeroEstimator(BaseEstimator):
class ZeroEstimator(object):
"""An estimator that simply predicts zero. """

def fit(self, X, y, sample_weight=None):
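The get_params/set_params point from the thread above, sketched: BaseEstimator builds get_params from the __init__ signature, so a class like LogOddsEstimator, whose state (the class-level scale) does not mirror __init__ arguments, never fit that contract anyway. Names below are illustrative only:

from sklearn.base import BaseEstimator

class Odd(BaseEstimator):
    scale = 1.0                 # class-level state, not an __init__ parameter
    def __init__(self, alpha=0.9):
        self.alpha = alpha

print(Odd().get_params())      # {'alpha': 0.9} -- scale is invisible here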
2 changes: 1 addition & 1 deletion sklearn/feature_extraction/tests/test_text.py
@@ -420,7 +420,7 @@ def test_vectorizer():
# test tf alone
t2 = TfidfTransformer(norm='l1', use_idf=False)
tf = t2.fit(counts_train).transform(counts_train).toarray()
assert_equal(t2.idf_, None)
assert_false(hasattr(t2, "idf_"))

# test idf transform with unlearned idf vector
t3 = TfidfTransformer(use_idf=True)
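The assertion change above in practice: with use_idf=False the transformer never learns an idf vector, so the attribute is absent rather than set to None. A short sketch:

from sklearn.feature_extraction.text import TfidfTransformer

t = TfidfTransformer(norm='l1', use_idf=False).fit([[1, 0], [1, 2]])
print(hasattr(t, 'idf_'))   # False: no idf_ is set when use_idf=False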