diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 0a404d2d07946..7d65806acb807 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -173,7 +173,7 @@ Here is an example of building custom scorers, and of using the >>> # and predictions defined below. >>> loss = make_scorer(my_custom_loss_func, greater_is_better=False) >>> score = make_scorer(my_custom_loss_func, greater_is_better=True) - >>> ground_truth = [[1, 1]] + >>> ground_truth = [[1], [1]] >>> predictions = [0, 1] >>> from sklearn.dummy import DummyClassifier >>> clf = DummyClassifier(strategy='most_frequent', random_state=0) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index dfc2be923eff2..bb52411e2fba4 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -213,7 +213,8 @@ Bug fixes - Fixed a bug where :class:`sklearn.linear_model.LassoLars` does not give the same result as the LassoLars implementation available - in R (lars library). :issue:`7849` by :user:`Jair Montoya Martinez ` + in R (lars library). :issue:`7849` by :user:`Jair Montoya Martinez `. + - Some ``fetch_`` functions in `sklearn.datasets` were ignoring the ``download_if_missing`` keyword. This was fixed in :issue:`7944` by :user:`Ralf Gommers `. @@ -223,9 +224,9 @@ Bug fixes where a float being compared to ``0.0`` using ``==`` caused a divide by zero error. This was fixed in :issue:`7970` by :user:`He Chen `. - - Fix a bug regarding fitting :class:`sklearn.cluster.KMeans` with a - sparse array X and initial centroids, where X's means were unnecessarily - being subtracted from the centroids. :issue:`7872` by `Josh Karnofsky `_. + - Fix a bug regarding fitting :class:`sklearn.cluster.KMeans` with a sparse + array X and initial centroids, where X's means were unnecessarily being + subtracted from the centroids. :issue:`7872` by :user:`Josh Karnofsky `. - Fix estimators to accept a ``sample_weight`` parameter of type ``pandas.Series`` in their ``fit`` function. :issue:`7825` by @@ -249,6 +250,20 @@ Bug fixes ``min_impurity_split`` parameter. :issue:`8006` by :user:`Sebastian Pölsterl `. + - Fixes to the input validation in + :class:`sklearn.covariance.EllipticEnvelope`. + :issue:`8086` by `Andreas Müller`_. + + - Fix output shape and bugs with n_jobs > 1 in + :class:`sklearn.decomposition.SparseCoder` transform and :func:`sklearn.decomposition.sparse_encode` + for one-dimensional data and one component. + This also impacts the output shape of :class:`sklearn.decomposition.DictionaryLearning`. + :issue:`8086` by `Andreas Müller`_. + + - Several fixes to input validation in + :class:`multiclass.OutputCodeClassifier`. + :issue:`8086` by `Andreas Müller`_. + - Fix a bug where :class:`sklearn.ensemble.gradient_boosting.QuantileLossFunction` computed negative errors for negative values of ``ytrue - ypred`` leading to @@ -336,6 +351,20 @@ API changes summary :func:`sklearn.model_selection.cross_val_predict`. :issue:`2879` by :user:`Stephen Hoover `. + + - Gradient boosting base models are no longer estimators. By `Andreas Müller`_. + + - :class:`feature_selection.SelectFromModel` now validates the ``threshold`` + parameter and sets the ``threshold_`` attribute during the call to + ``fit``, and no longer during the call to ``transform``, by `Andreas + Müller`_. + + - :class:`feature_selection.SelectFromModel` now has a ``partial_fit`` + method only if the underlying estimator does. By `Andreas Müller`_. 
+ + - :class:`multiclass.OneVsRestClassifier` now has a ``partial_fit`` method + only if the underlying estimator does. By `Andreas Müller`_. + - Estimators with both methods ``decision_function`` and ``predict_proba`` are now required to have a monotonic relation between them. The method ``check_decision_proba_consistency`` has been added in diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py index 31307e55801a5..38fcff94d7505 100644 --- a/sklearn/cluster/tests/test_k_means.py +++ b/sklearn/cluster/tests/test_k_means.py @@ -404,7 +404,7 @@ def test_minibatch_sensible_reassign_partial_fit(): def test_minibatch_reassign(): # Give a perfect initialization, but a large reassignment_ratio, # as a result all the centers should be reassigned and the model - # should not longer be good + # should no longer be good for this_X in (X, X_csr): mb_k_means = MiniBatchKMeans(n_clusters=n_clusters, batch_size=100, random_state=42) diff --git a/sklearn/covariance/outlier_detection.py b/sklearn/covariance/outlier_detection.py index 1cafe885fdd47..9fe219ba5d0b6 100644 --- a/sklearn/covariance/outlier_detection.py +++ b/sklearn/covariance/outlier_detection.py @@ -15,8 +15,8 @@ import numpy as np import scipy as sp from . import MinCovDet -from ..base import ClassifierMixin -from ..utils.validation import check_is_fitted +from ..utils.validation import check_is_fitted, check_array +from ..metrics import accuracy_score class OutlierDetectionMixin(object): @@ -63,11 +63,11 @@ def decision_function(self, X, raw_values=False): """ check_is_fitted(self, 'threshold_') + X = check_array(X) mahal_dist = self.mahalanobis(X) if raw_values: decision = mahal_dist else: - check_is_fitted(self, 'threshold_') transformed_mahal_dist = mahal_dist ** 0.33 decision = self.threshold_ ** 0.33 - transformed_mahal_dist @@ -91,6 +91,7 @@ def predict(self, X): """ check_is_fitted(self, 'threshold_') + X = check_array(X) is_inlier = -np.ones(X.shape[0], dtype=int) if self.contamination is not None: values = self.decision_function(X, raw_values=True) @@ -100,8 +101,34 @@ def predict(self, X): return is_inlier + def score(self, X, y, sample_weight=None): + """Returns the mean accuracy on the given test data and labels. -class EllipticEnvelope(ClassifierMixin, OutlierDetectionMixin, MinCovDet): + In multi-label classification, this is the subset accuracy + which is a harsh metric since you require for each sample that + each label set be correctly predicted. + + Parameters + ---------- + X : array-like, shape = (n_samples, n_features) + Test samples. + + y : array-like, shape = (n_samples,) or (n_samples, n_outputs) + True labels for X. + + sample_weight : array-like, shape = (n_samples,), optional + Sample weights. + + Returns + ------- + score : float + Mean accuracy of self.predict(X) wrt. y. + + """ + return accuracy_score(y, self.predict(X), sample_weight=sample_weight) + + +class EllipticEnvelope(OutlierDetectionMixin, MinCovDet): """An object for detecting outliers in a Gaussian distributed dataset. Read more in the :ref:`User Guide `. 
diff --git a/sklearn/decomposition/dict_learning.py b/sklearn/decomposition/dict_learning.py index 154987a6279c4..b9bb0fcea864c 100644 --- a/sklearn/decomposition/dict_learning.py +++ b/sklearn/decomposition/dict_learning.py @@ -94,6 +94,11 @@ def _sparse_encode(X, dictionary, gram, cov=None, algorithm='lasso_lars', if X.ndim == 1: X = X[:, np.newaxis] n_samples, n_features = X.shape + n_components = dictionary.shape[0] + if dictionary.shape[1] != X.shape[1]: + raise ValueError("Dictionary and X have different numbers of features:" + "dictionary.shape: {} X.shape{}".format( + dictionary.shape, X.shape)) if cov is None and algorithm != 'lasso_cd': # overwriting cov is safe copy_cov = False @@ -157,6 +162,8 @@ def _sparse_encode(X, dictionary, gram, cov=None, algorithm='lasso_lars', raise ValueError('Sparse coding method must be "lasso_lars" ' '"lasso_cd", "lasso", "threshold" or "omp", got %s.' % algorithm) + if new_code.ndim != 2: + return new_code.reshape(n_samples, n_components) return new_code @@ -281,10 +288,6 @@ def sparse_encode(X, dictionary, gram=None, cov=None, algorithm='lasso_lars', max_iter=max_iter, check_input=False, verbose=verbose) - # This ensure that dimensionality of code is always 2, - # consistant with the case n_jobs > 1 - if code.ndim == 1: - code = code[np.newaxis, :] return code # Enter parallel code block @@ -731,8 +734,8 @@ def dict_learning_online(X, n_components=2, alpha=1, n_iter=100, sys.stdout.flush() elif verbose: if verbose > 10 or ii % ceil(100. / verbose) == 0: - print ("Iteration % 3i (elapsed time: % 3is, % 4.1fmn)" - % (ii, dt, dt / 60)) + print("Iteration % 3i (elapsed time: % 3is, % 4.1fmn)" + % (ii, dt, dt / 60)) this_code = sparse_encode(this_X, dictionary.T, algorithm=method, alpha=alpha, n_jobs=n_jobs).T @@ -820,7 +823,6 @@ def transform(self, X, y=None): """ check_is_fitted(self, 'components_') - # XXX : kwargs is not documented X = check_array(X) n_samples, n_features = X.shape @@ -906,6 +908,7 @@ class SparseCoder(BaseEstimator, SparseCodingMixin): MiniBatchSparsePCA sparse_encode """ + _required_parameters = ["dictionary"] def __init__(self, dictionary, transform_algorithm='omp', transform_n_nonzero_coefs=None, transform_alpha=None, diff --git a/sklearn/decomposition/tests/test_dict_learning.py b/sklearn/decomposition/tests/test_dict_learning.py index b7ed5c4703492..5bf9836aa6a9e 100644 --- a/sklearn/decomposition/tests/test_dict_learning.py +++ b/sklearn/decomposition/tests/test_dict_learning.py @@ -1,4 +1,5 @@ import numpy as np +import itertools from sklearn.exceptions import ConvergenceWarning @@ -25,10 +26,27 @@ X = rng_global.randn(n_samples, n_features) +def test_sparse_encode_shapes_omp(): + rng = np.random.RandomState(0) + algorithms = ['omp', 'lasso_lars', 'lasso_cd', 'lars', 'threshold'] + for n_components, n_samples in itertools.product([1, 5], [1, 9]): + X_ = rng.randn(n_samples, n_features) + dictionary = rng.randn(n_components, n_features) + for algorithm, n_jobs in itertools.product(algorithms, [1, 3]): + code = sparse_encode(X_, dictionary, algorithm=algorithm, + n_jobs=n_jobs) + assert_equal(code.shape, (n_samples, n_components)) + + def test_dict_learning_shapes(): n_components = 5 dico = DictionaryLearning(n_components, random_state=0).fit(X) - assert_true(dico.components_.shape == (n_components, n_features)) + assert_equal(dico.components_.shape, (n_components, n_features)) + + n_components = 1 + dico = DictionaryLearning(n_components, random_state=0).fit(X) + assert_equal(dico.components_.shape, (n_components, 
n_features)) + assert_equal(dico.transform(X).shape, (X.shape[0], n_components)) def test_dict_learning_overcomplete(): diff --git a/sklearn/decomposition/truncated_svd.py b/sklearn/decomposition/truncated_svd.py index c1fc1f3b18435..87b8b45e1543a 100644 --- a/sklearn/decomposition/truncated_svd.py +++ b/sklearn/decomposition/truncated_svd.py @@ -11,7 +11,7 @@ from scipy.sparse.linalg import svds from ..base import BaseEstimator, TransformerMixin -from ..utils import check_array, as_float_array, check_random_state +from ..utils import check_array, check_random_state from ..utils.extmath import randomized_svd, safe_sparse_dot, svd_flip from ..utils.sparsefuncs import mean_variance_axis @@ -153,13 +153,9 @@ def fit_transform(self, X, y=None): X_new : array, shape (n_samples, n_components) Reduced version of X. This will always be a dense array. """ - X = as_float_array(X, copy=False) + X = check_array(X, accept_sparse=['csr', 'csc']) random_state = check_random_state(self.random_state) - # If sparse and not csr or csc, convert to csr - if sp.issparse(X) and X.getformat() not in ["csr", "csc"]: - X = X.tocsr() - if self.algorithm == "arpack": U, Sigma, VT = svds(X, k=self.n_components, tol=self.tol) # svds doesn't abide by scipy.linalg.svd/randomized_svd diff --git a/sklearn/dummy.py b/sklearn/dummy.py index 90a43791c81b6..9d139454d6e2c 100644 --- a/sklearn/dummy.py +++ b/sklearn/dummy.py @@ -120,6 +120,8 @@ def fit(self, X, y, sample_weight=None): self.sparse_output_ = sp.issparse(y) + check_consistent_length(X, y) + if not self.sparse_output_: y = np.atleast_1d(y) @@ -184,7 +186,7 @@ def predict(self, X): classes_ = self.classes_ class_prior_ = self.class_prior_ constant = self.constant - if self.n_outputs_ == 1: + if self.n_outputs_ == 1 and not self.output_2d_: # Get same type even for self.n_outputs_ == 1 n_classes_ = [n_classes_] classes_ = [classes_] @@ -193,7 +195,7 @@ def predict(self, X): # Compute probability only once if self.strategy == "stratified": proba = self.predict_proba(X) - if self.n_outputs_ == 1: + if self.n_outputs_ == 1 and not self.output_2d_: proba = [proba] if self.sparse_output_: @@ -399,6 +401,7 @@ def fit(self, X, y, sample_weight=None): % self.strategy) y = check_array(y, ensure_2d=False) + if len(y) == 0: raise ValueError("y must not be empty.") diff --git a/sklearn/ensemble/base.py b/sklearn/ensemble/base.py index 5e9d6e2e1fc3c..2477cc1c21c7d 100644 --- a/sklearn/ensemble/base.py +++ b/sklearn/ensemble/base.py @@ -12,6 +12,8 @@ from ..base import BaseEstimator from ..base import MetaEstimatorMixin from ..utils import _get_n_jobs, check_random_state +from ..externals import six +from abc import ABCMeta, abstractmethod MAX_RAND_SEED = np.iinfo(np.int32).max @@ -55,7 +57,8 @@ def _set_random_states(estimator, random_state=None): estimator.set_params(**to_set) -class BaseEnsemble(BaseEstimator, MetaEstimatorMixin): +class BaseEnsemble(six.with_metaclass(ABCMeta, BaseEstimator, + MetaEstimatorMixin)): """Base class for all ensemble classes. Warning: This class should not be used directly. Use derived classes @@ -82,6 +85,7 @@ class BaseEnsemble(BaseEstimator, MetaEstimatorMixin): The collection of fitted base estimators. 
""" + @abstractmethod def __init__(self, base_estimator, n_estimators=10, estimator_params=tuple()): # Set parameters diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index 9ef973b8bbc56..8c16ccf78ffa6 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -64,7 +64,7 @@ from ..exceptions import NotFittedError -class QuantileEstimator(BaseEstimator): +class QuantileEstimator(object): """An estimator predicting the alpha-quantile of the training targets.""" def __init__(self, alpha=0.9): if not 0 < alpha < 1.0: @@ -86,7 +86,7 @@ def predict(self, X): return y -class MeanEstimator(BaseEstimator): +class MeanEstimator(object): """An estimator predicting the mean of the training targets.""" def fit(self, X, y, sample_weight=None): if sample_weight is None: @@ -102,7 +102,7 @@ def predict(self, X): return y -class LogOddsEstimator(BaseEstimator): +class LogOddsEstimator(object): """An estimator predicting the log odds ratio.""" scale = 1.0 @@ -132,7 +132,7 @@ class ScaledLogOddsEstimator(LogOddsEstimator): scale = 0.5 -class PriorProbabilityEstimator(BaseEstimator): +class PriorProbabilityEstimator(object): """An estimator predicting the probability of each class in the training data. """ @@ -150,7 +150,7 @@ def predict(self, X): return y -class ZeroEstimator(BaseEstimator): +class ZeroEstimator(object): """An estimator that simply predicts zero. """ def fit(self, X, y, sample_weight=None): diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py index 88382f7d13c0b..341486abd3b1c 100644 --- a/sklearn/feature_extraction/tests/test_text.py +++ b/sklearn/feature_extraction/tests/test_text.py @@ -420,7 +420,7 @@ def test_vectorizer(): # test tf alone t2 = TfidfTransformer(norm='l1', use_idf=False) tf = t2.fit(counts_train).transform(counts_train).toarray() - assert_equal(t2.idf_, None) + assert_false(hasattr(t2, "idf_")) # test idf transform with unlearned idf vector t3 = TfidfTransformer(use_idf=True) diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index 2484be7166cfa..539e88973bcc0 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -29,7 +29,6 @@ from ..preprocessing import normalize from .hashing import FeatureHasher from .stop_words import ENGLISH_STOP_WORDS -from ..utils import deprecated from ..utils.fixes import frombuffer_empty, bincount from ..utils.validation import check_is_fitted @@ -1036,7 +1035,7 @@ def fit(self, X, y=None): # log+1 instead of log makes sure terms with zero idf don't get # suppressed entirely. 
idf = np.log(float(n_samples) / df) + 1.0 - self._idf_diag = sp.spdiags(idf, diags=0, m=n_features, + self._idf_diag = sp.spdiags(idf, diags=0, m=n_features, n=n_features, format='csr') return self @@ -1088,10 +1087,9 @@ def transform(self, X, copy=True): @property def idf_(self): - if hasattr(self, "_idf_diag"): - return np.ravel(self._idf_diag.sum(axis=0)) - else: - return None + # if _idf_diag is not set, this will raise an attribute error, + # which means hasattr(self, "idf_") is False + return np.ravel(self._idf_diag.sum(axis=0)) class TfidfVectorizer(CountVectorizer): diff --git a/sklearn/feature_selection/from_model.py b/sklearn/feature_selection/from_model.py index 7fe4456ccd390..dada33e9a75cc 100644 --- a/sklearn/feature_selection/from_model.py +++ b/sklearn/feature_selection/from_model.py @@ -4,11 +4,12 @@ import numpy as np from .base import SelectorMixin -from ..base import BaseEstimator, clone +from ..base import BaseEstimator, clone, MetaEstimatorMixin from ..externals import six from ..exceptions import NotFittedError from ..utils.fixes import norm +from ..utils.metaestimators import if_delegate_has_method def _get_feature_importances(estimator, norm_order=1): @@ -76,7 +77,7 @@ def _calculate_threshold(estimator, importances, threshold): return threshold -class SelectFromModel(BaseEstimator, SelectorMixin): +class SelectFromModel(BaseEstimator, SelectorMixin, MetaEstimatorMixin): """Meta-transformer for selecting features based on importance weights. .. versionadded:: 0.17 @@ -121,7 +122,6 @@ class SelectFromModel(BaseEstimator, SelectorMixin): threshold_ : float The threshold value used for feature selection. """ - def __init__(self, estimator, threshold=None, prefit=False, norm_order=1): self.estimator = estimator self.threshold = threshold @@ -136,12 +136,11 @@ def _get_support_mask(self): estimator = self.estimator_ else: raise ValueError( - 'Either fit the model before transform or set "prefit=True"' - ' while passing the fitted estimator to the constructor.') + 'Either fit SelectFromModel before transform or set "prefit=' + 'True" and pass a fitted estimator to the constructor.') scores = _get_feature_importances(estimator, self.norm_order) - self.threshold_ = _calculate_threshold(estimator, scores, - self.threshold) - return scores >= self.threshold_ + threshold = _calculate_threshold(estimator, scores, self.threshold) + return scores >= threshold def fit(self, X, y=None, **fit_params): """Fit the SelectFromModel meta-transformer. @@ -169,6 +168,12 @@ def fit(self, X, y=None, **fit_params): self.estimator_.fit(X, y, **fit_params) return self + @property + def threshold_(self): + scores = _get_feature_importances(self.estimator_, self.norm_order) + return _calculate_threshold(self.estimator, scores, self.threshold) + + @if_delegate_has_method('estimator') def partial_fit(self, X, y=None, **fit_params): """Fit the SelectFromModel meta-transformer only once. diff --git a/sklearn/feature_selection/rfe.py b/sklearn/feature_selection/rfe.py index d92e341676371..31ff0057d8d8e 100644 --- a/sklearn/feature_selection/rfe.py +++ b/sklearn/feature_selection/rfe.py @@ -30,6 +30,7 @@ def _rfe_single_fit(rfe, estimator, X, y, train, test, scorer): X_train, y_train, lambda estimator, features: _score(estimator, X_test[:, features], y_test, scorer)).scores_ + class RFE(BaseEstimator, MetaEstimatorMixin, SelectorMixin): """Feature ranking with recursive feature elimination. @@ -293,8 +294,8 @@ class RFECV(RFE, MetaEstimatorMixin): - An iterable yielding train/test splits. 
For integer/None inputs, if ``y`` is binary or multiclass, - :class:`sklearn.model_selection.StratifiedKFold` is used. If the - estimator is a classifier or if ``y`` is neither binary nor multiclass, + :class:`sklearn.model_selection.StratifiedKFold` is used. If the + estimator is a classifier or if ``y`` is neither binary nor multiclass, :class:`sklearn.model_selection.KFold` is used. Refer :ref:`User Guide ` for the various diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py index 6efb6f405bb1c..6ef0d824b587c 100644 --- a/sklearn/feature_selection/tests/test_from_model.py +++ b/sklearn/feature_selection/tests/test_from_model.py @@ -1,6 +1,7 @@ import numpy as np from sklearn.utils.testing import assert_true +from sklearn.utils.testing import assert_false from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_less from sklearn.utils.testing import assert_greater @@ -120,6 +121,10 @@ def test_partial_fit(): transformer.fit(np.vstack((data, data)), np.concatenate((y, y))) assert_array_equal(X_transform, transformer.transform(data)) + # check that if est doesn't have partial_fit, neither does SelectFromModel + transformer = SelectFromModel(estimator=RandomForestClassifier()) + assert_false(hasattr(transformer, "partial_fit")) + def test_calling_fit_reinitializes(): est = LinearSVC(random_state=0) @@ -171,10 +176,10 @@ def test_threshold_string(): def test_threshold_without_refitting(): """Test that the threshold can be set without refitting the model.""" clf = SGDClassifier(alpha=0.1, n_iter=10, shuffle=True, random_state=0) - model = SelectFromModel(clf, threshold=0.1) + model = SelectFromModel(clf, threshold="0.1 * mean") model.fit(data, y) X_transform = model.transform(data) # Set a higher threshold to filter out more features. - model.threshold = 1.0 + model.threshold = "1.0 * mean" assert_greater(X_transform.shape[1], model.transform(data).shape[1]) diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py index 8f9788e6a425c..712e8573fa469 100644 --- a/sklearn/multiclass.py +++ b/sklearn/multiclass.py @@ -46,7 +46,7 @@ from .utils import check_random_state from .utils.validation import _num_samples from .utils.validation import check_is_fitted -from .utils.validation import check_X_y +from .utils.validation import check_X_y, check_array from .utils.multiclass import (_check_partial_fit_first_call, check_classification_targets, _ovr_decision_function) @@ -176,7 +176,6 @@ class OneVsRestClassifier(BaseEstimator, ClassifierMixin, MetaEstimatorMixin): multilabel_ : boolean Whether a OneVsRestClassifier is a multilabel classifier. 
""" - def __init__(self, estimator, n_jobs=1): self.estimator = estimator self.n_jobs = n_jobs @@ -217,6 +216,7 @@ def fit(self, X, y): return self + @if_delegate_has_method('estimator') def partial_fit(self, X, y, classes=None): """Partially fit underlying estimators @@ -488,8 +488,12 @@ def fit(self, X, y): self """ X, y = check_X_y(X, y, accept_sparse=['csr', 'csc']) + check_classification_targets(y) self.classes_ = np.unique(y) + if len(self.classes_) == 1: + raise ValueError("OneVsOneClassifier can not be fit when only one" + " class is present.") n_classes = self.classes_.shape[0] estimators_indices = list(zip(*(Parallel(n_jobs=self.n_jobs)( delayed(_fit_ovo_binary) @@ -498,13 +502,14 @@ def fit(self, X, y): self.estimators_ = estimators_indices[0] try: - self.pairwise_indices_ = estimators_indices[1] \ - if self._pairwise else None + self.pairwise_indices_ = ( + estimators_indices[1] if self._pairwise else None) except AttributeError: self.pairwise_indices_ = None return self + @if_delegate_has_method(delegate='estimator') def partial_fit(self, X, y, classes=None): """Partially fit underlying estimators @@ -544,8 +549,8 @@ def partial_fit(self, X, y, classes=None): n_jobs=self.n_jobs)( delayed(_partial_fit_ovo_binary)( estimator, X, y, self.classes_[i], self.classes_[j]) - for estimator, (i, j) in izip( - self.estimators_, (combinations))) + for estimator, (i, j) in izip(self.estimators_, + (combinations))) self.pairwise_indices_ = None @@ -703,12 +708,14 @@ def fit(self, X, y): ------- self """ + X, y = check_X_y(X, y) if self.code_size <= 0: raise ValueError("code_size should be greater than 0, got {1}" "".format(self.code_size)) _check_estimator(self.estimator) random_state = check_random_state(self.random_state) + check_classification_targets(y) self.classes_ = np.unique(y) n_classes = self.classes_.shape[0] @@ -749,6 +756,7 @@ def predict(self, X): Predicted multi-class targets. 
""" check_is_fitted(self, 'estimators_') + X = check_array(X) Y = np.array([_predict_binary(e, X) for e in self.estimators_]).T pred = euclidean_distances(Y, self.code_book_).argmin(axis=1) return self.classes_[pred] diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 826ece6d50d98..f608936e952ab 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -16,13 +16,14 @@ import numpy as np -from abc import ABCMeta -from .base import BaseEstimator, clone +from abc import ABCMeta, abstractmethod +from .base import BaseEstimator, clone, MetaEstimatorMixin from .base import RegressorMixin, ClassifierMixin from .utils import check_array, check_X_y from .utils.fixes import parallel_helper from .utils.validation import check_is_fitted, has_fit_parameter from .utils.metaestimators import if_delegate_has_method +from .utils.multiclass import check_classification_targets from .externals.joblib import Parallel, delayed from .externals import six @@ -57,8 +58,9 @@ def _partial_fit_estimator(estimator, X, y, classes=None, sample_weight=None, return estimator -class MultiOutputEstimator(six.with_metaclass(ABCMeta, BaseEstimator)): - +class MultiOutputEstimator(six.with_metaclass(ABCMeta, BaseEstimator, + MetaEstimatorMixin)): + @abstractmethod def __init__(self, estimator, n_jobs=1): self.estimator = estimator self.n_jobs = n_jobs @@ -149,6 +151,9 @@ def fit(self, X, y, sample_weight=None): multi_output=True, accept_sparse=True) + if isinstance(self, ClassifierMixin): + check_classification_targets(y) + if y.ndim == 1: raise ValueError("y must have at least two dimensions for " "multi-output regression but has only one.") diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index 1320e2996b6e2..c351d1169f4b2 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -484,13 +484,13 @@ def partial_fit(self, X, y, classes=None, sample_weight=None): y : array-like, shape = [n_samples] Target values. - classes : array-like, shape = [n_classes], optional (default=None) + classes : array-like, shape = [n_classes] (default=None) List of all the classes that can possibly appear in the y vector. Must be provided at the first call to partial_fit, can be omitted in subsequent calls. - sample_weight : array-like, shape = [n_samples], optional (default=None) + sample_weight : array-like, shape = [n_samples] (default=None) Weights applied to individual samples (1. for unweighted). Returns @@ -555,7 +555,7 @@ def fit(self, X, y, sample_weight=None): y : array-like, shape = [n_samples] Target values. - sample_weight : array-like, shape = [n_samples], optional (default=None) + sample_weight : array-like, shape = [n_samples], (default=None) Weights applied to individual samples (1. for unweighted). 
Returns diff --git a/sklearn/neighbors/approximate.py b/sklearn/neighbors/approximate.py index 7f7321abdfb1c..c19cb408d643d 100644 --- a/sklearn/neighbors/approximate.py +++ b/sklearn/neighbors/approximate.py @@ -93,7 +93,7 @@ class GaussianRandomProjectionHash(ProjectionToHashMixin, GaussianRandomProjection): """Use GaussianRandomProjection to produce a cosine LSH fingerprint""" def __init__(self, - n_components=8, + n_components=32, random_state=None): super(GaussianRandomProjectionHash, self).__init__( n_components=n_components, diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py index b62e78e87c223..20ec4b132fc7f 100644 --- a/sklearn/tests/test_multiclass.py +++ b/sklearn/tests/test_multiclass.py @@ -13,7 +13,8 @@ from sklearn.multiclass import OneVsRestClassifier from sklearn.multiclass import OneVsOneClassifier from sklearn.multiclass import OutputCodeClassifier -from sklearn.utils.multiclass import check_classification_targets, type_of_target +from sklearn.utils.multiclass import (check_classification_targets, + type_of_target) from sklearn.utils import shuffle from sklearn.metrics import precision_score @@ -104,6 +105,10 @@ def test_ovr_partial_fit(): pred1 = ovr1.fit(X, y).predict(X) assert_equal(np.mean(pred == y), np.mean(pred1 == y)) + # test partial_fit only exists if estimator has it: + ovr = OneVsRestClassifier(SVC()) + assert_false(hasattr(ovr, "partial_fit")) + def test_ovr_partial_fit_exceptions(): ovr = OneVsRestClassifier(MultinomialNB()) @@ -428,7 +433,8 @@ def test_ovr_pipeline(): def test_ovr_coef_(): - for base_classifier in [SVC(kernel='linear', random_state=0), LinearSVC(random_state=0)]: + for base_classifier in [SVC(kernel='linear', random_state=0), + LinearSVC(random_state=0)]: # SVC has sparse coef with sparse input data ovr = OneVsRestClassifier(base_classifier) @@ -439,7 +445,8 @@ def test_ovr_coef_(): assert_equal(shape[0], n_classes) assert_equal(shape[1], iris.data.shape[1]) # don't densify sparse coefficients - assert_equal(sp.issparse(ovr.estimators_[0].coef_), sp.issparse(ovr.coef_)) + assert_equal(sp.issparse(ovr.estimators_[0].coef_), + sp.issparse(ovr.coef_)) def test_ovr_coef_exceptions(): @@ -508,6 +515,10 @@ def test_ovo_partial_fit_predict(): assert_equal(len(ovo1.estimators_), len(np.unique(iris.target))) assert_greater(np.mean(iris.target == pred1), 0.65) + # test partial_fit only exists if estimator has it: + ovr = OneVsOneClassifier(SVC()) + assert_false(hasattr(ovr, "partial_fit")) + def test_ovo_decision_function(): n_samples = iris.data.shape[0] @@ -606,6 +617,24 @@ def test_ovo_string_y(): assert_array_equal(y, ovo.predict(X)) +def test_ovo_one_class(): + # Test error for OvO with one class + X = np.eye(4) + y = np.array(['a'] * 4) + + ovo = OneVsOneClassifier(LinearSVC()) + assert_raise_message(ValueError, "when only one class", ovo.fit, X, y) + + +def test_ovo_float_y(): + # Test that the OvO errors on float targets + X = iris.data + y = iris.data[:, 0] + + ovo = OneVsOneClassifier(LinearSVC()) + assert_raise_message(ValueError, "Unknown label type", ovo.fit, X, y) + + def test_ecoc_exceptions(): ecoc = OutputCodeClassifier(LinearSVC(random_state=0)) assert_raises(ValueError, ecoc.predict, []) @@ -634,6 +663,15 @@ def test_ecoc_gridsearch(): assert_true(best_C in Cs) +def test_ecoc_float_y(): + # Test that the OCC errors on float targets + X = iris.data + y = iris.data[:, 0] + + ovo = OutputCodeClassifier(LinearSVC()) + assert_raise_message(ValueError, "Unknown label type", ovo.fit, X, y) + + def 
test_pairwise_indices(): clf_precomputed = svm.SVC(kernel='precomputed') X, y = iris.data, iris.target diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index 163363155ca3d..a4217bea63a7c 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -6,6 +6,7 @@ from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_false from sklearn.utils.testing import assert_raises_regex +from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_not_equal @@ -336,3 +337,5 @@ def test_multi_output_exceptions(): y_new = np.column_stack((y1, y2)) moc.fit(X, y) assert_raises(ValueError, moc.score, X, y_new) + # ValueError when y is continuous + assert_raise_message(ValueError, "Unknown label type", moc.fit, X, X[:, 1]) diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py index 2a2cfe1c30fbf..2d3c80510db0d 100644 --- a/sklearn/utils/multiclass.py +++ b/sklearn/utils/multiclass.py @@ -23,6 +23,7 @@ from ..utils.fixes import bincount from ..utils.fixes import array_equal + def _unique_multiclass(y): if hasattr(y, '__array__'): return np.unique(np.asarray(y)) @@ -155,6 +156,7 @@ def is_multilabel(y): return len(labels) < 3 and (y.dtype.kind in 'biu' or # bool, int, uint _is_integral_float(labels)) + def check_classification_targets(y): """Ensure that target y is of a non-regression type. @@ -168,11 +170,10 @@ def check_classification_targets(y): """ y_type = type_of_target(y) if y_type not in ['binary', 'multiclass', 'multiclass-multioutput', - 'multilabel-indicator', 'multilabel-sequences']: + 'multilabel-indicator', 'multilabel-sequences']: raise ValueError("Unknown label type: %r" % y_type) - def type_of_target(y): """Determine the type of data indicated by target `y`