[MRG+1] More robust input validation, more testing. #4136
Changes from all commits: e3e0827, 8ef0b9a, 9f50699, 021bf74
@@ -348,6 +348,9 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,
    ElasticNetCV
    """
    X = check_array(X, 'csc', dtype=np.float64, order='F', copy=copy_X)
+   if Xy is not None:
+       Xy = check_array(Xy, 'csc', dtype=np.float64, order='F', copy=False,
+                        ensure_2d=False)
    n_samples, n_features = X.shape

    multi_output = False

Comment (on the added Xy check): Maybe @agramfort wants to have a look at this. This comes up in ElasticNetCV. In c2f0b31 I enforced it there, but there was a comment that it shouldn't be enforced there. Not sure about that.
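For context, the new check only fires when a caller actually passes a precomputed X^T y through the optional Xy argument of enet_path. A minimal sketch of such a call (my own example, with made-up data; the return value is left unpacked since the path's return signature is not shown in this diff):

    import numpy as np
    from sklearn.linear_model import enet_path

    rng = np.random.RandomState(0)
    X = rng.randn(20, 5)
    y = rng.randn(20)
    # Precomputed X^T y passed through the optional Xy argument; with this PR
    # it is run through check_array just like X before the path is computed.
    path = enet_path(X, y, Xy=np.dot(X.T, y))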
@@ -389,7 +392,6 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,
    if selection not in ['random', 'cyclic']:
        raise ValueError("selection should be either random or cyclic.")
    random = (selection == 'random')
-   models = []

    if not multi_output:
        coefs = np.empty((n_features, n_alphas), dtype=np.float64)
@@ -414,6 +416,7 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,
            model = cd_fast.enet_coordinate_descent_multi_task(
                coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random)
        elif isinstance(precompute, np.ndarray):
+           precompute = check_array(precompute, 'csc', dtype=np.float64, order='F')
            model = cd_fast.enet_coordinate_descent_gram(
                coef_, l1_reg, l2_reg, precompute, Xy, y, max_iter,
                tol, rng, random, positive)
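The ndarray precompute branch above is reached when a user supplies a Gram matrix themselves. A hedged sketch of how that path might be exercised through the public ElasticNet API (illustrative values; fit_intercept is disabled so the Gram matrix of the raw X stays consistent with the data the solver sees):

    import numpy as np
    from sklearn.linear_model import ElasticNet

    rng = np.random.RandomState(0)
    X = rng.randn(30, 4)
    y = rng.randn(30)
    # A user-supplied Gram matrix X^T X routes the solver through the ndarray
    # `precompute` branch, which the new check_array call now validates.
    gram = np.dot(X.T, X)
    model = ElasticNet(alpha=0.1, precompute=gram, fit_intercept=False).fit(X, y)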
@@ -1418,6 +1421,7 @@ def __init__(self, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,
        self.random_state = random_state
        self.selection = selection


###############################################################################
# Multi Task ElasticNet and Lasso models (with joint feature selection)
@@ -357,7 +357,7 @@ def __init__(self, n_components=2, metric=True, n_init=4,
    def _pairwise(self):
        return self.kernel == "precomputed"

-   def fit(self, X, init=None, y=None):
+   def fit(self, X, y=None, init=None):
        """
        Computes the position of the points in the embedding space

Comment (on the new signature): Maybe I merged too fast. I am still not 100% sure about this :-/

Comment: I forgot about that one. I don't think it's a big deal though.
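The reason y has to come second, as I understand the convention, is that generic scikit-learn tooling passes y positionally; with the old signature it would have landed in init. A small sketch of that calling pattern (fit_positionally is a hypothetical helper standing in for pipelines and the common tests, not sklearn code):

    import numpy as np
    from sklearn.manifold import MDS

    def fit_positionally(estimator, X, y=None):
        # Generic tooling passes y as the second positional argument, which is
        # why fit must be (X, y=None, ...) rather than (X, init=None, y=None).
        return estimator.fit(X, y)

    X = np.random.RandomState(0).randn(10, 3)
    fit_positionally(MDS(n_components=2, n_init=1), X)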
@@ -374,7 +374,7 @@ def fit(self, X, init=None, y=None):
        self.fit_transform(X, init=init)
        return self

-   def fit_transform(self, X, init=None, y=None):
+   def fit_transform(self, X, y=None, init=None):
        """
        Fit the data from X, and returns the embedded coordinates
@@ -389,6 +389,7 @@ def fit_transform(self, X, init=None, y=None):
            if ndarray, initialize the SMACOF algorithm with this array.

        """
+       X = check_array(X)
        if X.shape[0] == X.shape[1] and self.dissimilarity != "precomputed":
            warnings.warn("The MDS API has changed. ``fit`` now constructs an"
                          " dissimilarity matrix from data. To use a custom "
@@ -215,7 +215,7 @@ def _gradient_descent(objective, p0, it, n_iter, n_iter_without_progress=30,
        update = momentum * update - learning_rate * grad
        p += update

-       if verbose >= 2 and (i+1) % 10 == 0:
+       if verbose >= 2 and (i + 1) % 10 == 0:
            print("[t-SNE] Iteration %d: error = %.7f, gradient norm = %.7f"
                  % (i + 1, error, grad_norm))
@@ -404,7 +404,7 @@ def __init__(self, n_components=2, perplexity=30.0,
        self.verbose = verbose
        self.random_state = random_state

-   def _fit(self, X):
+   def fit(self, X, y=None):
        """Fit the model using X as training data.

        Parameters
@@ -413,7 +413,7 @@ def _fit(self, X):
            If the metric is 'precomputed' X must be a square distance
            matrix. Otherwise it contains a sample per row.
        """
-       X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])
+       X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], dtype=np.float64)
        random_state = check_random_state(self.random_state)

        if self.early_exaggeration < 1.0:
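Forcing dtype=np.float64 here means integer (or other) input is cast before the gradient-descent code sees it. A quick illustration of that check_array behaviour (my own example, not from the diff):

    import numpy as np
    from sklearn.utils import check_array

    X_int = np.arange(6).reshape(3, 2)           # integer input
    X_checked = check_array(X_int, accept_sparse=['csr', 'csc', 'coo'],
                            dtype=np.float64)
    print(X_checked.dtype)                       # -> float64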
@@ -507,7 +507,7 @@ def _tsne(self, P, alpha, n_samples, random_state, X_embedded=None):

        return X_embedded

-   def fit_transform(self, X):
+   def fit_transform(self, X, y=None):
        """Transform X to the embedded space.

        Parameters
@@ -521,5 +521,5 @@ def fit_transform(self, X):
        X_new : array, shape (n_samples, n_components)
            Embedding of the training data in low-dimensional space.
        """
-       self._fit(X)
+       self.fit(X)
        return self.embedding_

Comment (on the self.fit(X) line): This looks strange.

Comment: Never mind, I thought that we were in the fit method.
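With _fit promoted to a public fit, TSNE follows the usual fit / fit_transform pattern. A rough usage sketch of the two entry points (hyperparameters chosen arbitrarily):

    import numpy as np
    from sklearn.manifold import TSNE

    X = np.random.RandomState(0).randn(50, 10)
    # fit_transform returns the low-dimensional embedding directly...
    X_embedded = TSNE(n_components=2, random_state=0).fit_transform(X)
    # ...while fit stores the same result on the estimator as embedding_.
    model = TSNE(n_components=2, random_state=0).fit(X)
    print(model.embedding_.shape)   # (50, 2)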
@@ -16,7 +16,7 @@
from scipy.spatial.distance import cdist

from ..externals.six.moves import xrange
-from ..utils import check_random_state
+from ..utils import check_random_state, check_array
from ..utils.extmath import logsumexp, pinvh, squared_norm
from ..utils.validation import check_is_fitted
from .. import cluster
@@ -148,12 +148,12 @@ class DPGMM(GMM):
    n_iter : int, default 10
        Maximum number of iterations to perform before convergence.

-   params : string, default 'wmc'
+   params : string, default 'wmc'
        Controls which parameters are updated in the training
        process. Can contain any combination of 'w' for weights,
        'm' for means, and 'c' for covars.

-   init_params : string, default 'wmc'
+   init_params : string, default 'wmc'
        Controls which parameters are updated in the initialization
        process. Can contain any combination of 'w' for weights,
        'm' for means, and 'c' for covars. Defaults to 'wmc'.
@@ -250,7 +250,7 @@ def score_samples(self, X):
        """
        check_is_fitted(self, 'gamma_')

-       X = np.asarray(X)
+       X = check_array(X)
        if X.ndim == 1:
            X = X[:, np.newaxis]
        z = np.zeros((X.shape[0], self.n_components))
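The switch from np.asarray to check_array is the substance of this PR: malformed input is rejected early instead of propagating into the estimator. A quick illustration of the difference (my own example; exact error message varies by version):

    import numpy as np
    from sklearn.utils import check_array

    bad = np.array([[1.0, np.nan], [3.0, 4.0]])
    np.asarray(bad)          # accepted silently; the NaN only blows up later
    try:
        check_array(bad)     # rejected up front with an informative ValueError
    except ValueError as exc:
        print(exc)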
@@ -461,7 +461,7 @@ def _logprior(self, z):
    def lower_bound(self, X, z):
        """returns a lower bound on model evidence based on X and membership"""
        check_is_fitted(self, 'means_')

        if self.covariance_type not in ['full', 'tied', 'diag', 'spherical']:
            raise NotImplementedError("This ctype is not implemented: %s"
                                      % self.covariance_type)
@@ -480,7 +480,7 @@ def _set_weights(self):
                                   + self.gamma_[i, 2])
        self.weights_ /= np.sum(self.weights_)

-   def fit(self, X):
+   def fit(self, X, y=None):
        """Estimate model parameters with the variational
        algorithm.
@@ -501,10 +501,10 @@ def fit(self, X):
            List of n_features-dimensional data points. Each row
            corresponds to a single data point.
        """
-       self.random_state = check_random_state(self.random_state)
+       self.random_state_ = check_random_state(self.random_state)

        ## initialization step
-       X = np.asarray(X)
+       X = check_array(X)
        if X.ndim == 1:
            X = X[:, np.newaxis]

Review thread (on the random_state_ line):

Comment: It looks strange that we need to store the random state as a fitted attribute.

Comment: Why? How else should we keep sampling? If we do ...

Comment: Hum, this is a good question, I wonder if we deal consistently with this case.

Comment: We used this pattern in other places, I don't remember where, though. The previous pattern of just overwriting the parameter is clearly not what we want, though.

Comment: I agree.

Comment: BTW, wouldn't it make sense to have a common test to check that ...

Comment: Do you think that will be different from a ...

Comment: This is different but we can do that in another PR.

Comment: Oh, right, I got it. Btw, do you think it is ok to modify it if a RandomState object was passed? (And do you want to open an issue for the test?)
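The pattern under discussion, as I read it, keeps the random_state constructor parameter untouched and materialises the actual RandomState as a trailing-underscore attribute at fit time. A minimal sketch of that idiom (ToyEstimator and offset_ are illustrative names, not sklearn code):

    import numpy as np
    from sklearn.utils import check_random_state

    class ToyEstimator(object):
        def __init__(self, random_state=None):
            # The constructor only stores the parameter, so get_params()/
            # set_params() and cloning keep working.
            self.random_state = random_state

        def fit(self, X, y=None):
            # The concrete RandomState lives in a trailing-underscore
            # attribute, i.e. it is treated as derived state set at fit time.
            self.random_state_ = check_random_state(self.random_state)
            self.offset_ = self.random_state_.randn(X.shape[1])
            return self

    ToyEstimator(random_state=0).fit(np.zeros((5, 3)))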
@@ -521,7 +521,7 @@ def fit(self, X):
        if 'm' in self.init_params or not hasattr(self, 'means_'):
            self.means_ = cluster.KMeans(
                n_clusters=self.n_components,
-               random_state=self.random_state).fit(X).cluster_centers_[::-1]
+               random_state=self.random_state_).fit(X).cluster_centers_[::-1]

        if 'w' in self.init_params or not hasattr(self, 'weights_'):
            self.weights_ = np.tile(1.0 / self.n_components, self.n_components)
@@ -705,7 +705,7 @@ def score_samples(self, X):
        """
        check_is_fitted(self, 'gamma_')

-       X = np.asarray(X)
+       X = check_array(X)
        if X.ndim == 1:
            X = X[:, np.newaxis]
        dg = digamma(self.gamma_) - digamma(np.sum(self.gamma_))
Review thread:

Comment: It looks like it's not needed.

Comment: I ran into a case where max_iter is 0. I opened #4134 because I'm not familiar with the details.

Comment: Let's put an inline comment to make it explicit that this is to support the max_iter=0 case. Alternatively, I would be OK with adding input validation to ensure that models configured with max_iter < 1 consistently raise a ValueError. I don't see the point in supporting the max_iter=0 edge case in the first place. If most sklearn models already do something reasonable (i.e. they do not crash at fit, predict, or transform time) then this is fine. But if this is problematic for any of them, I would just explicitly disable support for max_iter=0 rather than hacking in workarounds for this useless edge case.

Comment: Have you had a look at #4134? It is not like I'm setting max_iter anywhere. I just run with the default arguments on random data, and the estimator crashes every time. With this fix, at least it runs through. I don't know what is happening, which is why I opened #4134.

Comment: Alright, I did not make the connection. Still, please add an inline comment such as: ...
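One of the options floated above is to validate max_iter up front instead of special-casing max_iter=0. A hedged sketch of what such a check could look like (_check_max_iter is a hypothetical helper for illustration, not the code this PR adds):

    def _check_max_iter(max_iter):
        # Fail fast on non-positive iteration counts instead of special-casing
        # max_iter=0 deeper inside the fitting loop.
        if max_iter < 1:
            raise ValueError("max_iter must be at least 1, got %r" % max_iter)
        return max_iter

    _check_max_iter(10)     # passes
    # _check_max_iter(0)    # would raise ValueError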