scikit-learn · amueller · Jul 31, 2015 · May 27, 2015 · GaelVaroquaux · May 30, 2015
diff --git a/examples/decomposition/plot_image_denoising.py b/examples/decomposition/plot_image_denoising.py
@@ -46,7 +46,6 @@
 
 ###############################################################################
 # Load Lena image and extract patches
-
 lena = lena() / 256.0
 
 # downsample for higher speed

diff --git a/sklearn/decomposition/tests/test_dict_learning.py b/sklearn/decomposition/tests/test_dict_learning.py
@@ -1,12 +1,14 @@
 import numpy as np
 
+
 from sklearn.utils.testing import assert_array_almost_equal
 from sklearn.utils.testing import assert_array_equal
 from sklearn.utils.testing import assert_equal
 from sklearn.utils.testing import assert_true
 from sklearn.utils.testing import assert_less
 from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import ignore_warnings
+from sklearn.utils.testing import TempMemmap
 
 from sklearn.decomposition import DictionaryLearning
 from sklearn.decomposition import MiniBatchDictionaryLearning
@@ -60,6 +62,15 @@ def test_dict_learning_reconstruction_parallel():
     assert_array_almost_equal(np.dot(code, dico.components_), X, decimal=2)
 
 
+def test_dict_learning_lassocd_readonly_data():
+    # Fit and transform with transform_algorithm='lasso_cd' on the data
+    # handed back by TempMemmap (mmap_mode='r' by default), then check
+    # that code * dictionary reconstructs the input.
+    n_components = 12
+    with TempMemmap(X) as X_read_only:
+        dico = DictionaryLearning(
+            n_components, transform_algorithm='lasso_cd',
+            transform_alpha=0.001, random_state=0, n_jobs=-1)
+        code = dico.fit(X_read_only).transform(X_read_only)
+        reconstruction = np.dot(code, dico.components_)
+        assert_array_almost_equal(reconstruction, X_read_only, decimal=2)
+
+
 def test_dict_learning_nonzero_coefs():
     n_components = 4
     dico = DictionaryLearning(n_components, transform_algorithm='lars',
@@ -214,4 +225,4 @@ def test_sparse_coder_estimator():
     code = SparseCoder(dictionary=V, transform_algorithm='lasso_lars',
                        transform_alpha=0.001).transform(X)
     assert_true(not np.all(code == 0))
-    assert_less(np.sqrt(np.sum((np.dot(code, V) - X) ** 2)), 0.1)
+    assert_less(np.sqrt(np.sum((np.dot(code, V) - X) ** 2)), 0.1)
diff --git a/sklearn/linear_model/cd_fast.c b/sklearn/linear_model/cd_fast.c
diff --git a/sklearn/linear_model/cd_fast.pyx b/sklearn/linear_model/cd_fast.pyx
@@ -289,8 +289,9 @@ def enet_coordinate_descent(np.ndarray[DOUBLE, ndim=1] w,
 @cython.cdivision(True)
 def sparse_enet_coordinate_descent(double[:] w,
                             double alpha, double beta,
-                            double[:] X_data, int[:] X_indices,
-                            int[:] X_indptr, double[:] y,
+                            np.ndarray[double, ndim=1] X_data,
+                            np.ndarray[int, ndim=1] X_indices,
+                            np.ndarray[int, ndim=1] X_indptr, np.ndarray[double, ndim=1] y,
                             double[:] X_mean, int max_iter,
                             double tol, object rng, bint random=0,
                             bint positive=0):
@@ -487,7 +488,9 @@ def sparse_enet_coordinate_descent(double[:] w,
 @cython.wraparound(False)
 @cython.cdivision(True)
 def enet_coordinate_descent_gram(double[:] w, double alpha, double beta,
-                                 double[:, :] Q, double[:] q, double[:] y,
+                                 np.ndarray[double, ndim=2] Q,
+                                 np.ndarray[double, ndim=1] q,
+                                 np.ndarray[double, ndim=1] y,
                                  int max_iter, double tol, object rng,
                                  bint random=0, bint positive=0):
     """Cython version of the coordinate descent algorithm
@@ -628,8 +631,8 @@ def enet_coordinate_descent_gram(double[:] w, double alpha, double beta,
 @cython.wraparound(False)
 @cython.cdivision(True)
 def enet_coordinate_descent_multi_task(double[::1, :] W, double l1_reg,
-                                       double l2_reg, double[::1, :] X,
-                                       double[:, :] Y, int max_iter,
+                                       double l2_reg, np.ndarray[double, ndim=2] X,
+                                       np.ndarray[double, ndim=2] Y, int max_iter,
                                        double tol, object rng,
                                        bint random=0):
     """Cython version of the coordinate descent algorithm

diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py
@@ -359,6 +359,7 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,
     ElasticNetCV
     """
     X = check_array(X, 'csc', dtype=np.float64, order='F', copy=copy_X)
+    y = check_array(y, 'csc', dtype=np.float64, order='F', copy=False, ensure_2d=False)
     if Xy is not None:
         Xy = check_array(Xy, 'csc', dtype=np.float64, order='F', copy=False,
                          ensure_2d=False)

diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -19,6 +19,7 @@
 from sklearn.utils.testing import assert_warns
 from sklearn.utils.testing import ignore_warnings
 from sklearn.utils.testing import assert_array_equal
+from sklearn.utils.testing import TempMemmap
 
 from sklearn.linear_model.coordinate_descent import Lasso, \
     LassoCV, ElasticNet, ElasticNetCV, MultiTaskLasso, MultiTaskElasticNet, \
@@ -388,6 +389,29 @@ def test_multi_task_lasso_and_enet():
     assert_array_almost_equal(clf.coef_[0], clf.coef_[1])
 
 
+def test_lasso_readonly_data():
+    # Fit Lasso on the arrays handed back by TempMemmap (mmap_mode='r' by
+    # default) for a three-point straight line, then check the coefficient,
+    # the predictions and the duality gap.
+    X = np.array([[-1], [0], [1]])
+    Y = np.array([-1, 0, 1])   # just a straight line
+    T = np.array([[2], [3], [4]])  # test sample
+    with TempMemmap((X, Y)) as (X_mm, Y_mm):
+        clf = Lasso(alpha=0.5).fit(X_mm, Y_mm)
+        pred = clf.predict(T)
+        assert_array_almost_equal(clf.coef_, [.25])
+        assert_array_almost_equal(pred, [0.5, 0.75, 1.])
+        assert_almost_equal(clf.dual_gap_, 0)
+
+
+def test_multi_task_lasso_readonly_data():
+    # MultiTaskLasso must fit when both X and Y are the arrays handed back
+    # by TempMemmap (mmap_mode='r' by default).
+    X, y, _, _ = build_dataset()
+    Y = np.c_[y, y]
+    with TempMemmap((X, Y)) as (X, Y):
+        # Y must not be rebuilt with np.c_ inside this block: that would
+        # replace the memory-mapped target by a fresh in-memory array and
+        # the test would no longer exercise read-only Y.
+        clf = MultiTaskLasso(alpha=1, tol=1e-8).fit(X, Y)
+        assert_true(0 < clf.dual_gap_ < 1e-5)
+        assert_array_almost_equal(clf.coef_[0], clf.coef_[1])
+
+
 def test_enet_multitarget():
     n_targets = 3
     X, y, _, _ = build_dataset(n_samples=10, n_features=8,

diff --git a/sklearn/linear_model/tests/test_least_angle.py b/sklearn/linear_model/tests/test_least_angle.py
@@ -1,7 +1,3 @@
-import tempfile
-import shutil
-import os.path as op
-import warnings
 from nose.tools import assert_equal
 
 import numpy as np
@@ -16,6 +12,7 @@
 from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import ignore_warnings
 from sklearn.utils.testing import assert_no_warnings, assert_warns
+from sklearn.utils.testing import TempMemmap
 from sklearn.utils import ConvergenceWarning
 from sklearn import linear_model, datasets
 from sklearn.linear_model.least_angle import _lars_path_residues
@@ -440,19 +437,6 @@ def test_lars_path_readonly_data():
     # This is a non-regression test for:
     # https://github.com/scikit-learn/scikit-learn/issues/4597
     splitted_data = train_test_split(X, y, random_state=42)
-    temp_folder = tempfile.mkdtemp()
-    try:
-        fpath = op.join(temp_folder, 'data.pkl')
-        joblib.dump(splitted_data, fpath)
-        X_train, X_test, y_train, y_test = joblib.load(fpath, mmap_mode='r')
-
+    with TempMemmap(splitted_data) as (X_train, X_test, y_train, y_test):
         # The following should not fail despite copy=False
-        _lars_path_residues(X_train, y_train, X_test, y_test, copy=False)
-    finally:
-        # try to release the mmap file handle in time to be able to delete
-        # the temporary folder under windows
-        del X_train, X_test, y_train, y_test
-        try:
-            shutil.rmtree(temp_folder)
-        except shutil.WindowsError:
-            warnings.warn("Could not delete temporary folder %s" % temp_folder)
+        _lars_path_residues(X_train, y_train, X_test, y_test, copy=False)
diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py
@@ -28,8 +28,20 @@
     from urllib.request import urlopen
     from urllib.error import HTTPError
 
+import tempfile
+import shutil
+import os.path as op
+import atexit
+
+# WindowsError only exists on Windows
+try:
+    WindowsError
+except NameError:
+    WindowsError = None
+
 import sklearn
 from sklearn.base import BaseEstimator
+from sklearn.externals import joblib
 
 # Conveniently import all assertions in one place.
 from nose.tools import assert_equal
@@ -697,5 +709,36 @@ def check_skip_travis():
     if os.environ.get('TRAVIS') == "true":
         raise SkipTest("This test needs to be skipped on Travis")
 
+
+def _delete_folder(folder_path, warn=False):
+    """Utility function to cleanup a temporary folder if still existing.
+
+    Copy from joblib.pool (for independence).
+
+    Parameters
+    ----------
+    folder_path : str
+        Folder to remove recursively; nothing is done if it does not exist.
+    warn : bool, default False
+        If True, emit a warning when a WindowsError prevents the deletion
+        instead of ignoring it silently.
+
+    Raises
+    ------
+    OSError
+        If the deletion fails with anything other than a WindowsError.
+    """
+    try:
+        if os.path.exists(folder_path):
+            # This can fail under windows,
+            #  but will succeed when called by atexit
+            shutil.rmtree(folder_path)
+    except OSError as exc:
+        # WindowsError is bound to None off Windows, and ``except None`` is
+        # a TypeError on Python 3 as soon as an exception has to be matched.
+        # Catch the OSError base class and filter explicitly instead.
+        if WindowsError is None or not isinstance(exc, WindowsError):
+            raise
+        if warn:
+            warnings.warn("Could not delete temporary folder %s" % folder_path)
+
+
+class TempMemmap(object):
+    """Context manager handing back ``data`` reloaded through joblib.
+
+    On entry the data is dumped with ``joblib.dump`` into a temporary folder
+    and loaded back with ``joblib.load(..., mmap_mode=mmap_mode)`` (``'r'``
+    by default). On exit the folder is deleted; a second deletion attempt
+    is registered with ``atexit``.
+    """
+
+    def __init__(self, data, mmap_mode='r'):
+        self.temp_folder = tempfile.mkdtemp(prefix='sklearn_testing_')
+        self.mmap_mode = mmap_mode
+        self.data = data
+
+    def __enter__(self):
+        # Round-trip the data through a pickle file so that it can be
+        # loaded back with the requested mmap_mode.
+        pickle_path = op.join(self.temp_folder, 'data.pkl')
+        joblib.dump(self.data, pickle_path)
+        memmapped = joblib.load(pickle_path, mmap_mode=self.mmap_mode)
+        # Fallback cleanup at interpreter exit, in case __exit__ could not
+        # delete the folder.
+        atexit.register(lambda: _delete_folder(self.temp_folder, warn=True))
+        return memmapped
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        # First deletion attempt; failures caught by _delete_folder are
+        # silent here because warn keeps its default.
+        _delete_folder(self.temp_folder)
+
+
 with_network = with_setup(check_skip_network)
 with_travis = with_setup(check_skip_travis)