scikit-learn · ogrisel · Jun 23, 2018 · Jun 22, 2018 · Jun 22, 2018 · Jun 23, 2018
diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst
@@ -501,6 +501,11 @@ Decomposition, manifold learning and clustering
   :class:`mixture.BayesianGaussianMixture`. :issue:`10740` by :user:`Erich
   Schubert <kno10>` and :user:`Guillaume Lemaitre <glemaitre>`.
 
+- Fixed a bug in :class:`decomposition.SparseCoder` when running OMP sparse
+  coding in parallel using readonly memory mapped datastructures. :issue:`5956`
+  by :user:`Vighnesh Birodkar <vighneshbirodkar>` and
+  :user:`Olivier Grisel <ogrisel>`.
+
 Metrics
 
 - Fixed a bug in :func:`metrics.precision_recall_fscore_support`

diff --git a/sklearn/decomposition/tests/test_dict_learning.py b/sklearn/decomposition/tests/test_dict_learning.py
@@ -1,3 +1,4 @@
+from __future__ import division
 import pytest
 
 import numpy as np
@@ -366,3 +367,22 @@ def test_sparse_coder_estimator():
                        transform_alpha=0.001).transform(X)
     assert_true(not np.all(code == 0))
     assert_less(np.sqrt(np.sum((np.dot(code, V) - X) ** 2)), 0.1)
+
+
+def test_sparse_coder_parallel_mmap():
+    # Non-regression test for:
+    # https://github.com/scikit-learn/scikit-learn/issues/5956
+    # Test that SparseCoder does not error by passing reading only
+    # arrays to child processes
+
+    rng = np.random.RandomState(777)
+    n_components, n_features = 40, 64
+    init_dict = rng.rand(n_components, n_features)
+    # Ensure that `data` is >2M. Joblib memory maps arrays
+    # if they are larger than 1MB. The 4 accounts for float32
+    # data type
+    n_samples = int(2e6) // (4 * n_features)
+    data = np.random.rand(n_samples, n_features).astype(np.float32)
+
+    sc = SparseCoder(init_dict, transform_algorithm='omp', n_jobs=2)
+    sc.fit_transform(data)
diff --git a/sklearn/linear_model/omp.py b/sklearn/linear_model/omp.py
@@ -191,7 +191,7 @@ def _gram_omp(Gram, Xy, n_nonzero_coefs, tol_0=None, tol=None,
     """
     Gram = Gram.copy('F') if copy_Gram else np.asfortranarray(Gram)
 
-    if copy_Xy:
+    if copy_Xy or not Xy.flags.writeable:
         Xy = Xy.copy()
 
     min_float = np.finfo(Gram.dtype).eps
@@ -491,6 +491,9 @@ def orthogonal_mp_gram(Gram, Xy, n_nonzero_coefs=None, tol=None,
         Xy = Xy[:, np.newaxis]
         if tol is not None:
             norms_squared = [norms_squared]
+    if copy_Xy or not Xy.flags.writeable:
+        # Make the copy once instead of many times in _gram_omp itself.
+        Xy = Xy.copy()
 
     if n_nonzero_coefs is None and tol is None:
         n_nonzero_coefs = int(0.1 * len(Gram))
@@ -515,7 +518,7 @@ def orthogonal_mp_gram(Gram, Xy, n_nonzero_coefs=None, tol=None,
         out = _gram_omp(
             Gram, Xy[:, k], n_nonzero_coefs,
             norms_squared[k] if tol is not None else None, tol,
-            copy_Gram=copy_Gram, copy_Xy=copy_Xy,
+            copy_Gram=copy_Gram, copy_Xy=False,
             return_path=return_path)
         if return_path:
             _, idx, coefs, n_iter = out

diff --git a/sklearn/linear_model/tests/test_omp.py b/sklearn/linear_model/tests/test_omp.py
@@ -104,6 +104,20 @@ def test_perfect_signal_recovery():
     assert_array_almost_equal(gamma[:, 0], gamma_gram, decimal=2)
 
 
+def test_orthogonal_mp_gram_readonly():
+    # Non-regression test for:
+    # https://github.com/scikit-learn/scikit-learn/issues/5956
+    idx, = gamma[:, 0].nonzero()
+    G_readonly = G.copy()
+    G_readonly.setflags(write=False)
+    Xy_readonly = Xy.copy()
+    Xy_readonly.setflags(write=False)
+    gamma_gram = orthogonal_mp_gram(G_readonly, Xy_readonly[:, 0], 5,
+                                    copy_Gram=False, copy_Xy=False)
+    assert_array_equal(idx, np.flatnonzero(gamma_gram))
+    assert_array_almost_equal(gamma[:, 0], gamma_gram, decimal=2)
+
+
 def test_estimator():
     omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
     omp.fit(X, y[:, 0])