
FEA Add writeable parameter to check_array #29018


Merged · 22 commits · Jun 20, 2024
Commits (changes from all 22 commits)
8dc719b
common test + first applications
jeremiedbb May 14, 2024
5bda2c3
include sparse
jeremiedbb May 14, 2024
98010b3
simpler
jeremiedbb May 14, 2024
64966ed
Merge remote-tracking branch 'upstream/main' into check-array-writeable
jeremiedbb May 23, 2024
50d7cd0
always copy when writeable + read-only but for 1 pandas exception
jeremiedbb May 23, 2024
a8b56a7
nit
jeremiedbb May 23, 2024
1c42d5a
Merge remote-tracking branch 'upstream/main' into check-array-writeable
jeremiedbb May 24, 2024
23cda9c
wip
jeremiedbb May 24, 2024
08cbaee
add writeable to estimators with inplace operations
jeremiedbb May 24, 2024
dfe8483
Merge remote-tracking branch 'upstream/main' into check-array-writeable
jeremiedbb Jun 4, 2024
230d19c
fix sparse and select arrays with flags
jeremiedbb Jun 4, 2024
0fe8eaf
rework mmap test using existing testing tool
jeremiedbb Jun 4, 2024
2f796e2
add change log entry
jeremiedbb Jun 5, 2024
e1e7aa8
Merge remote-tracking branch 'upstream/main' into check-array-writeable
jeremiedbb Jun 12, 2024
096936e
Merge remote-tracking branch 'origin/check-array-writeable' into chec…
jeremiedbb Jun 12, 2024
9ed87a2
rename force_writeable and make it a bool
jeremiedbb Jun 14, 2024
301c5c3
Merge branch 'main' into check-array-writeable
ogrisel Jun 18, 2024
ccfcd05
Merge remote-tracking branch 'upstream/main' into check-array-writeable
jeremiedbb Jun 20, 2024
5344d7b
fix what's new + add comments
jeremiedbb Jun 20, 2024
3e47271
Merge remote-tracking branch 'origin/check-array-writeable' into chec…
jeremiedbb Jun 20, 2024
145e36d
cln merge
jeremiedbb Jun 20, 2024
19dfb21
Merge remote-tracking branch 'upstream/main' into check-array-writeable
jeremiedbb Jun 20, 2024
26 changes: 18 additions & 8 deletions doc/whats_new/v1.5.rst
@@ -20,15 +20,19 @@ Version 1.5.1

**TODO**

Changelog
---------

Changes impacting many modules
------------------------------

- |Fix| Fixed a regression in the validation of the input data of all estimators where
an unexpected error was raised when passing a DataFrame backed by a read-only buffer.
:pr:`29018` by :user:`Jérémie du Boisberranger <jeremiedbb>`.

- |Fix| Fixed a regression causing a dead-lock at import time in some settings.
:pr:`29235` by :user:`Jérémie du Boisberranger <jeremiedbb>`.

Changelog
---------

:mod:`sklearn.metrics`
......................

@@ -37,6 +41,10 @@ Changes impacting many modules
instead of implicitly converting those inputs as regular NumPy arrays.
:pr:`29119` by :user:`Olivier Grisel`.

- |Fix| Fix a regression in :func:`metrics.zero_one_loss` causing an error
for Array API dispatch with multilabel inputs.
:pr:`29269` by :user:`Yaroslav Korobko <Tialo>`.

:mod:`sklearn.model_selection`
..............................

@@ -48,12 +56,14 @@ Changes impacting many modules
grids that have estimators as parameter values.
:pr:`29179` by :user:`Marco Gorelli<MarcoGorelli>`.

:mod:`sklearn.metrics`
..............................
:mod:`sklearn.utils`
....................

- |Fix| Fix a regression in :func:`metrics.zero_one_loss` causing an error
for Array API dispatch with multilabel inputs.
:pr:`29269` by :user:`Yaroslav Korobko <Tialo>`.
- |API| :func:`utils.validation.check_array` has a new parameter, `force_writeable`, to
control the writeability of the output array. If set to `True`, the output array will
be guaranteed to be writeable and a copy will be made if the input array is read-only.
If set to `False`, no guarantee is made about the writeability of the output array.
:pr:`29018` by :user:`Jérémie du Boisberranger <jeremiedbb>`.

.. _changes_1_5:

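Usage note (not part of the diff): a minimal sketch of the behavior documented in the changelog entry above, assuming a scikit-learn build that includes this PR. With `force_writeable=True`, `check_array` copies a read-only input so the returned array can be modified safely.

```python
import numpy as np
from sklearn.utils.validation import check_array

X = np.arange(6, dtype=np.float64).reshape(3, 2)
X.setflags(write=False)  # simulate a read-only buffer (e.g. a joblib memmap)

# Default: no guarantee on writeability; the read-only array may be returned as-is.
X_default = check_array(X)

# force_writeable=True: the output is guaranteed writeable; a copy is made here
# only because the input is read-only.
X_out = check_array(X, force_writeable=True)
assert X_out.flags.writeable
X_out[0, 0] = 42.0  # safe: the caller's read-only array is untouched
```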
9 changes: 3 additions & 6 deletions sklearn/cluster/_affinity_propagation.py
@@ -502,13 +502,10 @@ def fit(self, X, y=None):
Returns the instance itself.
"""
if self.affinity == "precomputed":
accept_sparse = False
else:
accept_sparse = "csr"
X = self._validate_data(X, accept_sparse=accept_sparse)
if self.affinity == "precomputed":
self.affinity_matrix_ = X.copy() if self.copy else X
X = self._validate_data(X, copy=self.copy, force_writeable=True)
self.affinity_matrix_ = X
else: # self.affinity == "euclidean"
X = self._validate_data(X, accept_sparse="csr")
self.affinity_matrix_ = -euclidean_distances(X, squared=True)

if self.affinity_matrix_.shape[0] != self.affinity_matrix_.shape[1]:
5 changes: 4 additions & 1 deletion sklearn/cluster/_hdbscan/hdbscan.py
@@ -770,14 +770,17 @@ def fit(self, X, y=None):
X,
accept_sparse=["csr", "lil"],
dtype=np.float64,
force_writeable=True,
)
else:
# Only non-sparse, precomputed distance matrices are handled here
# and thereby allowed to contain numpy.inf for missing distances

# Perform data validation after removing infinite values (numpy.inf)
# from the given distance matrix.
X = self._validate_data(X, force_all_finite=False, dtype=np.float64)
X = self._validate_data(
X, force_all_finite=False, dtype=np.float64, force_writeable=True
)
if np.isnan(X).any():
# TODO: Support np.nan in Cython implementation for precomputed
# dense HDBSCAN
26 changes: 22 additions & 4 deletions sklearn/cross_decomposition/_pls.py
@@ -263,10 +263,19 @@ def fit(self, X, y=None, Y=None):

check_consistent_length(X, y)
X = self._validate_data(
X, dtype=np.float64, copy=self.copy, ensure_min_samples=2
X,
dtype=np.float64,
force_writeable=True,
copy=self.copy,
ensure_min_samples=2,
)
y = check_array(
y, input_name="y", dtype=np.float64, copy=self.copy, ensure_2d=False
y,
input_name="y",
dtype=np.float64,
force_writeable=True,
copy=self.copy,
ensure_2d=False,
)
if y.ndim == 1:
self._predict_1d = True
@@ -1056,10 +1065,19 @@ def fit(self, X, y=None, Y=None):
y = _deprecate_Y_when_required(y, Y)
check_consistent_length(X, y)
X = self._validate_data(
X, dtype=np.float64, copy=self.copy, ensure_min_samples=2
X,
dtype=np.float64,
force_writeable=True,
copy=self.copy,
ensure_min_samples=2,
)
y = check_array(
y, input_name="y", dtype=np.float64, copy=self.copy, ensure_2d=False
y,
input_name="y",
dtype=np.float64,
force_writeable=True,
copy=self.copy,
ensure_2d=False,
)
if y.ndim == 1:
y = y.reshape(-1, 1)
4 changes: 3 additions & 1 deletion sklearn/decomposition/_factor_analysis.py
@@ -216,7 +216,9 @@ def fit(self, X, y=None):
self : object
FactorAnalysis class instance.
"""
X = self._validate_data(X, copy=self.copy, dtype=np.float64)
X = self._validate_data(
X, copy=self.copy, dtype=np.float64, force_writeable=True
)

n_samples, n_features = X.shape
n_components = self.n_components
7 changes: 6 additions & 1 deletion sklearn/decomposition/_incremental_pca.py
@@ -228,6 +228,7 @@ def fit(self, X, y=None):
accept_sparse=["csr", "csc", "lil"],
copy=self.copy,
dtype=[np.float64, np.float32],
force_writeable=True,
)
n_samples, n_features = X.shape

@@ -277,7 +278,11 @@ def partial_fit(self, X, y=None, check_input=True):
"or use IncrementalPCA.fit to do so in batches."
)
X = self._validate_data(
X, copy=self.copy, dtype=[np.float64, np.float32], reset=first_pass
X,
copy=self.copy,
dtype=[np.float64, np.float32],
force_writeable=True,
reset=first_pass,
)
n_samples, n_features = X.shape
if first_pass:
1 change: 1 addition & 0 deletions sklearn/decomposition/_pca.py
@@ -505,6 +505,7 @@ def _fit(self, X):
X = self._validate_data(
X,
dtype=[xp.float64, xp.float32],
force_writeable=True,
accept_sparse=("csr", "csc"),
ensure_2d=True,
copy=False,
1 change: 1 addition & 0 deletions sklearn/impute/_base.py
@@ -333,6 +333,7 @@ def _validate_input(self, X, in_fit):
reset=in_fit,
accept_sparse="csc",
dtype=dtype,
force_writeable=True if not in_fit else None,
force_all_finite=force_all_finite,
copy=self.copy,
)
1 change: 1 addition & 0 deletions sklearn/impute/_knn.py
@@ -269,6 +269,7 @@ def transform(self, X):
X,
accept_sparse=False,
dtype=FLOAT_DTYPES,
force_writeable=True,
force_all_finite=force_all_finite,
copy=self.copy,
reset=False,
7 changes: 6 additions & 1 deletion sklearn/linear_model/_base.py
@@ -598,7 +598,12 @@ def fit(self, X, y, sample_weight=None):
accept_sparse = False if self.positive else ["csr", "csc", "coo"]

X, y = self._validate_data(
X, y, accept_sparse=accept_sparse, y_numeric=True, multi_output=True
X,
y,
accept_sparse=accept_sparse,
y_numeric=True,
multi_output=True,
force_writeable=True,
)

has_sw = sample_weight is not None
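For context on the change above: a small sketch (not from the PR itself) of what `force_writeable=True` in the `fit` validation is meant to guarantee, assuming a scikit-learn build that includes this PR. Fitting on read-only inputs works because the validated arrays are copied before any in-place preprocessing.

```python
import numpy as np
from sklearn.linear_model import LinearRegression

rng = np.random.RandomState(0)
X = rng.rand(20, 3)
y = X @ np.array([1.0, 2.0, 3.0])

# Make the inputs read-only, as happens with joblib memory-mapped arrays.
X.setflags(write=False)
y.setflags(write=False)

# fit validates with force_writeable=True, so read-only inputs are copied
# internally and the caller's arrays are never written to.
reg = LinearRegression().fit(X, y)
print(reg.coef_)  # approximately [1. 2. 3.]
```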
11 changes: 9 additions & 2 deletions sklearn/linear_model/_bayes.py
@@ -235,7 +235,9 @@ def fit(self, X, y, sample_weight=None):
self : object
Returns the instance itself.
"""
X, y = self._validate_data(X, y, dtype=[np.float64, np.float32], y_numeric=True)
X, y = self._validate_data(
X, y, dtype=[np.float64, np.float32], force_writeable=True, y_numeric=True
)
dtype = X.dtype

if sample_weight is not None:
@@ -620,7 +622,12 @@ def fit(self, X, y):
Fitted estimator.
"""
X, y = self._validate_data(
X, y, dtype=[np.float64, np.float32], y_numeric=True, ensure_min_samples=2
X,
y,
dtype=[np.float64, np.float32],
force_writeable=True,
y_numeric=True,
ensure_min_samples=2,
)
dtype = X.dtype

4 changes: 4 additions & 0 deletions sklearn/linear_model/_coordinate_descent.py
@@ -979,6 +979,7 @@ def fit(self, X, y, sample_weight=None, check_input=True):
accept_sparse="csc",
order="F",
dtype=[np.float64, np.float32],
force_writeable=True,
accept_large_sparse=False,
copy=X_copied,
multi_output=True,
@@ -1607,6 +1608,7 @@ def fit(self, X, y, sample_weight=None, **params):
check_X_params = dict(
accept_sparse="csc",
dtype=[np.float64, np.float32],
force_writeable=True,
copy=False,
accept_large_sparse=False,
)
@@ -1632,6 +1634,7 @@ def fit(self, X, y, sample_weight=None, **params):
accept_sparse="csc",
dtype=[np.float64, np.float32],
order="F",
force_writeable=True,
copy=copy_X,
)
X, y = self._validate_data(
@@ -2508,6 +2511,7 @@ def fit(self, X, y):
check_X_params = dict(
dtype=[np.float64, np.float32],
order="F",
force_writeable=True,
copy=self.copy_X and self.fit_intercept,
)
check_y_params = dict(ensure_2d=False, order="F")
8 changes: 5 additions & 3 deletions sklearn/linear_model/_least_angle.py
@@ -1177,7 +1177,9 @@ def fit(self, X, y, Xy=None):
self : object
Returns an instance of self.
"""
X, y = self._validate_data(X, y, y_numeric=True, multi_output=True)
X, y = self._validate_data(
X, y, force_writeable=True, y_numeric=True, multi_output=True
)

alpha = getattr(self, "alpha", 0.0)
if hasattr(self, "n_nonzero_coefs"):
@@ -1718,7 +1720,7 @@ def fit(self, X, y, **params):
"""
_raise_for_params(params, self, "fit")

X, y = self._validate_data(X, y, y_numeric=True)
X, y = self._validate_data(X, y, force_writeable=True, y_numeric=True)
X = as_float_array(X, copy=self.copy_X)
y = as_float_array(y, copy=self.copy_X)

@@ -2235,7 +2237,7 @@ def fit(self, X, y, copy_X=None):
"""
if copy_X is None:
copy_X = self.copy_X
X, y = self._validate_data(X, y, y_numeric=True)
X, y = self._validate_data(X, y, force_writeable=True, y_numeric=True)

X, y, Xmean, ymean, Xstd = _preprocess_data(
X, y, fit_intercept=self.fit_intercept, copy=copy_X
2 changes: 2 additions & 0 deletions sklearn/linear_model/_ridge.py
@@ -1241,6 +1241,7 @@ def fit(self, X, y, sample_weight=None):
y,
accept_sparse=_accept_sparse,
dtype=[xp.float64, xp.float32],
force_writeable=True,
multi_output=True,
y_numeric=True,
)
@@ -1290,6 +1291,7 @@ def _prepare_data(self, X, y, sample_weight, solver):
accept_sparse=accept_sparse,
multi_output=True,
y_numeric=False,
force_writeable=True,
)

self._label_binarizer = LabelBinarizer(pos_label=1, neg_label=-1)