From 94c6c4d241bc2d9c424099ef7f4f9fb4abb0cc73 Mon Sep 17 00:00:00 2001
From: Peter Piontek <piontek0@gmail.com>
Date: Tue, 28 Mar 2023 21:00:29 +0200
Subject: [PATCH 1/4] DOC Added the meanings of default=None for PatchExtractor
 parameters

---
 sklearn/feature_extraction/image.py | 41 +++++++++--------------------
 1 file changed, 13 insertions(+), 28 deletions(-)

diff --git a/sklearn/feature_extraction/image.py b/sklearn/feature_extraction/image.py
index 06cbe3b423276..86421767f2df0 100644
--- a/sklearn/feature_extraction/image.py
+++ b/sklearn/feature_extraction/image.py
@@ -92,9 +92,7 @@ def _mask_edges_weights(mask, edges, weights=None):
         return edges, weights
 
 
-def _to_graph(
-    n_x, n_y, n_z, mask=None, img=None, return_as=sparse.coo_matrix, dtype=None
-):
+def _to_graph(n_x, n_y, n_z, mask=None, img=None, return_as=sparse.coo_matrix, dtype=None):
     """Auxiliary function for img_to_graph and grid_to_graph"""
     edges = _make_edges_3d(n_x, n_y, n_z)
 
@@ -198,9 +196,7 @@ def img_to_graph(img, *, mask=None, return_as=sparse.coo_matrix, dtype=None):
         "dtype": "no_validation",  # validation delegated to numpy
     }
 )
-def grid_to_graph(
-    n_x, n_y, n_z=1, *, mask=None, return_as=sparse.coo_matrix, dtype=int
-):
+def grid_to_graph(n_x, n_y, n_z=1, *, mask=None, return_as=sparse.coo_matrix, dtype=int):
     """Graph of the pixel-to-pixel connections.
 
     Edges exist if 2 voxels are connected.
@@ -328,9 +324,7 @@ def _extract_patches(arr, patch_shape=8, extraction_step=1):
     slices = tuple(slice(None, None, st) for st in extraction_step)
     indexing_strides = arr[slices].strides
 
-    patch_indices_shape = (
-        (np.array(arr.shape) - np.array(patch_shape)) // np.array(extraction_step)
-    ) + 1
+    patch_indices_shape = ((np.array(arr.shape) - np.array(patch_shape)) // np.array(extraction_step)) + 1
 
     shape = tuple(list(patch_indices_shape) + list(patch_shape))
     strides = tuple(list(indexing_strides) + list(patch_strides))
@@ -414,22 +408,16 @@ def extract_patches_2d(image, patch_size, *, max_patches=None, random_state=None
     p_h, p_w = patch_size
 
     if p_h > i_h:
-        raise ValueError(
-            "Height of the patch should be less than the height of the image."
-        )
+        raise ValueError("Height of the patch should be less than the height of the image.")
 
     if p_w > i_w:
-        raise ValueError(
-            "Width of the patch should be less than the width of the image."
-        )
+        raise ValueError("Width of the patch should be less than the width of the image.")
 
     image = check_array(image, allow_nd=True)
     image = image.reshape((i_h, i_w, -1))
     n_colors = image.shape[-1]
 
-    extracted_patches = _extract_patches(
-        image, patch_shape=(p_h, p_w, n_colors), extraction_step=1
-    )
+    extracted_patches = _extract_patches(image, patch_shape=(p_h, p_w, n_colors), extraction_step=1)
 
     n_patches = _compute_n_patches(i_h, i_w, p_h, p_w, max_patches)
     if max_patches:
@@ -502,17 +490,19 @@ class PatchExtractor(TransformerMixin, BaseEstimator):
     Parameters
     ----------
     patch_size : tuple of int (patch_height, patch_width), default=None
-        The dimensions of one patch.
+        The dimensions of one patch. If set to None, the patch size will be
+        automatically set to (img_height // 10, img_width // 10), where
+        img_height and img_width are the dimensions of the input images.
 
     max_patches : int or float, default=None
         The maximum number of patches per image to extract. If `max_patches` is
         a float in (0, 1), it is taken to mean a proportion of the total number
-        of patches.
+        of patches. If set to None, extract all possible patches.
 
     random_state : int, RandomState instance, default=None
         Determines the random number generator used for random sampling when
         `max_patches is not None`. Use an int to make the randomness
-        deterministic.
+        deterministic. If set to None, use the RandomState from np.random.
         See :term:`Glossary <random_state>`.
 
     See Also
@@ -612,10 +602,7 @@ def transform(self, X):
             patch_size = img_height // 10, img_width // 10
         else:
             if len(self.patch_size) != 2:
-                raise ValueError(
-                    f"patch_size must be a tuple of two integers. Got {self.patch_size}"
-                    " instead."
-                )
+                raise ValueError(f"patch_size must be a tuple of two integers. Got {self.patch_size} instead.")
             patch_size = self.patch_size
 
         n_imgs, img_height, img_width = X.shape[:3]
@@ -624,9 +611,7 @@ def transform(self, X):
 
         # compute the dimensions of the patches array
         patch_height, patch_width = patch_size
-        n_patches = _compute_n_patches(
-            img_height, img_width, patch_height, patch_width, self.max_patches
-        )
+        n_patches = _compute_n_patches(img_height, img_width, patch_height, patch_width, self.max_patches)
         patches_shape = (n_imgs * n_patches,) + patch_size
         if n_channels > 1:
             patches_shape += (n_channels,)

From cbbd1d7af64fc5a86ac09e87dd7c8ba0fedf408e Mon Sep 17 00:00:00 2001
From: Peter Piontek <piontek0@gmail.com>
Date: Tue, 28 Mar 2023 21:18:44 +0200
Subject: [PATCH 2/4] Fix linter errors by re-wrapping overlong lines

---
 sklearn/feature_extraction/image.py | 46 ++++++++++++++++++++++-------
 1 file changed, 35 insertions(+), 11 deletions(-)

diff --git a/sklearn/feature_extraction/image.py b/sklearn/feature_extraction/image.py
index 86421767f2df0..be5ad7f8d2dcd 100644
--- a/sklearn/feature_extraction/image.py
+++ b/sklearn/feature_extraction/image.py
@@ -45,7 +45,9 @@ def _make_edges_3d(n_x, n_y, n_z=1):
         The size of the grid in the z direction, defaults to 1
     """
     vertices = np.arange(n_x * n_y * n_z).reshape((n_x, n_y, n_z))
-    edges_deep = np.vstack((vertices[:, :, :-1].ravel(), vertices[:, :, 1:].ravel()))
+    edges_deep = np.vstack(
+        (vertices[:, :, :-1].ravel(), vertices[:, :, 1:].ravel())
+    )
     edges_right = np.vstack((vertices[:, :-1].ravel(), vertices[:, 1:].ravel()))
     edges_down = np.vstack((vertices[:-1].ravel(), vertices[1:].ravel()))
     edges = np.hstack((edges_deep, edges_right, edges_down))
@@ -92,7 +94,9 @@ def _mask_edges_weights(mask, edges, weights=None):
         return edges, weights
 
 
-def _to_graph(n_x, n_y, n_z, mask=None, img=None, return_as=sparse.coo_matrix, dtype=None):
+def _to_graph(
+    n_x, n_y, n_z, mask=None, img=None, return_as=sparse.coo_matrix, dtype=None
+):
     """Auxiliary function for img_to_graph and grid_to_graph"""
     edges = _make_edges_3d(n_x, n_y, n_z)
 
@@ -196,7 +200,9 @@ def img_to_graph(img, *, mask=None, return_as=sparse.coo_matrix, dtype=None):
         "dtype": "no_validation",  # validation delegated to numpy
     }
 )
-def grid_to_graph(n_x, n_y, n_z=1, *, mask=None, return_as=sparse.coo_matrix, dtype=int):
+def grid_to_graph(
+    n_x, n_y, n_z=1, *, mask=None, return_as=sparse.coo_matrix, dtype=int
+):
     """Graph of the pixel-to-pixel connections.
 
     Edges exist if 2 voxels are connected.
@@ -324,7 +330,10 @@ def _extract_patches(arr, patch_shape=8, extraction_step=1):
     slices = tuple(slice(None, None, st) for st in extraction_step)
     indexing_strides = arr[slices].strides
 
-    patch_indices_shape = ((np.array(arr.shape) - np.array(patch_shape)) // np.array(extraction_step)) + 1
+    patch_indices_shape = (
+        (np.array(arr.shape) - np.array(patch_shape))
+        // np.array(extraction_step)
+    ) + 1
 
     shape = tuple(list(patch_indices_shape) + list(patch_shape))
     strides = tuple(list(indexing_strides) + list(patch_strides))
@@ -345,7 +354,9 @@ def _extract_patches(arr, patch_shape=8, extraction_step=1):
         "random_state": ["random_state"],
     }
 )
-def extract_patches_2d(image, patch_size, *, max_patches=None, random_state=None):
+def extract_patches_2d(
+    image, patch_size, *, max_patches=None, random_state=None
+):
     """Reshape a 2D image into a collection of patches.
 
     The resulting patches are allocated in a dedicated array.
@@ -408,16 +419,22 @@ def extract_patches_2d(image, patch_size, *, max_patches=None, random_state=None
     p_h, p_w = patch_size
 
     if p_h > i_h:
-        raise ValueError("Height of the patch should be less than the height of the image.")
+        raise ValueError(
+            "Height of the patch should be less than the height of the image."
+        )
 
     if p_w > i_w:
-        raise ValueError("Width of the patch should be less than the width of the image.")
+        raise ValueError(
+            "Width of the patch should be less than the width of the image."
+        )
 
     image = check_array(image, allow_nd=True)
     image = image.reshape((i_h, i_w, -1))
     n_colors = image.shape[-1]
 
-    extracted_patches = _extract_patches(image, patch_shape=(p_h, p_w, n_colors), extraction_step=1)
+    extracted_patches = _extract_patches(
+        image, patch_shape=(p_h, p_w, n_colors), extraction_step=1
+    )
 
     n_patches = _compute_n_patches(i_h, i_w, p_h, p_w, max_patches)
     if max_patches:
@@ -476,7 +493,9 @@ def reconstruct_from_patches_2d(patches, image_size):
         for j in range(i_w):
             # divide by the amount of overlap
             # XXX: is this the most efficient way? memory-wise yes, cpu wise?
-            img[i, j] /= float(min(i + 1, p_h, i_h - i) * min(j + 1, p_w, i_w - j))
+            img[i, j] /= float(
+                min(i + 1, p_h, i_h - i) * min(j + 1, p_w, i_w - j)
+            )
     return img
 
 
@@ -602,7 +621,10 @@ def transform(self, X):
             patch_size = img_height // 10, img_width // 10
         else:
             if len(self.patch_size) != 2:
-                raise ValueError(f"patch_size must be a tuple of two integers. Got {self.patch_size} instead.")
+                raise ValueError(
+                    "patch_size must be a tuple of two integers. Got"
+                    f" {self.patch_size} instead."
+                )
             patch_size = self.patch_size
 
         n_imgs, img_height, img_width = X.shape[:3]
@@ -611,7 +633,9 @@ def transform(self, X):
 
         # compute the dimensions of the patches array
         patch_height, patch_width = patch_size
-        n_patches = _compute_n_patches(img_height, img_width, patch_height, patch_width, self.max_patches)
+        n_patches = _compute_n_patches(
+            img_height, img_width, patch_height, patch_width, self.max_patches
+        )
         patches_shape = (n_imgs * n_patches,) + patch_size
         if n_channels > 1:
             patches_shape += (n_channels,)

From e9276f29216fa4feebe840db1f61e4998a769395 Mon Sep 17 00:00:00 2001
From: Peter Piontek <piontek0@gmail.com>
Date: Tue, 28 Mar 2023 21:48:00 +0200
Subject: [PATCH 3/4] Fix linter errors by joining statements that fit on one line

---
 sklearn/feature_extraction/image.py | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/sklearn/feature_extraction/image.py b/sklearn/feature_extraction/image.py
index be5ad7f8d2dcd..c44c81f666940 100644
--- a/sklearn/feature_extraction/image.py
+++ b/sklearn/feature_extraction/image.py
@@ -45,9 +45,7 @@ def _make_edges_3d(n_x, n_y, n_z=1):
         The size of the grid in the z direction, defaults to 1
     """
     vertices = np.arange(n_x * n_y * n_z).reshape((n_x, n_y, n_z))
-    edges_deep = np.vstack(
-        (vertices[:, :, :-1].ravel(), vertices[:, :, 1:].ravel())
-    )
+    edges_deep = np.vstack((vertices[:, :, :-1].ravel(), vertices[:, :, 1:].ravel()))
     edges_right = np.vstack((vertices[:, :-1].ravel(), vertices[:, 1:].ravel()))
     edges_down = np.vstack((vertices[:-1].ravel(), vertices[1:].ravel()))
     edges = np.hstack((edges_deep, edges_right, edges_down))
@@ -331,8 +329,7 @@ def _extract_patches(arr, patch_shape=8, extraction_step=1):
     indexing_strides = arr[slices].strides
 
     patch_indices_shape = (
-        (np.array(arr.shape) - np.array(patch_shape))
-        // np.array(extraction_step)
+        (np.array(arr.shape) - np.array(patch_shape)) // np.array(extraction_step)
     ) + 1
 
     shape = tuple(list(patch_indices_shape) + list(patch_shape))
@@ -354,9 +351,7 @@ def _extract_patches(arr, patch_shape=8, extraction_step=1):
         "random_state": ["random_state"],
     }
 )
-def extract_patches_2d(
-    image, patch_size, *, max_patches=None, random_state=None
-):
+def extract_patches_2d(image, patch_size, *, max_patches=None, random_state=None):
     """Reshape a 2D image into a collection of patches.
 
     The resulting patches are allocated in a dedicated array.
@@ -493,9 +488,7 @@ def reconstruct_from_patches_2d(patches, image_size):
         for j in range(i_w):
             # divide by the amount of overlap
             # XXX: is this the most efficient way? memory-wise yes, cpu wise?
-            img[i, j] /= float(
-                min(i + 1, p_h, i_h - i) * min(j + 1, p_w, i_w - j)
-            )
+            img[i, j] /= float(min(i + 1, p_h, i_h - i) * min(j + 1, p_w, i_w - j))
     return img
 
 

From fe05a308ae609b8a1a6def51f99d3de706fba048 Mon Sep 17 00:00:00 2001
From: Peter Piontek <piontek0@gmail.com>
Date: Wed, 29 Mar 2023 11:59:30 +0200
Subject: [PATCH 4/4] Apply review suggestions to the PatchExtractor docstring

---
 sklearn/feature_extraction/image.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/feature_extraction/image.py b/sklearn/feature_extraction/image.py
index c44c81f666940..c4147956a7bc8 100644
--- a/sklearn/feature_extraction/image.py
+++ b/sklearn/feature_extraction/image.py
@@ -503,8 +503,8 @@ class PatchExtractor(TransformerMixin, BaseEstimator):
     ----------
     patch_size : tuple of int (patch_height, patch_width), default=None
         The dimensions of one patch. If set to None, the patch size will be
-        automatically set to (img_height // 10, img_width // 10), where
-        img_height and img_width are the dimensions of the input images.
+        automatically set to `(img_height // 10, img_width // 10)`, where
+        `img_height` and `img_width` are the dimensions of the input images.
 
     max_patches : int or float, default=None
         The maximum number of patches per image to extract. If `max_patches` is
@@ -514,7 +514,7 @@ class PatchExtractor(TransformerMixin, BaseEstimator):
     random_state : int, RandomState instance, default=None
         Determines the random number generator used for random sampling when
         `max_patches is not None`. Use an int to make the randomness
-        deterministic. If set to None, use the RandomState from np.random.
+        deterministic.
         See :term:`Glossary <random_state>`.
 
     See Also