scikit-learn · glemaitre · Oct 23, 2023 · Sep 28, 2023 · Oct 13, 2023 · Oct 16, 2023
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
@@ -70,8 +70,6 @@ jobs:
           - os: windows-latest
             python: 312
             platform_id: win_amd64
-            # TODO: remove when Python 3.12 is released
-            prerelease: "True"
 
           # Linux 64 bit manylinux2014
           - os: ubuntu-latest
@@ -97,8 +95,6 @@ jobs:
             python: 312
             platform_id: manylinux_x86_64
             manylinux_image: manylinux2014
-            # TODO: remove when Python 3.12 is released
-            prerelease: "True"
 
           # MacOS x86_64
           - os: macos-latest
@@ -116,8 +112,6 @@ jobs:
           - os: macos-latest
             python: 312
             platform_id: macosx_x86_64
-            # TODO: remove when Python 3.12 is released
-            prerelease: "True"
 
           # MacOS arm64
           # The wheel for the latest Python version is built and tested on

diff --git a/build_tools/cirrus/arm_wheel.yml b/build_tools/cirrus/arm_wheel.yml
@@ -25,8 +25,6 @@ macos_arm64_wheel_task:
     # is actually tested on Cirrus CI.
     - env:
         CIBW_BUILD: cp312-macosx_arm64
-        # TODO: remove when Python 3.12 is released
-        CIBW_PRERELEASE_PYTHONS: True
 
   conda_script:
     - curl -L --retry 10 -o ~/mambaforge.sh https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh
@@ -78,8 +76,6 @@ linux_arm64_wheel_task:
         CIBW_TEST_SKIP: "*_aarch64"
     - env:
         CIBW_BUILD: cp312-manylinux_aarch64
-        # TODO: remove when Python 3.12 is released
-        CIBW_PRERELEASE_PYTHONS: True
 
   cibuildwheel_script:
     - apt install -y python3 python-is-python3

diff --git a/doc/templates/index.html b/doc/templates/index.html
@@ -169,7 +169,9 @@ <h4 class="sk-landing-call-header">News</h4>
         <li><strong>On-going development:</strong>
         <a href="https://scikit-learn.org/dev/whats_new.html"><strong>What's new</strong> (Changelog)</a>
         </li>
-	<li><strong>September 2023.</strong> scikit-learn 1.3.1 is available for download (<a href="whats_new/v1.3.html#version-1-3-1">Changelog</a>).
+        <li><strong>October 2023.</strong> scikit-learn 1.3.2 is available for download (<a href="whats_new/v1.3.html#version-1-3-2">Changelog</a>).
+        </li>
+        <li><strong>September 2023.</strong> scikit-learn 1.3.1 is available for download (<a href="whats_new/v1.3.html#version-1-3-1">Changelog</a>).
         </li>
         <li><strong>June 2023.</strong> scikit-learn 1.3.0 is available for download (<a href="whats_new/v1.3.html#version-1-3-0">Changelog</a>).
         </li>

diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst
@@ -2,6 +2,47 @@
 
 .. currentmodule:: sklearn
 
+.. _changes_1_3_2:
+
+Version 1.3.2
+=============
+
+**October 2023**
+
+Changelog
+---------
+
+:mod:`sklearn.datasets`
+.......................
+
+- |Fix| All dataset fetchers now accept `data_home` as any object that implements
+  the :class:`os.PathLike` interface, for instance, :class:`pathlib.Path`.
+  :pr:`27468` by :user:`Yao Xiao <Charlie-XIAO>`.
+
+:mod:`sklearn.decomposition`
+............................
+
+- |Fix| Fixes a bug in :class:`decomposition.KernelPCA` by forcing the output of
+  the internal :class:`preprocessing.KernelCenterer` to be a default array. When the
+  arpack solver is used, it expects an array with a `dtype` attribute.
+  :pr:`27583` by :user:`Guillaume Lemaitre <glemaitre>`.
+
+:mod:`sklearn.metrics`
+......................
+
+- |Fix| Fixes a bug for metrics using `zero_division=np.nan`
+  (e.g. :func:`~metrics.precision_score`) within a parallel loop
+  (e.g. :func:`~model_selection.cross_val_score`) where the singleton for `np.nan`
+  will be different in the sub-processes.
+  :pr:`27573` by :user:`Guillaume Lemaitre <glemaitre>`.
+
+:mod:`sklearn.tree`
+...................
+
+- |Fix| Do not leak data via non-initialized memory in decision tree pickle files and make
+  the generation of those files deterministic. :pr:`27580` by :user:`Loïc Estève <lesteve>`.
+
+
 .. _changes_1_3_1:
 
 Version 1.3.1

diff --git a/sklearn/__init__.py b/sklearn/__init__.py
@@ -38,7 +38,7 @@
 # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
 # 'X.Y.dev0' is the canonical version of 'X.Y.dev'
 #
-__version__ = "1.3.1"
+__version__ = "1.3.2"
 
 
 # On OSX, we can get a runtime error due to multiple OpenMP libraries loaded

diff --git a/sklearn/covariance/tests/test_graphical_lasso.py b/sklearn/covariance/tests/test_graphical_lasso.py
@@ -24,7 +24,12 @@
 )
 
 
-def test_graphical_lasso(random_state=0):
+def test_graphical_lassos(random_state=1):
+    """Test the graphical lasso solvers.
+
+    This check is unstable for some random seeds where the covariances found with "cd"
+    and "lars" solvers are different (4 cases / 100 tries).
+    """
     # Sample data from a sparse multivariate normal
     dim = 20
     n_samples = 100
@@ -46,10 +51,11 @@ def test_graphical_lasso(random_state=0):
             costs, dual_gap = np.array(costs).T
             # Check that the costs always decrease (doesn't hold if alpha == 0)
             if not alpha == 0:
-                assert_array_less(np.diff(costs), 0)
+                # use 1e-12 since the difference between costs can be exactly 0
+                assert_array_less(np.diff(costs), 1e-12)
         # Check that the 2 approaches give similar results
-        assert_array_almost_equal(covs["cd"], covs["lars"], decimal=4)
-        assert_array_almost_equal(icovs["cd"], icovs["lars"], decimal=4)
+        assert_allclose(covs["cd"], covs["lars"], atol=1e-4)
+        assert_allclose(icovs["cd"], icovs["lars"], atol=1e-4)
 
     # Smoke test the estimator
     model = GraphicalLasso(alpha=0.25).fit(X)

diff --git a/sklearn/datasets/_base.py b/sklearn/datasets/_base.py
@@ -57,7 +57,7 @@ def get_data_home(data_home=None) -> str:
     ----------
     data_home : str or path-like, default=None
         The path to scikit-learn data directory. If `None`, the default path
-        is `~/sklearn_learn_data`.
+        is `~/scikit_learn_data`.
 
     Returns
     -------
@@ -84,7 +84,7 @@ def clear_data_home(data_home=None):
     ----------
     data_home : str or path-like, default=None
         The path to scikit-learn data directory. If `None`, the default path
-        is `~/sklearn_learn_data`.
+        is `~/scikit_learn_data`.
     """
     data_home = get_data_home(data_home)
     shutil.rmtree(data_home)

diff --git a/sklearn/datasets/_california_housing.py b/sklearn/datasets/_california_housing.py
@@ -23,7 +23,7 @@
 
 import logging
 import tarfile
-from os import makedirs, remove
+from os import PathLike, makedirs, remove
 from os.path import exists
 
 import joblib
@@ -53,7 +53,7 @@
 
 @validate_params(
     {
-        "data_home": [str, None],
+        "data_home": [str, PathLike, None],
         "download_if_missing": ["boolean"],
         "return_X_y": ["boolean"],
         "as_frame": ["boolean"],
@@ -76,7 +76,7 @@ def fetch_california_housing(
 
     Parameters
     ----------
-    data_home : str, default=None
+    data_home : str or path-like, default=None
         Specify another download and cache folder for the datasets. By default
         all scikit-learn data is stored in '~/scikit_learn_data' subfolders.
 

diff --git a/sklearn/datasets/_covtype.py b/sklearn/datasets/_covtype.py
@@ -65,7 +65,7 @@
 
 @validate_params(
     {
-        "data_home": [str, None],
+        "data_home": [str, os.PathLike, None],
         "download_if_missing": ["boolean"],
         "random_state": ["random_state"],
         "shuffle": ["boolean"],
@@ -98,7 +98,7 @@ def fetch_covtype(
 
     Parameters
     ----------
-    data_home : str, default=None
+    data_home : str or path-like, default=None
         Specify another download and cache folder for the datasets. By default
         all scikit-learn data is stored in '~/scikit_learn_data' subfolders.
 

diff --git a/sklearn/datasets/_kddcup99.py b/sklearn/datasets/_kddcup99.py
@@ -50,7 +50,7 @@
 @validate_params(
     {
         "subset": [StrOptions({"SA", "SF", "http", "smtp"}), None],
-        "data_home": [str, None],
+        "data_home": [str, os.PathLike, None],
         "shuffle": ["boolean"],
         "random_state": ["random_state"],
         "percent10": ["boolean"],
@@ -92,7 +92,7 @@ def fetch_kddcup99(
         To return the corresponding classical subsets of kddcup 99.
         If None, return the entire kddcup 99 dataset.
 
-    data_home : str, default=None
+    data_home : str or path-like, default=None
         Specify another download and cache folder for the datasets. By default
         all scikit-learn data is stored in '~/scikit_learn_data' subfolders.
 

diff --git a/sklearn/datasets/_lfw.py b/sklearn/datasets/_lfw.py
@@ -10,7 +10,7 @@
 
 import logging
 from numbers import Integral, Real
-from os import listdir, makedirs, remove
+from os import PathLike, listdir, makedirs, remove
 from os.path import exists, isdir, join
 
 import numpy as np
@@ -234,7 +234,7 @@ def _fetch_lfw_people(
 
 @validate_params(
     {
-        "data_home": [str, None],
+        "data_home": [str, PathLike, None],
         "funneled": ["boolean"],
         "resize": [Interval(Real, 0, None, closed="neither"), None],
         "min_faces_per_person": [Interval(Integral, 0, None, closed="left"), None],
@@ -272,7 +272,7 @@ def fetch_lfw_people(
 
     Parameters
     ----------
-    data_home : str, default=None
+    data_home : str or path-like, default=None
         Specify another download and cache folder for the datasets. By default
         all scikit-learn data is stored in '~/scikit_learn_data' subfolders.
 
@@ -431,7 +431,7 @@ def _fetch_lfw_pairs(
 @validate_params(
     {
         "subset": [StrOptions({"train", "test", "10_folds"})],
-        "data_home": [str, None],
+        "data_home": [str, PathLike, None],
         "funneled": ["boolean"],
         "resize": [Interval(Real, 0, None, closed="neither"), None],
         "color": ["boolean"],
@@ -480,7 +480,7 @@ def fetch_lfw_pairs(
         official evaluation set that is meant to be used with a 10-folds
         cross validation.
 
-    data_home : str, default=None
+    data_home : str or path-like, default=None
         Specify another download and cache folder for the datasets. By
         default all scikit-learn data is stored in '~/scikit_learn_data'
         subfolders.

diff --git a/sklearn/datasets/_olivetti_faces.py b/sklearn/datasets/_olivetti_faces.py
@@ -13,7 +13,7 @@
 # Copyright (c) 2011 David Warde-Farley <wardefar at iro dot umontreal dot ca>
 # License: BSD 3 clause
 
-from os import makedirs, remove
+from os import PathLike, makedirs, remove
 from os.path import exists
 
 import joblib
@@ -36,7 +36,7 @@
 
 @validate_params(
     {
-        "data_home": [str, None],
+        "data_home": [str, PathLike, None],
         "shuffle": ["boolean"],
         "random_state": ["random_state"],
         "download_if_missing": ["boolean"],
@@ -67,7 +67,7 @@ def fetch_olivetti_faces(
 
     Parameters
     ----------
-    data_home : str, default=None
+    data_home : str or path-like, default=None
         Specify another download and cache folder for the datasets. By default
         all scikit-learn data is stored in '~/scikit_learn_data' subfolders.
 

diff --git a/sklearn/datasets/_openml.py b/sklearn/datasets/_openml.py
@@ -749,7 +749,7 @@ def _valid_data_column_names(features_list, target_columns):
         "name": [str, None],
         "version": [Interval(Integral, 1, None, closed="left"), StrOptions({"active"})],
         "data_id": [Interval(Integral, 1, None, closed="left"), None],
-        "data_home": [str, None],
+        "data_home": [str, os.PathLike, None],
         "target_column": [str, list, None],
         "cache": [bool],
         "return_X_y": [bool],
@@ -769,7 +769,7 @@ def fetch_openml(
     *,
     version: Union[str, int] = "active",
     data_id: Optional[int] = None,
-    data_home: Optional[str] = None,
+    data_home: Optional[Union[str, os.PathLike]] = None,
     target_column: Optional[Union[str, List]] = "default-target",
     cache: bool = True,
     return_X_y: bool = False,
@@ -815,7 +815,7 @@ def fetch_openml(
         dataset. If data_id is not given, name (and potential version) are
         used to obtain a dataset.
 
-    data_home : str, default=None
+    data_home : str or path-like, default=None
         Specify another download and cache folder for the data sets. By default
         all scikit-learn data is stored in '~/scikit_learn_data' subfolders.
 

diff --git a/sklearn/datasets/_rcv1.py b/sklearn/datasets/_rcv1.py
@@ -10,7 +10,7 @@
 
 import logging
 from gzip import GzipFile
-from os import makedirs, remove
+from os import PathLike, makedirs, remove
 from os.path import exists, join
 
 import joblib
@@ -74,7 +74,7 @@
 
 @validate_params(
     {
-        "data_home": [str, None],
+        "data_home": [str, PathLike, None],
         "subset": [StrOptions({"train", "test", "all"})],
         "download_if_missing": ["boolean"],
         "random_state": ["random_state"],
@@ -111,7 +111,7 @@ def fetch_rcv1(
 
     Parameters
     ----------
-    data_home : str, default=None
+    data_home : str or path-like, default=None
         Specify another download and cache folder for the datasets. By default
         all scikit-learn data is stored in '~/scikit_learn_data' subfolders.
 

diff --git a/sklearn/datasets/_species_distributions.py b/sklearn/datasets/_species_distributions.py
@@ -39,7 +39,7 @@
 
 import logging
 from io import BytesIO
-from os import makedirs, remove
+from os import PathLike, makedirs, remove
 from os.path import exists
 
 import joblib
@@ -136,7 +136,7 @@ def construct_grids(batch):
 
 
 @validate_params(
-    {"data_home": [str, None], "download_if_missing": ["boolean"]},
+    {"data_home": [str, PathLike, None], "download_if_missing": ["boolean"]},
     prefer_skip_nested_validation=True,
 )
 def fetch_species_distributions(*, data_home=None, download_if_missing=True):
@@ -146,7 +146,7 @@ def fetch_species_distributions(*, data_home=None, download_if_missing=True):
 
     Parameters
     ----------
-    data_home : str, default=None
+    data_home : str or path-like, default=None
         Specify another download and cache folder for the datasets. By default
         all scikit-learn data is stored in '~/scikit_learn_data' subfolders.