From f05d7c4ca5f2afb2dfefc7a1255610ae8dc199f7 Mon Sep 17 00:00:00 2001
From: adrinjalali <adrin.jalali@gmail.com>
Date: Tue, 18 Jun 2024 20:37:49 +0200
Subject: [PATCH 01/17] MNT enable ruff-copyright-check

---
 pyproject.toml | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index ff7df45c1d843..4f2a33a12b38f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -140,8 +140,10 @@ exclude=[
 ]
 
 [tool.ruff.lint]
+# This enables us to use CPY001: copyright header check
+preview = true
 # all rules can be found here: https://beta.ruff.rs/docs/rules/
-select = ["E", "F", "W", "I"]
+select = ["E", "F", "W", "I", "CPY001"]
 ignore=[
     # space before : (needed for how black formats slicing)
     "E203",
@@ -151,11 +153,21 @@ ignore=[
     "E741",
 ]
 
+[tool.ruff.lint.flake8-copyright]
+notice-rgx = "\\#\\ Authors:\\ The\\ scikit\\-learn\\ developers\\\n\\#\\ SPDX\\-License\\-Identifier:\\ BSD\\-3\\-Clause"
+
 [tool.ruff.lint.per-file-ignores]
 # It's fine not to put the import at the top of the file in the examples
 # folder.
 "examples/*"=["E402"]
 "doc/conf.py"=["E402"]
+"**/tests/*"=["CPY001"]
+"asv_benchmarks/*"=["CPY001"]
+"benchmarks/*"=["CPY001"]
+"doc/*"=["CPY001"]
+"build_tools/*"=["CPY001"]
+"maint_tools/*"=["CPY001"]
+".github/*"=["CPY001"]
 
 
 [tool.cython-lint]

From ed17f9664dbe843e7dc90292c986599ce93927a9 Mon Sep 17 00:00:00 2001
From: adrinjalali <adrin.jalali@gmail.com>
Date: Tue, 18 Jun 2024 21:05:49 +0200
Subject: [PATCH 02/17] add missing notes

---
 .../plot_cyclical_feature_engineering.py      |   3 +
 .../applications/plot_digits_denoising.py     |   3 +
 .../applications/plot_face_recognition.py     |   3 +
 .../plot_time_series_lagged_features.py       | 853 +++++++++---------
 .../bicluster/plot_bicluster_newsgroups.py    |   3 +
 .../plot_classifier_comparison.py             |   3 +
 examples/classification/plot_lda.py           |   3 +
 examples/classification/plot_lda_qda.py       |   3 +
 examples/cluster/plot_affinity_propagation.py |   3 +
 examples/cluster/plot_bisect_kmeans.py        |   3 +
 examples/cluster/plot_cluster_comparison.py   |   3 +
 examples/cluster/plot_cluster_iris.py         |   3 +
 examples/cluster/plot_dbscan.py               |   3 +
 examples/cluster/plot_dict_face_patches.py    |   3 +
 examples/cluster/plot_digits_agglomeration.py |   3 +
 examples/cluster/plot_hdbscan.py              |   3 +
 examples/cluster/plot_inductive_clustering.py |   3 +
 examples/cluster/plot_kmeans_digits.py        |   3 +
 examples/cluster/plot_kmeans_plusplus.py      |   3 +
 .../plot_kmeans_silhouette_analysis.py        |   3 +
 examples/cluster/plot_linkage_comparison.py   |   3 +
 examples/cluster/plot_mean_shift.py           |   3 +
 examples/cluster/plot_mini_batch_kmeans.py    |   3 +
 examples/compose/plot_compare_reduction.py    |   3 +
 examples/compose/plot_digits_pipe.py          |   3 +
 .../covariance/plot_covariance_estimation.py  |   3 +
 examples/covariance/plot_lw_vs_oas.py         |   3 +
 .../covariance/plot_mahalanobis_distances.py  |   3 +
 .../plot_robust_vs_empirical_covariance.py    |   3 +
 .../plot_compare_cross_decomposition.py       |   3 +
 .../cross_decomposition/plot_pcr_vs_pls.py    |   3 +
 examples/datasets/plot_digits_last_image.py   |   3 +
 examples/datasets/plot_iris_dataset.py        |   3 +
 examples/datasets/plot_random_dataset.py      |   3 +
 .../plot_random_multilabel_dataset.py         |   3 +
 .../decomposition/plot_faces_decomposition.py |   3 +
 .../plot_ica_blind_source_separation.py       |   3 +
 .../decomposition/plot_image_denoising.py     |   3 +
 examples/decomposition/plot_pca_iris.py       |   3 +
 examples/decomposition/plot_pca_vs_lda.py     |   3 +
 examples/decomposition/plot_sparse_coding.py  |   3 +
 examples/ensemble/plot_adaboost_multiclass.py |   3 +
 examples/ensemble/plot_forest_importances.py  |   3 +
 .../ensemble/plot_forest_importances_faces.py |   3 +
 examples/ensemble/plot_forest_iris.py         |   3 +
 .../plot_gradient_boosting_categorical.py     |   3 +
 .../plot_gradient_boosting_early_stopping.py  |   3 +
 .../plot_gradient_boosting_quantile.py        |   3 +
 examples/ensemble/plot_isolation_forest.py    |   3 +
 .../ensemble/plot_monotonic_constraints.py    |   3 +
 .../ensemble/plot_random_forest_embedding.py  |   3 +
 .../ensemble/plot_voting_decision_regions.py  |   3 +
 examples/ensemble/plot_voting_probas.py       |   3 +
 examples/ensemble/plot_voting_regressor.py    |   3 +
 examples/exercises/plot_cv_diabetes.py        |   3 +
 .../plot_digits_classification_exercise.py    |   3 +
 examples/exercises/plot_iris_exercise.py      |   3 +
 .../feature_selection/plot_f_test_vs_mi.py    |   3 +
 .../plot_feature_selection.py                 |   3 +
 .../plot_feature_selection_pipeline.py        |   3 +
 examples/feature_selection/plot_rfe_digits.py |   3 +
 .../plot_rfe_with_cross_validation.py         |   3 +
 .../plot_select_from_model_diabetes.py        |   3 +
 examples/gaussian_process/plot_gpc_iris.py    |   3 +
 .../plot_gpr_on_structured_data.py            |   3 +
 ...t_iterative_imputer_variants_comparison.py |   3 +
 .../inspection/plot_causal_interpretation.py  |   3 +
 ...linear_model_coefficient_interpretation.py |   3 +
 .../inspection/plot_partial_dependence.py     |   3 +
 .../inspection/plot_permutation_importance.py |   3 +
 ...t_permutation_importance_multicollinear.py |   3 +
 examples/linear_model/plot_ard.py             |   3 +
 .../plot_bayesian_ridge_curvefit.py           |   3 +
 ...puted_gram_matrix_with_weighted_samples.py |   3 +
 examples/linear_model/plot_iris_logistic.py   |   3 +
 .../linear_model/plot_lasso_and_elasticnet.py |   3 +
 .../plot_lasso_dense_vs_sparse_data.py        |   3 +
 examples/linear_model/plot_logistic.py        |   3 +
 examples/linear_model/plot_nnls.py            |   3 +
 examples/linear_model/plot_ols.py             |   3 +
 examples/linear_model/plot_ols_3d.py          |   3 +
 .../linear_model/plot_ols_ridge_variance.py   |   3 +
 examples/linear_model/plot_omp.py             |   3 +
 examples/linear_model/plot_ransac.py          |   3 +
 examples/linear_model/plot_ridge_coeffs.py    |   3 +
 examples/linear_model/plot_robust_fit.py      |   3 +
 examples/linear_model/plot_sgd_iris.py        |   3 +
 .../linear_model/plot_sgd_loss_functions.py   |   3 +
 examples/linear_model/plot_sgd_penalties.py   |   3 +
 .../plot_sgd_separating_hyperplane.py         |   3 +
 .../linear_model/plot_sgd_weighted_samples.py |   3 +
 .../linear_model/plot_sgdocsvm_vs_ocsvm.py    |   3 +
 ...sparse_logistic_regression_20newsgroups.py |   3 +
 examples/manifold/plot_compare_methods.py     |   3 +
 examples/manifold/plot_swissroll.py           |   3 +
 .../plot_display_object_visualization.py      |   3 +
 .../plot_estimator_representation.py          |   3 +
 .../plot_johnson_lindenstrauss_bound.py       |   3 +
 .../miscellaneous/plot_metadata_routing.py    |   3 +
 .../plot_multioutput_face_completion.py       |   3 +
 ...ot_partial_dependence_visualization_api.py |   3 +
 .../miscellaneous/plot_pipeline_display.py    |   3 +
 .../plot_roc_curve_visualization_api.py       |   3 +
 examples/mixture/plot_gmm.py                  |   3 +
 examples/mixture/plot_gmm_init.py             |   3 +
 examples/mixture/plot_gmm_pdf.py              |   3 +
 examples/mixture/plot_gmm_selection.py        |   3 +
 examples/mixture/plot_gmm_sin.py              |   3 +
 .../model_selection/plot_confusion_matrix.py  |   3 +
 .../plot_cost_sensitive_learning.py           |   3 +
 examples/model_selection/plot_cv_indices.py   |   3 +
 examples/model_selection/plot_cv_predict.py   |   3 +
 examples/model_selection/plot_det.py          |   3 +
 .../plot_grid_search_digits.py                |   3 +
 .../plot_grid_search_refit_callable.py        |   3 +
 .../model_selection/plot_grid_search_stats.py |   3 +
 .../model_selection/plot_learning_curve.py    |   3 +
 .../model_selection/plot_likelihood_ratios.py |   3 +
 .../plot_nested_cross_validation_iris.py      |   3 +
 .../model_selection/plot_precision_recall.py  |   3 +
 .../model_selection/plot_randomized_search.py |   3 +
 examples/model_selection/plot_roc.py          |   3 +
 examples/model_selection/plot_roc_crossval.py |   3 +
 .../plot_successive_halving_heatmap.py        |   3 +
 .../plot_successive_halving_iterations.py     |   3 +
 .../plot_tuned_decision_threshold.py          |   3 +
 .../plot_underfitting_overfitting.py          |   3 +
 .../model_selection/plot_validation_curve.py  |   3 +
 .../multiclass/plot_multiclass_overview.py    |   3 +
 examples/neighbors/plot_classification.py     |   3 +
 .../neighbors/plot_digits_kde_sampling.py     |   3 +
 examples/neighbors/plot_kde_1d.py             |   3 +
 .../neighbors/plot_lof_novelty_detection.py   |   3 +
 .../neighbors/plot_lof_outlier_detection.py   |   3 +
 examples/neighbors/plot_nca_classification.py |   3 +
 examples/neighbors/plot_nca_dim_reduction.py  |   3 +
 examples/neighbors/plot_nca_illustration.py   |   3 +
 examples/neighbors/plot_nearest_centroid.py   |   3 +
 .../plot_mlp_training_curves.py               |   3 +
 .../neural_networks/plot_mnist_filters.py     |   3 +
 .../plot_discretization_classification.py     |   3 +
 examples/preprocessing/plot_target_encoder.py |   3 +
 .../plot_target_encoder_cross_val.py          |   3 +
 .../plot_release_highlights_0_22_0.py         |   3 +
 .../plot_semi_supervised_newsgroups.py        |   3 +
 examples/svm/plot_custom_kernel.py            |   3 +
 examples/svm/plot_iris_svc.py                 |   3 +
 .../svm/plot_linearsvc_support_vectors.py     |   3 +
 examples/svm/plot_oneclass.py                 |   3 +
 examples/svm/plot_rbf_parameters.py           |   3 +
 examples/svm/plot_separating_hyperplane.py    |   3 +
 .../plot_separating_hyperplane_unbalanced.py  |   3 +
 examples/svm/plot_svm_anova.py                |   3 +
 examples/svm/plot_svm_kernels.py              |   3 +
 examples/svm/plot_svm_margin.py               |   3 +
 examples/svm/plot_svm_regression.py           |   3 +
 examples/svm/plot_svm_tie_breaking.py         |   3 +
 examples/svm/plot_weighted_samples.py         |   3 +
 examples/tree/plot_cost_complexity_pruning.py |   3 +
 examples/tree/plot_iris_dtc.py                |   3 +
 examples/tree/plot_tree_regression.py         |   3 +
 .../tree/plot_tree_regression_multioutput.py  |   3 +
 examples/tree/plot_unveil_tree_structure.py   |   3 +
 setup.py                                      |   2 +
 sklearn/__check_build/__init__.py             |   3 +
 sklearn/__init__.py                           |   3 +
 sklearn/_build_utils/__init__.py              |   3 +
 sklearn/_build_utils/openmp_helpers.py        |   3 +
 sklearn/_build_utils/pre_build_helpers.py     |   3 +
 sklearn/_build_utils/tempita.py               |   2 +
 sklearn/_build_utils/version.py               |   3 +
 sklearn/_config.py                            |   3 +
 sklearn/_distributor_init.py                  |   3 +
 sklearn/_loss/__init__.py                     |   3 +
 sklearn/_loss/link.py                         |   3 +
 sklearn/_loss/loss.py                         |   3 +
 sklearn/_min_dependencies.py                  |   3 +
 sklearn/cluster/__init__.py                   |   3 +
 sklearn/cluster/_agglomerative.py             |   3 +
 sklearn/cluster/_bisect_k_means.py            |   3 +
 sklearn/cluster/_hdbscan/__init__.py          |   2 +
 sklearn/cluster/_hdbscan/hdbscan.py           |   3 +
 sklearn/cluster/_mean_shift.py                |   3 +
 sklearn/cluster/_optics.py                    |   3 +
 sklearn/compose/__init__.py                   |   3 +
 sklearn/conftest.py                           |   2 +
 sklearn/covariance/__init__.py                |   3 +
 sklearn/cross_decomposition/__init__.py       |   3 +
 sklearn/datasets/__init__.py                  |   3 +
 sklearn/datasets/_arff_parser.py              |   3 +
 sklearn/datasets/_kddcup99.py                 |   3 +
 sklearn/datasets/_openml.py                   |   2 +
 sklearn/datasets/data/__init__.py             |   2 +
 sklearn/datasets/descr/__init__.py            |   2 +
 sklearn/datasets/images/__init__.py           |   2 +
 sklearn/decomposition/__init__.py             |   3 +
 sklearn/decomposition/_lda.py                 |   3 +
 sklearn/decomposition/_truncated_svd.py       |   3 +
 sklearn/ensemble/__init__.py                  |   3 +
 .../_hist_gradient_boosting/__init__.py       |   3 +
 .../_hist_gradient_boosting/binning.py        |   3 +
 .../gradient_boosting.py                      |   3 +
 .../_hist_gradient_boosting/grower.py         |   3 +
 .../_hist_gradient_boosting/predictor.py      |   3 +
 .../ensemble/_hist_gradient_boosting/utils.py |   3 +
 sklearn/exceptions.py                         |   3 +
 sklearn/experimental/__init__.py              |   3 +
 .../experimental/enable_halving_search_cv.py  |   3 +
 .../enable_hist_gradient_boosting.py          |   3 +
 .../experimental/enable_iterative_imputer.py  |   3 +
 sklearn/feature_extraction/__init__.py        |   3 +
 sklearn/feature_extraction/_stop_words.py     |   2 +
 sklearn/feature_selection/__init__.py         |   3 +
 sklearn/feature_selection/_mutual_info.py     |   2 +
 sklearn/feature_selection/_sequential.py      |   3 +
 .../feature_selection/_variance_threshold.py  |   2 +
 sklearn/impute/__init__.py                    |   3 +
 sklearn/impute/_iterative.py                  |   2 +
 sklearn/inspection/__init__.py                |   3 +
 sklearn/inspection/_pd_utils.py               |   2 +
 sklearn/inspection/_permutation_importance.py |   3 +
 sklearn/inspection/_plot/__init__.py          |   2 +
 sklearn/inspection/_plot/decision_boundary.py |   2 +
 .../inspection/_plot/partial_dependence.py    |   2 +
 sklearn/linear_model/__init__.py              |   3 +
 sklearn/linear_model/_linear_loss.py          |   3 +
 sklearn/linear_model/_logistic.py             |   3 +
 sklearn/manifold/__init__.py                  |   3 +
 sklearn/manifold/_mds.py                      |   3 +
 sklearn/metrics/__init__.py                   |   3 +
 .../_pairwise_distances_reduction/__init__.py |   2 +
 .../_dispatcher.py                            |   2 +
 sklearn/metrics/_plot/__init__.py             |   2 +
 sklearn/metrics/_plot/confusion_matrix.py     |   2 +
 sklearn/metrics/_plot/det_curve.py            |   2 +
 .../metrics/_plot/precision_recall_curve.py   |   2 +
 sklearn/metrics/_plot/regression.py           |   2 +
 sklearn/metrics/_plot/roc_curve.py            |   2 +
 sklearn/metrics/_scorer.py                    |   3 +
 sklearn/metrics/cluster/__init__.py           |   3 +
 sklearn/metrics/cluster/_bicluster.py         |   2 +
 sklearn/mixture/__init__.py                   |   3 +
 sklearn/model_selection/__init__.py           |   3 +
 .../_classification_threshold.py              |   2 +
 sklearn/model_selection/_plot.py              |   2 +
 .../_search_successive_halving.py             |   2 +
 sklearn/neighbors/__init__.py                 |   3 +
 sklearn/neighbors/_kde.py                     |   3 +
 sklearn/neighbors/_unsupervised.py            |   3 +
 sklearn/neural_network/__init__.py            |   3 +
 sklearn/preprocessing/__init__.py             |   3 +
 .../preprocessing/_function_transformer.py    |   2 +
 sklearn/preprocessing/_polynomial.py          |   3 +
 sklearn/preprocessing/_target_encoder.py      |   2 +
 sklearn/semi_supervised/__init__.py           |   3 +
 sklearn/svm/_base.py                          |   2 +
 sklearn/svm/_classes.py                       |   2 +
 sklearn/tree/__init__.py                      |   3 +
 sklearn/utils/__init__.py                     |   3 +
 sklearn/utils/_arpack.py                      |   2 +
 sklearn/utils/_array_api.py                   |   3 +
 sklearn/utils/_available_if.py                |   2 +
 sklearn/utils/_bunch.py                       |   2 +
 sklearn/utils/_chunking.py                    |   2 +
 sklearn/utils/_encode.py                      |   2 +
 sklearn/utils/_estimator_html_repr.py         |   2 +
 sklearn/utils/_indexing.py                    |   2 +
 sklearn/utils/_joblib.py                      |   2 +
 sklearn/utils/_mask.py                        |   2 +
 sklearn/utils/_missing.py                     |   2 +
 sklearn/utils/_mocking.py                     |   2 +
 sklearn/utils/_optional_dependencies.py       |   2 +
 sklearn/utils/_param_validation.py            |   2 +
 sklearn/utils/_plotting.py                    |   2 +
 sklearn/utils/_pprint.py                      |   3 +
 sklearn/utils/_response.py                    |   3 +
 sklearn/utils/_set_output.py                  |   2 +
 sklearn/utils/_show_versions.py               |   3 +
 sklearn/utils/_tags.py                        |   2 +
 sklearn/utils/_user_interface.py              |   2 +
 sklearn/utils/deprecation.py                  |   2 +
 sklearn/utils/discovery.py                    |   3 +
 sklearn/utils/estimator_checks.py             |   3 +
 sklearn/utils/optimize.py                     |   3 +
 sklearn/utils/parallel.py                     |   3 +
 sklearn/utils/stats.py                        |   2 +
 286 files changed, 1232 insertions(+), 425 deletions(-)

diff --git a/examples/applications/plot_cyclical_feature_engineering.py b/examples/applications/plot_cyclical_feature_engineering.py
index a23e98d331dc0..f7c561da48f8b 100644
--- a/examples/applications/plot_cyclical_feature_engineering.py
+++ b/examples/applications/plot_cyclical_feature_engineering.py
@@ -13,6 +13,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Data exploration on the Bike Sharing Demand dataset
 # ---------------------------------------------------
diff --git a/examples/applications/plot_digits_denoising.py b/examples/applications/plot_digits_denoising.py
index 9d4b0cae79d06..73b5a8034f8d6 100644
--- a/examples/applications/plot_digits_denoising.py
+++ b/examples/applications/plot_digits_denoising.py
@@ -21,6 +21,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Authors: Guillaume Lemaitre <guillaume.lemaitre@inria.fr>
 # Licence: BSD 3 clause
 
diff --git a/examples/applications/plot_face_recognition.py b/examples/applications/plot_face_recognition.py
index 97a67fad52776..4ace1b81ef7a0 100644
--- a/examples/applications/plot_face_recognition.py
+++ b/examples/applications/plot_face_recognition.py
@@ -12,6 +12,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 from time import time
 
diff --git a/examples/applications/plot_time_series_lagged_features.py b/examples/applications/plot_time_series_lagged_features.py
index 9159825cbbd43..53b29fa940958 100644
--- a/examples/applications/plot_time_series_lagged_features.py
+++ b/examples/applications/plot_time_series_lagged_features.py
@@ -1,425 +1,428 @@
-"""
-===========================================
-Lagged features for time series forecasting
-===========================================
-
-This example demonstrates how Polars-engineered lagged features can be used
-for time series forecasting with
-:class:`~sklearn.ensemble.HistGradientBoostingRegressor` on the Bike Sharing
-Demand dataset.
-
-See the example on
-:ref:`sphx_glr_auto_examples_applications_plot_cyclical_feature_engineering.py`
-for some data exploration on this dataset and a demo on periodic feature
-engineering.
-
-"""
-
-# %%
-# Analyzing the Bike Sharing Demand dataset
-# -----------------------------------------
-#
-# We start by loading the data from the OpenML repository
-# as a pandas dataframe. This will be replaced with Polars
-# once `fetch_openml` adds a native support for it.
-# We convert to Polars for feature engineering, as it automatically caches
-# common subexpressions which are reused in multiple expressions
-# (like `pl.col("count").shift(1)` below). See
-# https://docs.pola.rs/user-guide/lazy/optimizations/ for more information.
-
-import numpy as np
-import polars as pl
-
-from sklearn.datasets import fetch_openml
-
-pl.Config.set_fmt_str_lengths(20)
-
-bike_sharing = fetch_openml(
-    "Bike_Sharing_Demand", version=2, as_frame=True, parser="pandas"
-)
-df = bike_sharing.frame
-df = pl.DataFrame({col: df[col].to_numpy() for col in df.columns})
-
-# %%
-# Next, we take a look at the statistical summary of the dataset
-# so that we can better understand the data that we are working with.
-import polars.selectors as cs
-
-summary = df.select(cs.numeric()).describe()
-summary
-
-# %%
-# Let us look at the count of the seasons `"fall"`, `"spring"`, `"summer"`
-# and `"winter"` present in the dataset to confirm they are balanced.
-
-import matplotlib.pyplot as plt
-
-df["season"].value_counts()
-
-
-# %%
-# Generating Polars-engineered lagged features
-# --------------------------------------------
-# Let's consider the problem of predicting the demand at the
-# next hour given past demands. Since the demand is a continuous
-# variable, one could intuitively use any regression model. However, we do
-# not have the usual `(X_train, y_train)` dataset. Instead, we just have
-# the `y_train` demand data sequentially organized by time.
-lagged_df = df.select(
-    "count",
-    *[pl.col("count").shift(i).alias(f"lagged_count_{i}h") for i in [1, 2, 3]],
-    lagged_count_1d=pl.col("count").shift(24),
-    lagged_count_1d_1h=pl.col("count").shift(24 + 1),
-    lagged_count_7d=pl.col("count").shift(7 * 24),
-    lagged_count_7d_1h=pl.col("count").shift(7 * 24 + 1),
-    lagged_mean_24h=pl.col("count").shift(1).rolling_mean(24),
-    lagged_max_24h=pl.col("count").shift(1).rolling_max(24),
-    lagged_min_24h=pl.col("count").shift(1).rolling_min(24),
-    lagged_mean_7d=pl.col("count").shift(1).rolling_mean(7 * 24),
-    lagged_max_7d=pl.col("count").shift(1).rolling_max(7 * 24),
-    lagged_min_7d=pl.col("count").shift(1).rolling_min(7 * 24),
-)
-lagged_df.tail(10)
-
-# %%
-# Watch out however, the first lines have undefined values because their own
-# past is unknown. This depends on how much lag we used:
-lagged_df.head(10)
-
-# %%
-# We can now separate the lagged features in a matrix `X` and the target variable
-# (the counts to predict) in an array of the same first dimension `y`.
-lagged_df = lagged_df.drop_nulls()
-X = lagged_df.drop("count")
-y = lagged_df["count"]
-print("X shape: {}\ny shape: {}".format(X.shape, y.shape))
-
-# %%
-# Naive evaluation of the next hour bike demand regression
-# --------------------------------------------------------
-# Let's randomly split our tabularized dataset to train a gradient
-# boosting regression tree (GBRT) model and evaluate it using Mean
-# Absolute Percentage Error (MAPE). If our model is aimed at forecasting
-# (i.e., predicting future data from past data), we should not use training
-# data that are ulterior to the testing data. In time series machine learning
-# the "i.i.d" (independent and identically distributed) assumption does not
-# hold true as the data points are not independent and have a temporal
-# relationship.
-from sklearn.ensemble import HistGradientBoostingRegressor
-from sklearn.model_selection import train_test_split
-
-X_train, X_test, y_train, y_test = train_test_split(
-    X, y, test_size=0.2, random_state=42
-)
-
-model = HistGradientBoostingRegressor().fit(X_train, y_train)
-
-# %%
-# Taking a look at the performance of the model.
-from sklearn.metrics import mean_absolute_percentage_error
-
-y_pred = model.predict(X_test)
-mean_absolute_percentage_error(y_test, y_pred)
-
-# %%
-# Proper next hour forecasting evaluation
-# ---------------------------------------
-# Let's use a proper evaluation splitting strategies that takes into account
-# the temporal structure of the dataset to evaluate our model's ability to
-# predict data points in the future (to avoid cheating by reading values from
-# the lagged features in the training set).
-from sklearn.model_selection import TimeSeriesSplit
-
-ts_cv = TimeSeriesSplit(
-    n_splits=3,  # to keep the notebook fast enough on common laptops
-    gap=48,  # 2 days data gap between train and test
-    max_train_size=10000,  # keep train sets of comparable sizes
-    test_size=3000,  # for 2 or 3 digits of precision in scores
-)
-all_splits = list(ts_cv.split(X, y))
-
-# %%
-# Training the model and evaluating its performance based on MAPE.
-train_idx, test_idx = all_splits[0]
-X_train, X_test = X[train_idx, :], X[test_idx, :]
-y_train, y_test = y[train_idx], y[test_idx]
-
-model = HistGradientBoostingRegressor().fit(X_train, y_train)
-y_pred = model.predict(X_test)
-mean_absolute_percentage_error(y_test, y_pred)
-
-# %%
-# The generalization error measured via a shuffled trained test split
-# is too optimistic. The generalization via a time-based split is likely to
-# be more representative of the true performance of the regression model.
-# Let's assess this variability of our error evaluation with proper
-# cross-validation:
-from sklearn.model_selection import cross_val_score
-
-cv_mape_scores = -cross_val_score(
-    model, X, y, cv=ts_cv, scoring="neg_mean_absolute_percentage_error"
-)
-cv_mape_scores
-
-# %%
-# The variability across splits is quite large! In a real life setting
-# it would be advised to use more splits to better assess the variability.
-# Let's report the mean CV scores and their standard deviation from now on.
-print(f"CV MAPE: {cv_mape_scores.mean():.3f} ± {cv_mape_scores.std():.3f}")
-
-# %%
-# We can compute several combinations of evaluation metrics and loss functions,
-# which are reported a bit below.
-from collections import defaultdict
-
-from sklearn.metrics import (
-    make_scorer,
-    mean_absolute_error,
-    mean_pinball_loss,
-    root_mean_squared_error,
-)
-from sklearn.model_selection import cross_validate
-
-
-def consolidate_scores(cv_results, scores, metric):
-    if metric == "MAPE":
-        scores[metric].append(f"{value.mean():.2f} ± {value.std():.2f}")
-    else:
-        scores[metric].append(f"{value.mean():.1f} ± {value.std():.1f}")
-
-    return scores
-
-
-scoring = {
-    "MAPE": make_scorer(mean_absolute_percentage_error),
-    "RMSE": make_scorer(root_mean_squared_error),
-    "MAE": make_scorer(mean_absolute_error),
-    "pinball_loss_05": make_scorer(mean_pinball_loss, alpha=0.05),
-    "pinball_loss_50": make_scorer(mean_pinball_loss, alpha=0.50),
-    "pinball_loss_95": make_scorer(mean_pinball_loss, alpha=0.95),
-}
-loss_functions = ["squared_error", "poisson", "absolute_error"]
-scores = defaultdict(list)
-for loss_func in loss_functions:
-    model = HistGradientBoostingRegressor(loss=loss_func)
-    cv_results = cross_validate(
-        model,
-        X,
-        y,
-        cv=ts_cv,
-        scoring=scoring,
-        n_jobs=2,
-    )
-    time = cv_results["fit_time"]
-    scores["loss"].append(loss_func)
-    scores["fit_time"].append(f"{time.mean():.2f} ± {time.std():.2f} s")
-
-    for key, value in cv_results.items():
-        if key.startswith("test_"):
-            metric = key.split("test_")[1]
-            scores = consolidate_scores(cv_results, scores, metric)
-
-
-# %%
-# Modeling predictive uncertainty via quantile regression
-# -------------------------------------------------------
-# Instead of modeling the expected value of the distribution of
-# :math:`Y|X` like the least squares and Poisson losses do, one could try to
-# estimate quantiles of the conditional distribution.
-#
-# :math:`Y|X=x_i` is expected to be a random variable for a given data point
-# :math:`x_i` because we expect that the number of rentals cannot be 100%
-# accurately predicted from the features. It can be influenced by other
-# variables not properly captured by the existing lagged features. For
-# instance whether or not it will rain in the next hour cannot be fully
-# anticipated from the past hours bike rental data. This is what we
-# call aleatoric uncertainty.
-#
-# Quantile regression makes it possible to give a finer description of that
-# distribution without making strong assumptions on its shape.
-quantile_list = [0.05, 0.5, 0.95]
-
-for quantile in quantile_list:
-    model = HistGradientBoostingRegressor(loss="quantile", quantile=quantile)
-    cv_results = cross_validate(
-        model,
-        X,
-        y,
-        cv=ts_cv,
-        scoring=scoring,
-        n_jobs=2,
-    )
-    time = cv_results["fit_time"]
-    scores["fit_time"].append(f"{time.mean():.2f} ± {time.std():.2f} s")
-
-    scores["loss"].append(f"quantile {int(quantile*100)}")
-    for key, value in cv_results.items():
-        if key.startswith("test_"):
-            metric = key.split("test_")[1]
-            scores = consolidate_scores(cv_results, scores, metric)
-
-scores_df = pl.DataFrame(scores)
-scores_df
-
-
-# %%
-# Let us take a look at the losses that minimise each metric.
-def min_arg(col):
-    col_split = pl.col(col).str.split(" ")
-    return pl.arg_sort_by(
-        col_split.list.get(0).cast(pl.Float64),
-        col_split.list.get(2).cast(pl.Float64),
-    ).first()
-
-
-scores_df.select(
-    pl.col("loss").get(min_arg(col_name)).alias(col_name)
-    for col_name in scores_df.columns
-    if col_name != "loss"
-)
-
-# %%
-# Even if the score distributions overlap due to the variance in the dataset,
-# it is true that the average RMSE is lower when `loss="squared_error"`, whereas
-# the average MAPE is lower when `loss="absolute_error"` as expected. That is
-# also the case for the Mean Pinball Loss with the quantiles 5 and 95. The score
-# corresponding to the 50 quantile loss is overlapping with the score obtained
-# by minimizing other loss functions, which is also the case for the MAE.
-#
-# A qualitative look at the predictions
-# -------------------------------------
-# We can now visualize the performance of the model with regards
-# to the 5th percentile, median and the 95th percentile:
-all_splits = list(ts_cv.split(X, y))
-train_idx, test_idx = all_splits[0]
-
-X_train, X_test = X[train_idx, :], X[test_idx, :]
-y_train, y_test = y[train_idx], y[test_idx]
-
-max_iter = 50
-gbrt_mean_poisson = HistGradientBoostingRegressor(loss="poisson", max_iter=max_iter)
-gbrt_mean_poisson.fit(X_train, y_train)
-mean_predictions = gbrt_mean_poisson.predict(X_test)
-
-gbrt_median = HistGradientBoostingRegressor(
-    loss="quantile", quantile=0.5, max_iter=max_iter
-)
-gbrt_median.fit(X_train, y_train)
-median_predictions = gbrt_median.predict(X_test)
-
-gbrt_percentile_5 = HistGradientBoostingRegressor(
-    loss="quantile", quantile=0.05, max_iter=max_iter
-)
-gbrt_percentile_5.fit(X_train, y_train)
-percentile_5_predictions = gbrt_percentile_5.predict(X_test)
-
-gbrt_percentile_95 = HistGradientBoostingRegressor(
-    loss="quantile", quantile=0.95, max_iter=max_iter
-)
-gbrt_percentile_95.fit(X_train, y_train)
-percentile_95_predictions = gbrt_percentile_95.predict(X_test)
-
-# %%
-# We can now take a look at the predictions made by the regression models:
-last_hours = slice(-96, None)
-fig, ax = plt.subplots(figsize=(15, 7))
-plt.title("Predictions by regression models")
-ax.plot(
-    y_test[last_hours],
-    "x-",
-    alpha=0.2,
-    label="Actual demand",
-    color="black",
-)
-ax.plot(
-    median_predictions[last_hours],
-    "^-",
-    label="GBRT median",
-)
-ax.plot(
-    mean_predictions[last_hours],
-    "x-",
-    label="GBRT mean (Poisson)",
-)
-ax.fill_between(
-    np.arange(96),
-    percentile_5_predictions[last_hours],
-    percentile_95_predictions[last_hours],
-    alpha=0.3,
-    label="GBRT 90% interval",
-)
-_ = ax.legend()
-
-# %%
-# Here it's interesting to notice that the blue area between the 5% and 95%
-# percentile estimators has a width that varies with the time of the day:
-#
-# - At night, the blue band is much narrower: the pair of models is quite
-#   certain that there will be a small number of bike rentals. And furthermore
-#   these seem correct in the sense that the actual demand stays in that blue
-#   band.
-# - During the day, the blue band is much wider: the uncertainty grows, probably
-#   because of the variability of the weather that can have a very large impact,
-#   especially on week-ends.
-# - We can also see that during week-days, the commute pattern is still visible in
-#   the 5% and 95% estimations.
-# - Finally, it is expected that 10% of the time, the actual demand does not lie
-#   between the 5% and 95% percentile estimates. On this test span, the actual
-#   demand seems to be higher, especially during the rush hours. It might reveal that
-#   our 95% percentile estimator underestimates the demand peaks. This could be be
-#   quantitatively confirmed by computing empirical coverage numbers as done in
-#   the :ref:`calibration of confidence intervals <calibration-section>`.
-#
-# Looking at the performance of non-linear regression models vs
-# the best models:
-from sklearn.metrics import PredictionErrorDisplay
-
-fig, axes = plt.subplots(ncols=3, figsize=(15, 6), sharey=True)
-fig.suptitle("Non-linear regression models")
-predictions = [
-    median_predictions,
-    percentile_5_predictions,
-    percentile_95_predictions,
-]
-labels = [
-    "Median",
-    "5th percentile",
-    "95th percentile",
-]
-for ax, pred, label in zip(axes, predictions, labels):
-    PredictionErrorDisplay.from_predictions(
-        y_true=y_test,
-        y_pred=pred,
-        kind="residual_vs_predicted",
-        scatter_kwargs={"alpha": 0.3},
-        ax=ax,
-    )
-    ax.set(xlabel="Predicted demand", ylabel="True demand")
-    ax.legend(["Best model", label])
-
-plt.show()
-
-# %%
-# Conclusion
-# ----------
-# Through this example we explored time series forecasting using lagged
-# features. We compared a naive regression (using the standardized
-# :class:`~sklearn.model_selection.train_test_split`) with a proper time
-# series evaluation strategy using
-# :class:`~sklearn.model_selection.TimeSeriesSplit`. We observed that the
-# model trained using :class:`~sklearn.model_selection.train_test_split`,
-# having a default value of `shuffle` set to `True` produced an overly
-# optimistic Mean Average Percentage Error (MAPE). The results
-# produced from the time-based split better represent the performance
-# of our time-series regression model. We also analyzed the predictive uncertainty
-# of our model via Quantile Regression. Predictions based on the 5th and
-# 95th percentile using `loss="quantile"` provide us with a quantitative estimate
-# of the uncertainty of the forecasts made by our time series regression model.
-# Uncertainty estimation can also be performed
-# using `MAPIE <https://mapie.readthedocs.io/en/latest/index.html>`_,
-# that provides an implementation based on recent work on conformal prediction
-# methods and estimates both aleatoric and epistemic uncertainty at the same time.
-# Furthermore, functionalities provided
-# by `sktime <https://www.sktime.net/en/latest/users.html>`_
-# can be used to extend scikit-learn estimators by making use of recursive time
-# series forecasting, that enables dynamic predictions of future values.
+"""
+===========================================
+Lagged features for time series forecasting
+===========================================
+
+This example demonstrates how Polars-engineered lagged features can be used
+for time series forecasting with
+:class:`~sklearn.ensemble.HistGradientBoostingRegressor` on the Bike Sharing
+Demand dataset.
+
+See the example on
+:ref:`sphx_glr_auto_examples_applications_plot_cyclical_feature_engineering.py`
+for some data exploration on this dataset and a demo on periodic feature
+engineering.
+
+"""
+
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
+# %%
+# Analyzing the Bike Sharing Demand dataset
+# -----------------------------------------
+#
+# We start by loading the data from the OpenML repository
+# as a pandas dataframe. This will be replaced with Polars
+# once `fetch_openml` adds a native support for it.
+# We convert to Polars for feature engineering, as it automatically caches
+# common subexpressions which are reused in multiple expressions
+# (like `pl.col("count").shift(1)` below). See
+# https://docs.pola.rs/user-guide/lazy/optimizations/ for more information.
+
+import numpy as np
+import polars as pl
+
+from sklearn.datasets import fetch_openml
+
+pl.Config.set_fmt_str_lengths(20)
+
+bike_sharing = fetch_openml(
+    "Bike_Sharing_Demand", version=2, as_frame=True, parser="pandas"
+)
+df = bike_sharing.frame
+df = pl.DataFrame({col: df[col].to_numpy() for col in df.columns})
+
+# %%
+# Next, we take a look at the statistical summary of the dataset
+# so that we can better understand the data that we are working with.
+import polars.selectors as cs
+
+summary = df.select(cs.numeric()).describe()
+summary
+
+# %%
+# Let us look at the count of the seasons `"fall"`, `"spring"`, `"summer"`
+# and `"winter"` present in the dataset to confirm they are balanced.
+
+import matplotlib.pyplot as plt
+
+df["season"].value_counts()
+
+
+# %%
+# Generating Polars-engineered lagged features
+# --------------------------------------------
+# Let's consider the problem of predicting the demand at the
+# next hour given past demands. Since the demand is a continuous
+# variable, one could intuitively use any regression model. However, we do
+# not have the usual `(X_train, y_train)` dataset. Instead, we just have
+# the `y_train` demand data sequentially organized by time.
+lagged_df = df.select(
+    "count",
+    *[pl.col("count").shift(i).alias(f"lagged_count_{i}h") for i in [1, 2, 3]],
+    lagged_count_1d=pl.col("count").shift(24),
+    lagged_count_1d_1h=pl.col("count").shift(24 + 1),
+    lagged_count_7d=pl.col("count").shift(7 * 24),
+    lagged_count_7d_1h=pl.col("count").shift(7 * 24 + 1),
+    lagged_mean_24h=pl.col("count").shift(1).rolling_mean(24),
+    lagged_max_24h=pl.col("count").shift(1).rolling_max(24),
+    lagged_min_24h=pl.col("count").shift(1).rolling_min(24),
+    lagged_mean_7d=pl.col("count").shift(1).rolling_mean(7 * 24),
+    lagged_max_7d=pl.col("count").shift(1).rolling_max(7 * 24),
+    lagged_min_7d=pl.col("count").shift(1).rolling_min(7 * 24),
+)
+lagged_df.tail(10)
+
+# %%
+# Watch out however, the first lines have undefined values because their own
+# past is unknown. This depends on how much lag we used:
+lagged_df.head(10)
+
+# %%
+# We can now separate the lagged features in a matrix `X` and the target variable
+# (the counts to predict) in an array of the same first dimension `y`.
+lagged_df = lagged_df.drop_nulls()
+X = lagged_df.drop("count")
+y = lagged_df["count"]
+print("X shape: {}\ny shape: {}".format(X.shape, y.shape))
+
+# %%
+# Naive evaluation of the next hour bike demand regression
+# --------------------------------------------------------
+# Let's randomly split our tabularized dataset to train a gradient
+# boosting regression tree (GBRT) model and evaluate it using Mean
+# Absolute Percentage Error (MAPE). If our model is aimed at forecasting
+# (i.e., predicting future data from past data), we should not use training
+# data that are ulterior to the testing data. In time series machine learning
+# the "i.i.d" (independent and identically distributed) assumption does not
+# hold true as the data points are not independent and have a temporal
+# relationship.
+from sklearn.ensemble import HistGradientBoostingRegressor
+from sklearn.model_selection import train_test_split
+
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, test_size=0.2, random_state=42
+)
+
+model = HistGradientBoostingRegressor().fit(X_train, y_train)
+
+# %%
+# Taking a look at the performance of the model.
+from sklearn.metrics import mean_absolute_percentage_error
+
+y_pred = model.predict(X_test)
+mean_absolute_percentage_error(y_test, y_pred)
+
+# %%
+# Proper next hour forecasting evaluation
+# ---------------------------------------
+# Let's use a proper evaluation splitting strategy that takes into account
+# the temporal structure of the dataset to evaluate our model's ability to
+# predict data points in the future (to avoid cheating by reading values from
+# the lagged features in the training set).
+from sklearn.model_selection import TimeSeriesSplit
+
+ts_cv = TimeSeriesSplit(
+    n_splits=3,  # to keep the notebook fast enough on common laptops
+    gap=48,  # 2 days data gap between train and test
+    max_train_size=10000,  # keep train sets of comparable sizes
+    test_size=3000,  # for 2 or 3 digits of precision in scores
+)
+all_splits = list(ts_cv.split(X, y))
+
+# %%
+# Training the model and evaluating its performance based on MAPE.
+train_idx, test_idx = all_splits[0]
+X_train, X_test = X[train_idx, :], X[test_idx, :]
+y_train, y_test = y[train_idx], y[test_idx]
+
+model = HistGradientBoostingRegressor().fit(X_train, y_train)
+y_pred = model.predict(X_test)
+mean_absolute_percentage_error(y_test, y_pred)
+
+# %%
+# The generalization error measured via a shuffled train-test split
+# is too optimistic. The generalization via a time-based split is likely to
+# be more representative of the true performance of the regression model.
+# Let's assess this variability of our error evaluation with proper
+# cross-validation:
+from sklearn.model_selection import cross_val_score
+
+cv_mape_scores = -cross_val_score(
+    model, X, y, cv=ts_cv, scoring="neg_mean_absolute_percentage_error"
+)
+cv_mape_scores
+
+# %%
+# The variability across splits is quite large! In a real life setting
+# it would be advised to use more splits to better assess the variability.
+# Let's report the mean CV scores and their standard deviation from now on.
+print(f"CV MAPE: {cv_mape_scores.mean():.3f} ± {cv_mape_scores.std():.3f}")
+
+# %%
+# We can compute several combinations of evaluation metrics and loss functions,
+# which are reported a bit below.
+from collections import defaultdict
+
+from sklearn.metrics import (
+    make_scorer,
+    mean_absolute_error,
+    mean_pinball_loss,
+    root_mean_squared_error,
+)
+from sklearn.model_selection import cross_validate
+
+
+def consolidate_scores(cv_results, scores, metric):
+    """Append "mean ± std" of `cv_results["test_<metric>"]` to `scores[metric]`."""
+    value = cv_results[f"test_{metric}"]  # not the caller's global loop variable
+    digits = 2 if metric == "MAPE" else 1  # MAPE is a small ratio
+    scores[metric].append(f"{value.mean():.{digits}f} ± {value.std():.{digits}f}")
+
+    return scores
+
+
+scoring = {
+    "MAPE": make_scorer(mean_absolute_percentage_error),
+    "RMSE": make_scorer(root_mean_squared_error),
+    "MAE": make_scorer(mean_absolute_error),
+    "pinball_loss_05": make_scorer(mean_pinball_loss, alpha=0.05),
+    "pinball_loss_50": make_scorer(mean_pinball_loss, alpha=0.50),
+    "pinball_loss_95": make_scorer(mean_pinball_loss, alpha=0.95),
+}
+loss_functions = ["squared_error", "poisson", "absolute_error"]
+scores = defaultdict(list)
+for loss_func in loss_functions:
+    model = HistGradientBoostingRegressor(loss=loss_func)
+    cv_results = cross_validate(
+        model,
+        X,
+        y,
+        cv=ts_cv,
+        scoring=scoring,
+        n_jobs=2,
+    )
+    time = cv_results["fit_time"]
+    scores["loss"].append(loss_func)
+    scores["fit_time"].append(f"{time.mean():.2f} ± {time.std():.2f} s")
+
+    for key, value in cv_results.items():
+        if key.startswith("test_"):
+            metric = key.split("test_")[1]
+            scores = consolidate_scores(cv_results, scores, metric)
+
+
+# %%
+# Modeling predictive uncertainty via quantile regression
+# -------------------------------------------------------
+# Instead of modeling the expected value of the distribution of
+# :math:`Y|X` like the least squares and Poisson losses do, one could try to
+# estimate quantiles of the conditional distribution.
+#
+# :math:`Y|X=x_i` is expected to be a random variable for a given data point
+# :math:`x_i` because we expect that the number of rentals cannot be 100%
+# accurately predicted from the features. It can be influenced by other
+# variables not properly captured by the existing lagged features. For
+# instance whether or not it will rain in the next hour cannot be fully
+# anticipated from the past hours bike rental data. This is what we
+# call aleatoric uncertainty.
+#
+# Quantile regression makes it possible to give a finer description of that
+# distribution without making strong assumptions on its shape.
+quantile_list = [0.05, 0.5, 0.95]
+
+for quantile in quantile_list:
+    model = HistGradientBoostingRegressor(loss="quantile", quantile=quantile)
+    cv_results = cross_validate(
+        model,
+        X,
+        y,
+        cv=ts_cv,
+        scoring=scoring,
+        n_jobs=2,
+    )
+    time = cv_results["fit_time"]
+    scores["fit_time"].append(f"{time.mean():.2f} ± {time.std():.2f} s")
+
+    scores["loss"].append(f"quantile {int(quantile*100)}")
+    for key, value in cv_results.items():
+        if key.startswith("test_"):
+            metric = key.split("test_")[1]
+            scores = consolidate_scores(cv_results, scores, metric)
+
+scores_df = pl.DataFrame(scores)
+scores_df
+
+
+# %%
+# Let us take a look at the losses that minimise each metric.
+def min_arg(col):
+    col_split = pl.col(col).str.split(" ")
+    return pl.arg_sort_by(
+        col_split.list.get(0).cast(pl.Float64),
+        col_split.list.get(2).cast(pl.Float64),
+    ).first()
+
+
+scores_df.select(
+    pl.col("loss").get(min_arg(col_name)).alias(col_name)
+    for col_name in scores_df.columns
+    if col_name != "loss"
+)
+
+# %%
+# Even if the score distributions overlap due to the variance in the dataset,
+# it is true that the average RMSE is lower when `loss="squared_error"`, whereas
+# the average MAPE is lower when `loss="absolute_error"` as expected. That is
+# also the case for the Mean Pinball Loss with the quantiles 5 and 95. The score
+# corresponding to the 50 quantile loss is overlapping with the score obtained
+# by minimizing other loss functions, which is also the case for the MAE.
+#
+# A qualitative look at the predictions
+# -------------------------------------
+# We can now visualize the performance of the model with regards
+# to the 5th percentile, median and the 95th percentile:
+all_splits = list(ts_cv.split(X, y))
+train_idx, test_idx = all_splits[0]
+
+X_train, X_test = X[train_idx, :], X[test_idx, :]
+y_train, y_test = y[train_idx], y[test_idx]
+
+max_iter = 50
+gbrt_mean_poisson = HistGradientBoostingRegressor(loss="poisson", max_iter=max_iter)
+gbrt_mean_poisson.fit(X_train, y_train)
+mean_predictions = gbrt_mean_poisson.predict(X_test)
+
+gbrt_median = HistGradientBoostingRegressor(
+    loss="quantile", quantile=0.5, max_iter=max_iter
+)
+gbrt_median.fit(X_train, y_train)
+median_predictions = gbrt_median.predict(X_test)
+
+gbrt_percentile_5 = HistGradientBoostingRegressor(
+    loss="quantile", quantile=0.05, max_iter=max_iter
+)
+gbrt_percentile_5.fit(X_train, y_train)
+percentile_5_predictions = gbrt_percentile_5.predict(X_test)
+
+gbrt_percentile_95 = HistGradientBoostingRegressor(
+    loss="quantile", quantile=0.95, max_iter=max_iter
+)
+gbrt_percentile_95.fit(X_train, y_train)
+percentile_95_predictions = gbrt_percentile_95.predict(X_test)
+
+# %%
+# We can now take a look at the predictions made by the regression models:
+last_hours = slice(-96, None)
+fig, ax = plt.subplots(figsize=(15, 7))
+plt.title("Predictions by regression models")
+ax.plot(
+    y_test[last_hours],
+    "x-",
+    alpha=0.2,
+    label="Actual demand",
+    color="black",
+)
+ax.plot(
+    median_predictions[last_hours],
+    "^-",
+    label="GBRT median",
+)
+ax.plot(
+    mean_predictions[last_hours],
+    "x-",
+    label="GBRT mean (Poisson)",
+)
+ax.fill_between(
+    np.arange(96),
+    percentile_5_predictions[last_hours],
+    percentile_95_predictions[last_hours],
+    alpha=0.3,
+    label="GBRT 90% interval",
+)
+_ = ax.legend()
+
+# %%
+# Here it's interesting to notice that the blue area between the 5% and 95%
+# percentile estimators has a width that varies with the time of the day:
+#
+# - At night, the blue band is much narrower: the pair of models is quite
+#   certain that there will be a small number of bike rentals. And furthermore
+#   these seem correct in the sense that the actual demand stays in that blue
+#   band.
+# - During the day, the blue band is much wider: the uncertainty grows, probably
+#   because of the variability of the weather that can have a very large impact,
+#   especially on week-ends.
+# - We can also see that during week-days, the commute pattern is still visible in
+#   the 5% and 95% estimations.
+# - Finally, it is expected that 10% of the time, the actual demand does not lie
+#   between the 5% and 95% percentile estimates. On this test span, the actual
+#   demand seems to be higher, especially during the rush hours. It might reveal that
+#   our 95% percentile estimator underestimates the demand peaks. This could be
+#   quantitatively confirmed by computing empirical coverage numbers as done in
+#   the :ref:`calibration of confidence intervals <calibration-section>`.
+#
+# Looking at the performance of non-linear regression models vs
+# the best models:
+from sklearn.metrics import PredictionErrorDisplay
+
+fig, axes = plt.subplots(ncols=3, figsize=(15, 6), sharey=True)
+fig.suptitle("Non-linear regression models")
+predictions = [
+    median_predictions,
+    percentile_5_predictions,
+    percentile_95_predictions,
+]
+labels = [
+    "Median",
+    "5th percentile",
+    "95th percentile",
+]
+for ax, pred, label in zip(axes, predictions, labels):
+    PredictionErrorDisplay.from_predictions(
+        y_true=y_test,
+        y_pred=pred,
+        kind="residual_vs_predicted",
+        scatter_kwargs={"alpha": 0.3},
+        ax=ax,
+    )
+    ax.set(xlabel="Predicted demand", ylabel="True demand")
+    ax.legend(["Best model", label])
+
+plt.show()
+
+# %%
+# Conclusion
+# ----------
+# Through this example we explored time series forecasting using lagged
+# features. We compared a naive regression (using the standardized
+# :class:`~sklearn.model_selection.train_test_split`) with a proper time
+# series evaluation strategy using
+# :class:`~sklearn.model_selection.TimeSeriesSplit`. We observed that the
+# model trained using :class:`~sklearn.model_selection.train_test_split`,
+# having a default value of `shuffle` set to `True` produced an overly
+# optimistic Mean Absolute Percentage Error (MAPE). The results
+# produced from the time-based split better represent the performance
+# of our time-series regression model. We also analyzed the predictive uncertainty
+# of our model via Quantile Regression. Predictions based on the 5th and
+# 95th percentile using `loss="quantile"` provide us with a quantitative estimate
+# of the uncertainty of the forecasts made by our time series regression model.
+# Uncertainty estimation can also be performed
+# using `MAPIE <https://mapie.readthedocs.io/en/latest/index.html>`_,
+# that provides an implementation based on recent work on conformal prediction
+# methods and estimates both aleatoric and epistemic uncertainty at the same time.
+# Furthermore, functionalities provided
+# by `sktime <https://www.sktime.net/en/latest/users.html>`_
+# can be used to extend scikit-learn estimators by making use of recursive time
+# series forecasting, that enables dynamic predictions of future values.
diff --git a/examples/bicluster/plot_bicluster_newsgroups.py b/examples/bicluster/plot_bicluster_newsgroups.py
index 0fef820bb9f2a..d110f3e6de9c5 100644
--- a/examples/bicluster/plot_bicluster_newsgroups.py
+++ b/examples/bicluster/plot_bicluster_newsgroups.py
@@ -23,6 +23,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import operator
 from collections import defaultdict
 from time import time
diff --git a/examples/classification/plot_classifier_comparison.py b/examples/classification/plot_classifier_comparison.py
index 4fb8a142e2606..3108618091b4f 100644
--- a/examples/classification/plot_classifier_comparison.py
+++ b/examples/classification/plot_classifier_comparison.py
@@ -19,6 +19,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Code source: Gaël Varoquaux
 #              Andreas Müller
 # Modified for documentation by Jaques Grobler
diff --git a/examples/classification/plot_lda.py b/examples/classification/plot_lda.py
index 88135079529c8..cf052a9379b22 100644
--- a/examples/classification/plot_lda.py
+++ b/examples/classification/plot_lda.py
@@ -8,6 +8,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/classification/plot_lda_qda.py b/examples/classification/plot_lda_qda.py
index 0691f52390a06..599659fdac2dc 100644
--- a/examples/classification/plot_lda_qda.py
+++ b/examples/classification/plot_lda_qda.py
@@ -11,6 +11,9 @@
 deviation with QDA.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Data generation
 # ---------------
diff --git a/examples/cluster/plot_affinity_propagation.py b/examples/cluster/plot_affinity_propagation.py
index e286104636d67..2066212abea5d 100644
--- a/examples/cluster/plot_affinity_propagation.py
+++ b/examples/cluster/plot_affinity_propagation.py
@@ -9,6 +9,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import numpy as np
 
 from sklearn import metrics
diff --git a/examples/cluster/plot_bisect_kmeans.py b/examples/cluster/plot_bisect_kmeans.py
index a562ebbc96ba5..7fc738bf08218 100644
--- a/examples/cluster/plot_bisect_kmeans.py
+++ b/examples/cluster/plot_bisect_kmeans.py
@@ -14,6 +14,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 
 from sklearn.cluster import BisectingKMeans, KMeans
diff --git a/examples/cluster/plot_cluster_comparison.py b/examples/cluster/plot_cluster_comparison.py
index bc6f158c02ed0..539c07cfd442e 100644
--- a/examples/cluster/plot_cluster_comparison.py
+++ b/examples/cluster/plot_cluster_comparison.py
@@ -24,6 +24,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import time
 import warnings
 from itertools import cycle, islice
diff --git a/examples/cluster/plot_cluster_iris.py b/examples/cluster/plot_cluster_iris.py
index e3a6d6a3d8664..e469eeb3a86ff 100644
--- a/examples/cluster/plot_cluster_iris.py
+++ b/examples/cluster/plot_cluster_iris.py
@@ -18,6 +18,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Code source: Gaël Varoquaux
 # Modified for documentation by Jaques Grobler
 # SPDX-License-Identifier: BSD-3-Clause
diff --git a/examples/cluster/plot_dbscan.py b/examples/cluster/plot_dbscan.py
index 0b0bd64ecf62b..af56701db846f 100644
--- a/examples/cluster/plot_dbscan.py
+++ b/examples/cluster/plot_dbscan.py
@@ -12,6 +12,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Data generation
 # ---------------
diff --git a/examples/cluster/plot_dict_face_patches.py b/examples/cluster/plot_dict_face_patches.py
index 99b241bfdeea9..27eeb07ec7867 100644
--- a/examples/cluster/plot_dict_face_patches.py
+++ b/examples/cluster/plot_dict_face_patches.py
@@ -21,6 +21,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Load the data
 # -------------
diff --git a/examples/cluster/plot_digits_agglomeration.py b/examples/cluster/plot_digits_agglomeration.py
index 3773a300cb51a..d4a427905d91f 100644
--- a/examples/cluster/plot_digits_agglomeration.py
+++ b/examples/cluster/plot_digits_agglomeration.py
@@ -8,6 +8,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Code source: Gaël Varoquaux
 # Modified for documentation by Jaques Grobler
 # SPDX-License-Identifier: BSD-3-Clause
diff --git a/examples/cluster/plot_hdbscan.py b/examples/cluster/plot_hdbscan.py
index 630ab1f150fcb..07cea76ae072a 100644
--- a/examples/cluster/plot_hdbscan.py
+++ b/examples/cluster/plot_hdbscan.py
@@ -12,6 +12,9 @@
 
 We first define a couple utility functions for convenience.
 """
+
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 # %%
 import matplotlib.pyplot as plt
 import numpy as np
diff --git a/examples/cluster/plot_inductive_clustering.py b/examples/cluster/plot_inductive_clustering.py
index b6464459160e3..b92a814cd4ef3 100644
--- a/examples/cluster/plot_inductive_clustering.py
+++ b/examples/cluster/plot_inductive_clustering.py
@@ -20,6 +20,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Authors: Chirag Nagpal
 #          Christos Aridas
 
diff --git a/examples/cluster/plot_kmeans_digits.py b/examples/cluster/plot_kmeans_digits.py
index d61ec91d13d52..3e7c70b9d08a9 100644
--- a/examples/cluster/plot_kmeans_digits.py
+++ b/examples/cluster/plot_kmeans_digits.py
@@ -25,6 +25,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Load the dataset
 # ----------------
diff --git a/examples/cluster/plot_kmeans_plusplus.py b/examples/cluster/plot_kmeans_plusplus.py
index 69ea738635ddf..355426a2a4872 100644
--- a/examples/cluster/plot_kmeans_plusplus.py
+++ b/examples/cluster/plot_kmeans_plusplus.py
@@ -10,6 +10,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 
 from sklearn.cluster import kmeans_plusplus
diff --git a/examples/cluster/plot_kmeans_silhouette_analysis.py b/examples/cluster/plot_kmeans_silhouette_analysis.py
index a999e83fcac5d..4b5c8d2c6d66d 100644
--- a/examples/cluster/plot_kmeans_silhouette_analysis.py
+++ b/examples/cluster/plot_kmeans_silhouette_analysis.py
@@ -31,6 +31,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.cm as cm
 import matplotlib.pyplot as plt
 import numpy as np
diff --git a/examples/cluster/plot_linkage_comparison.py b/examples/cluster/plot_linkage_comparison.py
index 793fee059d797..c08dedfbab1bc 100644
--- a/examples/cluster/plot_linkage_comparison.py
+++ b/examples/cluster/plot_linkage_comparison.py
@@ -23,6 +23,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import time
 import warnings
 from itertools import cycle, islice
diff --git a/examples/cluster/plot_mean_shift.py b/examples/cluster/plot_mean_shift.py
index aacbc7f216405..456a1c4ac2020 100644
--- a/examples/cluster/plot_mean_shift.py
+++ b/examples/cluster/plot_mean_shift.py
@@ -11,6 +11,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import numpy as np
 
 from sklearn.cluster import MeanShift, estimate_bandwidth
diff --git a/examples/cluster/plot_mini_batch_kmeans.py b/examples/cluster/plot_mini_batch_kmeans.py
index 3a6e8aa63786b..d189ed0e02a5c 100644
--- a/examples/cluster/plot_mini_batch_kmeans.py
+++ b/examples/cluster/plot_mini_batch_kmeans.py
@@ -14,6 +14,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Generate the data
 # -----------------
diff --git a/examples/compose/plot_compare_reduction.py b/examples/compose/plot_compare_reduction.py
index 529366c6244f2..2aeb9fae4af5e 100644
--- a/examples/compose/plot_compare_reduction.py
+++ b/examples/compose/plot_compare_reduction.py
@@ -20,6 +20,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Authors: Robert McGibbon
 #          Joel Nothman
 #          Guillaume Lemaitre
diff --git a/examples/compose/plot_digits_pipe.py b/examples/compose/plot_digits_pipe.py
index 511878fcd4d99..c23e9b3b96a08 100644
--- a/examples/compose/plot_digits_pipe.py
+++ b/examples/compose/plot_digits_pipe.py
@@ -10,6 +10,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Code source: Gaël Varoquaux
 # Modified for documentation by Jaques Grobler
 # SPDX-License-Identifier: BSD-3-Clause
diff --git a/examples/covariance/plot_covariance_estimation.py b/examples/covariance/plot_covariance_estimation.py
index 04baa0fd98bc0..1fdede5364eec 100644
--- a/examples/covariance/plot_covariance_estimation.py
+++ b/examples/covariance/plot_covariance_estimation.py
@@ -15,6 +15,9 @@
 trade-off.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Generate sample data
 # --------------------
diff --git a/examples/covariance/plot_lw_vs_oas.py b/examples/covariance/plot_lw_vs_oas.py
index 107f6bd1c29cc..c1c41bc811a85 100644
--- a/examples/covariance/plot_lw_vs_oas.py
+++ b/examples/covariance/plot_lw_vs_oas.py
@@ -21,6 +21,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 from scipy.linalg import cholesky, toeplitz
diff --git a/examples/covariance/plot_mahalanobis_distances.py b/examples/covariance/plot_mahalanobis_distances.py
index 537d3fa0d9d8a..a1507c3ef162e 100644
--- a/examples/covariance/plot_mahalanobis_distances.py
+++ b/examples/covariance/plot_mahalanobis_distances.py
@@ -62,6 +62,9 @@
 
 """  # noqa: E501
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Generate data
 # --------------
diff --git a/examples/covariance/plot_robust_vs_empirical_covariance.py b/examples/covariance/plot_robust_vs_empirical_covariance.py
index c61a97ddd979b..54871c495e82c 100644
--- a/examples/covariance/plot_robust_vs_empirical_covariance.py
+++ b/examples/covariance/plot_robust_vs_empirical_covariance.py
@@ -53,6 +53,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.font_manager
 import matplotlib.pyplot as plt
 import numpy as np
diff --git a/examples/cross_decomposition/plot_compare_cross_decomposition.py b/examples/cross_decomposition/plot_compare_cross_decomposition.py
index 762c42dfdf31c..1fce2f70bc42a 100644
--- a/examples/cross_decomposition/plot_compare_cross_decomposition.py
+++ b/examples/cross_decomposition/plot_compare_cross_decomposition.py
@@ -21,6 +21,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Dataset based latent variables model
 # ------------------------------------
diff --git a/examples/cross_decomposition/plot_pcr_vs_pls.py b/examples/cross_decomposition/plot_pcr_vs_pls.py
index 895c75dc1a728..e0606a5c3dd42 100644
--- a/examples/cross_decomposition/plot_pcr_vs_pls.py
+++ b/examples/cross_decomposition/plot_pcr_vs_pls.py
@@ -33,6 +33,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # The data
 # --------
diff --git a/examples/datasets/plot_digits_last_image.py b/examples/datasets/plot_digits_last_image.py
index ce6c48e58715e..836db4b37758c 100644
--- a/examples/datasets/plot_digits_last_image.py
+++ b/examples/datasets/plot_digits_last_image.py
@@ -14,6 +14,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Code source: Gaël Varoquaux
 # Modified for documentation by Jaques Grobler
 # SPDX-License-Identifier: BSD-3-Clause
diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py
index d36ebca1ebe83..ea9c89bbb6082 100644
--- a/examples/datasets/plot_iris_dataset.py
+++ b/examples/datasets/plot_iris_dataset.py
@@ -15,6 +15,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Code source: Gaël Varoquaux
 # Modified for documentation by Jaques Grobler
 # SPDX-License-Identifier: BSD-3-Clause
diff --git a/examples/datasets/plot_random_dataset.py b/examples/datasets/plot_random_dataset.py
index e5cbdb080b59f..50ae2def9eaae 100644
--- a/examples/datasets/plot_random_dataset.py
+++ b/examples/datasets/plot_random_dataset.py
@@ -14,6 +14,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 
 from sklearn.datasets import make_blobs, make_classification, make_gaussian_quantiles
diff --git a/examples/datasets/plot_random_multilabel_dataset.py b/examples/datasets/plot_random_multilabel_dataset.py
index e6e2d6ad9edcf..36a8bc23e4d11 100644
--- a/examples/datasets/plot_random_multilabel_dataset.py
+++ b/examples/datasets/plot_random_multilabel_dataset.py
@@ -35,6 +35,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/decomposition/plot_faces_decomposition.py b/examples/decomposition/plot_faces_decomposition.py
index 2ed22a52f7d34..7082c922e1086 100644
--- a/examples/decomposition/plot_faces_decomposition.py
+++ b/examples/decomposition/plot_faces_decomposition.py
@@ -13,6 +13,9 @@
 - License: BSD 3 clause
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Dataset preparation
 # -------------------
diff --git a/examples/decomposition/plot_ica_blind_source_separation.py b/examples/decomposition/plot_ica_blind_source_separation.py
index 584d6b9509589..e8d571d814a1b 100644
--- a/examples/decomposition/plot_ica_blind_source_separation.py
+++ b/examples/decomposition/plot_ica_blind_source_separation.py
@@ -14,6 +14,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Generate sample data
 # --------------------
diff --git a/examples/decomposition/plot_image_denoising.py b/examples/decomposition/plot_image_denoising.py
index 646669d1469ff..5248fdff5a8ca 100644
--- a/examples/decomposition/plot_image_denoising.py
+++ b/examples/decomposition/plot_image_denoising.py
@@ -32,6 +32,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Generate distorted image
 # ------------------------
diff --git a/examples/decomposition/plot_pca_iris.py b/examples/decomposition/plot_pca_iris.py
index f574e14bdfd17..66c7bc7994129 100644
--- a/examples/decomposition/plot_pca_iris.py
+++ b/examples/decomposition/plot_pca_iris.py
@@ -10,6 +10,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Code source: Gaël Varoquaux
 # SPDX-License-Identifier: BSD-3-Clause
 
diff --git a/examples/decomposition/plot_pca_vs_lda.py b/examples/decomposition/plot_pca_vs_lda.py
index e88a0aff7882f..4679a410af76a 100644
--- a/examples/decomposition/plot_pca_vs_lda.py
+++ b/examples/decomposition/plot_pca_vs_lda.py
@@ -18,6 +18,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 
 from sklearn import datasets
diff --git a/examples/decomposition/plot_sparse_coding.py b/examples/decomposition/plot_sparse_coding.py
index c45cd3c83b04f..778f718c2ac87 100644
--- a/examples/decomposition/plot_sparse_coding.py
+++ b/examples/decomposition/plot_sparse_coding.py
@@ -16,6 +16,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/ensemble/plot_adaboost_multiclass.py b/examples/ensemble/plot_adaboost_multiclass.py
index 8ae7372b0874f..db0200997d9e0 100644
--- a/examples/ensemble/plot_adaboost_multiclass.py
+++ b/examples/ensemble/plot_adaboost_multiclass.py
@@ -25,6 +25,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Noel Dawe <noel.dawe@gmail.com>
 # SPDX-License-Identifier: BSD-3-Clause
 
diff --git a/examples/ensemble/plot_forest_importances.py b/examples/ensemble/plot_forest_importances.py
index 269451168dd7a..b77e30c327fb4 100644
--- a/examples/ensemble/plot_forest_importances.py
+++ b/examples/ensemble/plot_forest_importances.py
@@ -13,6 +13,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 
 # %%
diff --git a/examples/ensemble/plot_forest_importances_faces.py b/examples/ensemble/plot_forest_importances_faces.py
index 8b8e8751ec5a2..5b8db11b1692e 100644
--- a/examples/ensemble/plot_forest_importances_faces.py
+++ b/examples/ensemble/plot_forest_importances_faces.py
@@ -12,6 +12,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Loading the data and model fitting
 # ----------------------------------
diff --git a/examples/ensemble/plot_forest_iris.py b/examples/ensemble/plot_forest_iris.py
index c2056ce1905d1..78a28e521ff90 100644
--- a/examples/ensemble/plot_forest_iris.py
+++ b/examples/ensemble/plot_forest_iris.py
@@ -42,6 +42,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 from matplotlib.colors import ListedColormap
diff --git a/examples/ensemble/plot_gradient_boosting_categorical.py b/examples/ensemble/plot_gradient_boosting_categorical.py
index 2e260a4be1802..e80c0fb6fdc6e 100644
--- a/examples/ensemble/plot_gradient_boosting_categorical.py
+++ b/examples/ensemble/plot_gradient_boosting_categorical.py
@@ -27,6 +27,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Load Ames Housing dataset
 # -------------------------
diff --git a/examples/ensemble/plot_gradient_boosting_early_stopping.py b/examples/ensemble/plot_gradient_boosting_early_stopping.py
index 6c239e97d66ee..39e8b19a3125f 100644
--- a/examples/ensemble/plot_gradient_boosting_early_stopping.py
+++ b/examples/ensemble/plot_gradient_boosting_early_stopping.py
@@ -32,6 +32,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Data Preparation
 # ----------------
diff --git a/examples/ensemble/plot_gradient_boosting_quantile.py b/examples/ensemble/plot_gradient_boosting_quantile.py
index 723a494b04db8..3e2c44568de3c 100644
--- a/examples/ensemble/plot_gradient_boosting_quantile.py
+++ b/examples/ensemble/plot_gradient_boosting_quantile.py
@@ -10,6 +10,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Generate some data for a synthetic regression problem by applying the
 # function f to uniformly sampled random inputs.
diff --git a/examples/ensemble/plot_isolation_forest.py b/examples/ensemble/plot_isolation_forest.py
index f5fad1d7b9ea9..2bd5bc9e99a0e 100644
--- a/examples/ensemble/plot_isolation_forest.py
+++ b/examples/ensemble/plot_isolation_forest.py
@@ -16,6 +16,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Data generation
 # ---------------
diff --git a/examples/ensemble/plot_monotonic_constraints.py b/examples/ensemble/plot_monotonic_constraints.py
index dcd5f05af626c..40fb61eae19e9 100644
--- a/examples/ensemble/plot_monotonic_constraints.py
+++ b/examples/ensemble/plot_monotonic_constraints.py
@@ -20,6 +20,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 import matplotlib.pyplot as plt
 import numpy as np
diff --git a/examples/ensemble/plot_random_forest_embedding.py b/examples/ensemble/plot_random_forest_embedding.py
index fe26e04ca7789..d3d595df232a9 100644
--- a/examples/ensemble/plot_random_forest_embedding.py
+++ b/examples/ensemble/plot_random_forest_embedding.py
@@ -26,6 +26,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py
index 90441c6d28339..d40d831fb911f 100644
--- a/examples/ensemble/plot_voting_decision_regions.py
+++ b/examples/ensemble/plot_voting_decision_regions.py
@@ -23,6 +23,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from itertools import product
 
 import matplotlib.pyplot as plt
diff --git a/examples/ensemble/plot_voting_probas.py b/examples/ensemble/plot_voting_probas.py
index 424959e6d5072..848358ca1d208 100644
--- a/examples/ensemble/plot_voting_probas.py
+++ b/examples/ensemble/plot_voting_probas.py
@@ -23,6 +23,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/ensemble/plot_voting_regressor.py b/examples/ensemble/plot_voting_regressor.py
index d33becca505e3..6ccc4e81b700a 100644
--- a/examples/ensemble/plot_voting_regressor.py
+++ b/examples/ensemble/plot_voting_regressor.py
@@ -23,6 +23,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 
 from sklearn.datasets import load_diabetes
diff --git a/examples/exercises/plot_cv_diabetes.py b/examples/exercises/plot_cv_diabetes.py
index 9d0232de9e660..5e582b4b21571 100644
--- a/examples/exercises/plot_cv_diabetes.py
+++ b/examples/exercises/plot_cv_diabetes.py
@@ -10,6 +10,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Load dataset and apply GridSearchCV
 # -----------------------------------
diff --git a/examples/exercises/plot_digits_classification_exercise.py b/examples/exercises/plot_digits_classification_exercise.py
index 25b0171c66421..d65006178ca4f 100644
--- a/examples/exercises/plot_digits_classification_exercise.py
+++ b/examples/exercises/plot_digits_classification_exercise.py
@@ -12,6 +12,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from sklearn import datasets, linear_model, neighbors
 
 X_digits, y_digits = datasets.load_digits(return_X_y=True)
diff --git a/examples/exercises/plot_iris_exercise.py b/examples/exercises/plot_iris_exercise.py
index 07687b920e1b8..8dcc4368ab620 100644
--- a/examples/exercises/plot_iris_exercise.py
+++ b/examples/exercises/plot_iris_exercise.py
@@ -10,6 +10,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/feature_selection/plot_f_test_vs_mi.py b/examples/feature_selection/plot_f_test_vs_mi.py
index 5c015e7e4fd58..e3c75d39e0a27 100644
--- a/examples/feature_selection/plot_f_test_vs_mi.py
+++ b/examples/feature_selection/plot_f_test_vs_mi.py
@@ -23,6 +23,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/feature_selection/plot_feature_selection.py b/examples/feature_selection/plot_feature_selection.py
index 2cf64cb6ea598..520747f417db1 100644
--- a/examples/feature_selection/plot_feature_selection.py
+++ b/examples/feature_selection/plot_feature_selection.py
@@ -16,6 +16,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Generate sample data
 # --------------------
diff --git a/examples/feature_selection/plot_feature_selection_pipeline.py b/examples/feature_selection/plot_feature_selection_pipeline.py
index 1d7c44050ea78..b33bfeda0a37a 100644
--- a/examples/feature_selection/plot_feature_selection_pipeline.py
+++ b/examples/feature_selection/plot_feature_selection_pipeline.py
@@ -10,6 +10,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # We will start by generating a binary classification dataset. Subsequently, we
 # will divide the dataset into two subsets.
diff --git a/examples/feature_selection/plot_rfe_digits.py b/examples/feature_selection/plot_rfe_digits.py
index 198a3d6f3af90..360a9bd92837f 100644
--- a/examples/feature_selection/plot_rfe_digits.py
+++ b/examples/feature_selection/plot_rfe_digits.py
@@ -18,6 +18,9 @@
 
 """  # noqa: E501
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 
 from sklearn.datasets import load_digits
diff --git a/examples/feature_selection/plot_rfe_with_cross_validation.py b/examples/feature_selection/plot_rfe_with_cross_validation.py
index 6e4a8ae0ee8c5..4e3e45384e026 100644
--- a/examples/feature_selection/plot_rfe_with_cross_validation.py
+++ b/examples/feature_selection/plot_rfe_with_cross_validation.py
@@ -8,6 +8,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Data generation
 # ---------------
diff --git a/examples/feature_selection/plot_select_from_model_diabetes.py b/examples/feature_selection/plot_select_from_model_diabetes.py
index f008d8d6e8b68..9359e9a982742 100644
--- a/examples/feature_selection/plot_select_from_model_diabetes.py
+++ b/examples/feature_selection/plot_select_from_model_diabetes.py
@@ -19,6 +19,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Loading the data
 # ----------------
diff --git a/examples/gaussian_process/plot_gpc_iris.py b/examples/gaussian_process/plot_gpc_iris.py
index 88c536d8824c8..a01d9ac081d7e 100644
--- a/examples/gaussian_process/plot_gpc_iris.py
+++ b/examples/gaussian_process/plot_gpc_iris.py
@@ -10,6 +10,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/gaussian_process/plot_gpr_on_structured_data.py b/examples/gaussian_process/plot_gpr_on_structured_data.py
index e702f1fe0769a..f3a8de5d018ef 100644
--- a/examples/gaussian_process/plot_gpr_on_structured_data.py
+++ b/examples/gaussian_process/plot_gpr_on_structured_data.py
@@ -38,6 +38,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 import numpy as np
 
diff --git a/examples/impute/plot_iterative_imputer_variants_comparison.py b/examples/impute/plot_iterative_imputer_variants_comparison.py
index 445a08c05f02f..f06875a5f7fcd 100644
--- a/examples/impute/plot_iterative_imputer_variants_comparison.py
+++ b/examples/impute/plot_iterative_imputer_variants_comparison.py
@@ -44,6 +44,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
diff --git a/examples/inspection/plot_causal_interpretation.py b/examples/inspection/plot_causal_interpretation.py
index 68f10b5304842..cf6c72f29951d 100644
--- a/examples/inspection/plot_causal_interpretation.py
+++ b/examples/inspection/plot_causal_interpretation.py
@@ -15,6 +15,9 @@
 identifying that causal effect.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # The dataset: simulated hourly wages
 # -----------------------------------
diff --git a/examples/inspection/plot_linear_model_coefficient_interpretation.py b/examples/inspection/plot_linear_model_coefficient_interpretation.py
index 0e11f01937ebc..f0ec361f4fa81 100644
--- a/examples/inspection/plot_linear_model_coefficient_interpretation.py
+++ b/examples/inspection/plot_linear_model_coefficient_interpretation.py
@@ -39,6 +39,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 import matplotlib.pyplot as plt
 import numpy as np
diff --git a/examples/inspection/plot_partial_dependence.py b/examples/inspection/plot_partial_dependence.py
index 4c3e0f409eeff..eace8afeb96a0 100644
--- a/examples/inspection/plot_partial_dependence.py
+++ b/examples/inspection/plot_partial_dependence.py
@@ -34,6 +34,9 @@
        Graphical Statistics, 24(1): 44-65 <1309.6392>`
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Bike sharing dataset preprocessing
 # ----------------------------------
diff --git a/examples/inspection/plot_permutation_importance.py b/examples/inspection/plot_permutation_importance.py
index 66b40fcd25bc8..73c5179a09b87 100644
--- a/examples/inspection/plot_permutation_importance.py
+++ b/examples/inspection/plot_permutation_importance.py
@@ -25,6 +25,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Data Loading and Feature Engineering
 # ------------------------------------
diff --git a/examples/inspection/plot_permutation_importance_multicollinear.py b/examples/inspection/plot_permutation_importance_multicollinear.py
index a8fe52b1565d9..212c22cdf64e0 100644
--- a/examples/inspection/plot_permutation_importance_multicollinear.py
+++ b/examples/inspection/plot_permutation_importance_multicollinear.py
@@ -21,6 +21,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Random Forest Feature Importance on Breast Cancer Data
 # ------------------------------------------------------
diff --git a/examples/linear_model/plot_ard.py b/examples/linear_model/plot_ard.py
index e39baa111c4e2..9b1c355ef0ef5 100644
--- a/examples/linear_model/plot_ard.py
+++ b/examples/linear_model/plot_ard.py
@@ -19,6 +19,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Author: Arturo Amor <david-arturo.amor-quiroz@inria.fr>
 
 # %%
diff --git a/examples/linear_model/plot_bayesian_ridge_curvefit.py b/examples/linear_model/plot_bayesian_ridge_curvefit.py
index 12f48b9ce347d..f1c86a196c2a3 100644
--- a/examples/linear_model/plot_bayesian_ridge_curvefit.py
+++ b/examples/linear_model/plot_bayesian_ridge_curvefit.py
@@ -26,6 +26,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Author: Yoshihiro Uchida <nimbus1after2a1sun7shower@gmail.com>
 
 # %%
diff --git a/examples/linear_model/plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py b/examples/linear_model/plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py
index b31d95348c083..e118847a8737c 100644
--- a/examples/linear_model/plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py
+++ b/examples/linear_model/plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py
@@ -17,6 +17,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Let's start by loading the dataset and creating some sample weights.
 import numpy as np
diff --git a/examples/linear_model/plot_iris_logistic.py b/examples/linear_model/plot_iris_logistic.py
index ca7cb00d5f878..288de6a886550 100644
--- a/examples/linear_model/plot_iris_logistic.py
+++ b/examples/linear_model/plot_iris_logistic.py
@@ -10,6 +10,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Code source: Gaël Varoquaux
 # Modified for documentation by Jaques Grobler
 # SPDX-License-Identifier: BSD-3-Clause
diff --git a/examples/linear_model/plot_lasso_and_elasticnet.py b/examples/linear_model/plot_lasso_and_elasticnet.py
index 78ab9624b64a4..f0eb2400c95ef 100644
--- a/examples/linear_model/plot_lasso_and_elasticnet.py
+++ b/examples/linear_model/plot_lasso_and_elasticnet.py
@@ -21,6 +21,9 @@
 compared with the ground-truth.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Author: Arturo Amor <david-arturo.amor-quiroz@inria.fr>
 
 # %%
diff --git a/examples/linear_model/plot_lasso_dense_vs_sparse_data.py b/examples/linear_model/plot_lasso_dense_vs_sparse_data.py
index a797d5d708160..920994da1ffb5 100644
--- a/examples/linear_model/plot_lasso_dense_vs_sparse_data.py
+++ b/examples/linear_model/plot_lasso_dense_vs_sparse_data.py
@@ -8,6 +8,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from time import time
 
 from scipy import linalg, sparse
diff --git a/examples/linear_model/plot_logistic.py b/examples/linear_model/plot_logistic.py
index a22bc5cb39119..8ddd4deef2a8f 100644
--- a/examples/linear_model/plot_logistic.py
+++ b/examples/linear_model/plot_logistic.py
@@ -9,6 +9,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Code source: Gael Varoquaux
 # SPDX-License-Identifier: BSD-3-Clause
 
diff --git a/examples/linear_model/plot_nnls.py b/examples/linear_model/plot_nnls.py
index 05a8550ec166b..9ab19e19a1882 100644
--- a/examples/linear_model/plot_nnls.py
+++ b/examples/linear_model/plot_nnls.py
@@ -9,6 +9,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/linear_model/plot_ols.py b/examples/linear_model/plot_ols.py
index d94bfadf435a9..bb4a242cdbad2 100644
--- a/examples/linear_model/plot_ols.py
+++ b/examples/linear_model/plot_ols.py
@@ -14,6 +14,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Code source: Jaques Grobler
 # SPDX-License-Identifier: BSD-3-Clause
 
diff --git a/examples/linear_model/plot_ols_3d.py b/examples/linear_model/plot_ols_3d.py
index 328131a3b81c0..38fbcca546297 100644
--- a/examples/linear_model/plot_ols_3d.py
+++ b/examples/linear_model/plot_ols_3d.py
@@ -9,6 +9,9 @@
 give us much regarding `y` when compared to just feature 1.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Code source: Gaël Varoquaux
 # Modified for documentation by Jaques Grobler
 # SPDX-License-Identifier: BSD-3-Clause
diff --git a/examples/linear_model/plot_ols_ridge_variance.py b/examples/linear_model/plot_ols_ridge_variance.py
index 324770887afcf..86c28b4d3ae23 100644
--- a/examples/linear_model/plot_ols_ridge_variance.py
+++ b/examples/linear_model/plot_ols_ridge_variance.py
@@ -19,6 +19,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Code source: Gaël Varoquaux
 # Modified for documentation by Jaques Grobler
 # SPDX-License-Identifier: BSD-3-Clause
diff --git a/examples/linear_model/plot_omp.py b/examples/linear_model/plot_omp.py
index aa6044173b8ce..815b3c9425fdf 100644
--- a/examples/linear_model/plot_omp.py
+++ b/examples/linear_model/plot_omp.py
@@ -8,6 +8,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/linear_model/plot_ransac.py b/examples/linear_model/plot_ransac.py
index 7b89150c4bd20..ecef43e79f9bf 100644
--- a/examples/linear_model/plot_ransac.py
+++ b/examples/linear_model/plot_ransac.py
@@ -15,6 +15,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import numpy as np
 from matplotlib import pyplot as plt
 
diff --git a/examples/linear_model/plot_ridge_coeffs.py b/examples/linear_model/plot_ridge_coeffs.py
index 4bfb1f4c29325..0cff8c79a55bb 100644
--- a/examples/linear_model/plot_ridge_coeffs.py
+++ b/examples/linear_model/plot_ridge_coeffs.py
@@ -51,6 +51,9 @@
 capable of generalizing well to unseen data while avoiding overfitting.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Author: Kornel Kielczewski -- <kornel.k@plusnet.pl>
 
 # %%
diff --git a/examples/linear_model/plot_robust_fit.py b/examples/linear_model/plot_robust_fit.py
index 79213c9a8e83e..2b447e6175cdc 100644
--- a/examples/linear_model/plot_robust_fit.py
+++ b/examples/linear_model/plot_robust_fit.py
@@ -30,6 +30,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import numpy as np
 from matplotlib import pyplot as plt
 
diff --git a/examples/linear_model/plot_sgd_iris.py b/examples/linear_model/plot_sgd_iris.py
index 838f612d4659c..46dc2e7c31cd1 100644
--- a/examples/linear_model/plot_sgd_iris.py
+++ b/examples/linear_model/plot_sgd_iris.py
@@ -9,6 +9,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/linear_model/plot_sgd_loss_functions.py b/examples/linear_model/plot_sgd_loss_functions.py
index 140562184b946..b0c61da6ddcc1 100644
--- a/examples/linear_model/plot_sgd_loss_functions.py
+++ b/examples/linear_model/plot_sgd_loss_functions.py
@@ -8,6 +8,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/linear_model/plot_sgd_penalties.py b/examples/linear_model/plot_sgd_penalties.py
index ff71dba5f20a3..6f8830b52fe7a 100644
--- a/examples/linear_model/plot_sgd_penalties.py
+++ b/examples/linear_model/plot_sgd_penalties.py
@@ -11,6 +11,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/linear_model/plot_sgd_separating_hyperplane.py b/examples/linear_model/plot_sgd_separating_hyperplane.py
index e84ab7c519ae9..90f7502900291 100644
--- a/examples/linear_model/plot_sgd_separating_hyperplane.py
+++ b/examples/linear_model/plot_sgd_separating_hyperplane.py
@@ -9,6 +9,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/linear_model/plot_sgd_weighted_samples.py b/examples/linear_model/plot_sgd_weighted_samples.py
index 4d605e99b4e49..e9e6587004e70 100644
--- a/examples/linear_model/plot_sgd_weighted_samples.py
+++ b/examples/linear_model/plot_sgd_weighted_samples.py
@@ -8,6 +8,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py b/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py
index 60e9cd8078802..aabc8058dc407 100644
--- a/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py
+++ b/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py
@@ -19,6 +19,9 @@
 
 """  # noqa: E501
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 import matplotlib
 import matplotlib.lines as mlines
diff --git a/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py b/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py
index 404250a855e0a..fc6b1c57d7ad7 100644
--- a/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py
+++ b/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py
@@ -20,6 +20,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Author: Arthur Mensch
 
 import timeit
diff --git a/examples/manifold/plot_compare_methods.py b/examples/manifold/plot_compare_methods.py
index a3d3947d5b85f..9c123aadda8ea 100644
--- a/examples/manifold/plot_compare_methods.py
+++ b/examples/manifold/plot_compare_methods.py
@@ -20,6 +20,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Author: Jake Vanderplas -- <vanderplas@astro.washington.edu>
 
 # %%
diff --git a/examples/manifold/plot_swissroll.py b/examples/manifold/plot_swissroll.py
index 65df88588efef..803dc391ba4c2 100644
--- a/examples/manifold/plot_swissroll.py
+++ b/examples/manifold/plot_swissroll.py
@@ -9,6 +9,9 @@
 in the data.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Swiss Roll
 # ---------------------------------------------------
diff --git a/examples/miscellaneous/plot_display_object_visualization.py b/examples/miscellaneous/plot_display_object_visualization.py
index 075413379a92c..cf39a1407d28e 100644
--- a/examples/miscellaneous/plot_display_object_visualization.py
+++ b/examples/miscellaneous/plot_display_object_visualization.py
@@ -15,6 +15,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Load Data and train model
 # -------------------------
diff --git a/examples/miscellaneous/plot_estimator_representation.py b/examples/miscellaneous/plot_estimator_representation.py
index 1c9e3745db0de..683f0c5785f20 100644
--- a/examples/miscellaneous/plot_estimator_representation.py
+++ b/examples/miscellaneous/plot_estimator_representation.py
@@ -7,6 +7,9 @@
 displayed.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from sklearn.compose import make_column_transformer
 from sklearn.impute import SimpleImputer
 from sklearn.linear_model import LogisticRegression
diff --git a/examples/miscellaneous/plot_johnson_lindenstrauss_bound.py b/examples/miscellaneous/plot_johnson_lindenstrauss_bound.py
index 85161a6ee51bb..5528eada1ed4a 100644
--- a/examples/miscellaneous/plot_johnson_lindenstrauss_bound.py
+++ b/examples/miscellaneous/plot_johnson_lindenstrauss_bound.py
@@ -13,6 +13,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import sys
 from time import time
 
diff --git a/examples/miscellaneous/plot_metadata_routing.py b/examples/miscellaneous/plot_metadata_routing.py
index e96b54436cf30..634ca304d125d 100644
--- a/examples/miscellaneous/plot_metadata_routing.py
+++ b/examples/miscellaneous/plot_metadata_routing.py
@@ -27,6 +27,9 @@
 First a few imports and some random data for the rest of the script.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 
 import warnings
diff --git a/examples/miscellaneous/plot_multioutput_face_completion.py b/examples/miscellaneous/plot_multioutput_face_completion.py
index 62070bc05e488..a924da0d2b4a5 100644
--- a/examples/miscellaneous/plot_multioutput_face_completion.py
+++ b/examples/miscellaneous/plot_multioutput_face_completion.py
@@ -12,6 +12,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/miscellaneous/plot_partial_dependence_visualization_api.py b/examples/miscellaneous/plot_partial_dependence_visualization_api.py
index 38a984fa5b0cd..8c98b40816496 100644
--- a/examples/miscellaneous/plot_partial_dependence_visualization_api.py
+++ b/examples/miscellaneous/plot_partial_dependence_visualization_api.py
@@ -13,6 +13,9 @@
 
 """  # noqa: E501
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import pandas as pd
 
diff --git a/examples/miscellaneous/plot_pipeline_display.py b/examples/miscellaneous/plot_pipeline_display.py
index 9642bb56b903f..bf9ff2e549f8b 100755
--- a/examples/miscellaneous/plot_pipeline_display.py
+++ b/examples/miscellaneous/plot_pipeline_display.py
@@ -11,6 +11,9 @@
 steps in the pipeline.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Displaying a Pipeline with a Preprocessing Step and Classifier
 ################################################################################
diff --git a/examples/miscellaneous/plot_roc_curve_visualization_api.py b/examples/miscellaneous/plot_roc_curve_visualization_api.py
index 7fc8df9724337..d377d321e061e 100644
--- a/examples/miscellaneous/plot_roc_curve_visualization_api.py
+++ b/examples/miscellaneous/plot_roc_curve_visualization_api.py
@@ -9,6 +9,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Load Data and Train a SVC
 # -------------------------
diff --git a/examples/mixture/plot_gmm.py b/examples/mixture/plot_gmm.py
index 82e48a8d13eb0..9a27b1c42f81a 100644
--- a/examples/mixture/plot_gmm.py
+++ b/examples/mixture/plot_gmm.py
@@ -24,6 +24,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import itertools
 
 import matplotlib as mpl
diff --git a/examples/mixture/plot_gmm_init.py b/examples/mixture/plot_gmm_init.py
index 410a843cf78db..dd3d1c8a22692 100644
--- a/examples/mixture/plot_gmm_init.py
+++ b/examples/mixture/plot_gmm_init.py
@@ -33,6 +33,9 @@
 time to initialize and low number of GaussianMixture iterations to converge.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Author: Gordon Walsh <gordon.p.walsh@gmail.com>
 # Data generation code from Jake Vanderplas <vanderplas@astro.washington.edu>
 
diff --git a/examples/mixture/plot_gmm_pdf.py b/examples/mixture/plot_gmm_pdf.py
index 062bdfd4d6d67..be70578402f55 100644
--- a/examples/mixture/plot_gmm_pdf.py
+++ b/examples/mixture/plot_gmm_pdf.py
@@ -9,6 +9,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 from matplotlib.colors import LogNorm
diff --git a/examples/mixture/plot_gmm_selection.py b/examples/mixture/plot_gmm_selection.py
index cd84c03ab7d13..ef256aa4f8e0f 100644
--- a/examples/mixture/plot_gmm_selection.py
+++ b/examples/mixture/plot_gmm_selection.py
@@ -14,6 +14,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Data generation
 # ---------------
diff --git a/examples/mixture/plot_gmm_sin.py b/examples/mixture/plot_gmm_sin.py
index 34af17b8920bc..fe9c12bbe5adc 100644
--- a/examples/mixture/plot_gmm_sin.py
+++ b/examples/mixture/plot_gmm_sin.py
@@ -39,6 +39,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import itertools
 
 import matplotlib as mpl
diff --git a/examples/model_selection/plot_confusion_matrix.py b/examples/model_selection/plot_confusion_matrix.py
index 278083a994e58..9a0312d34f005 100644
--- a/examples/model_selection/plot_confusion_matrix.py
+++ b/examples/model_selection/plot_confusion_matrix.py
@@ -24,6 +24,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/model_selection/plot_cost_sensitive_learning.py b/examples/model_selection/plot_cost_sensitive_learning.py
index 55707e78e137a..3021d5aaab53d 100644
--- a/examples/model_selection/plot_cost_sensitive_learning.py
+++ b/examples/model_selection/plot_cost_sensitive_learning.py
@@ -35,6 +35,9 @@
     <https://cseweb.ucsd.edu/~elkan/rescale.pdf>`_
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Cost-sensitive learning with constant gains and costs
 # -----------------------------------------------------
diff --git a/examples/model_selection/plot_cv_indices.py b/examples/model_selection/plot_cv_indices.py
index d456546891069..b922fc75d7473 100644
--- a/examples/model_selection/plot_cv_indices.py
+++ b/examples/model_selection/plot_cv_indices.py
@@ -12,6 +12,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 from matplotlib.patches import Patch
diff --git a/examples/model_selection/plot_cv_predict.py b/examples/model_selection/plot_cv_predict.py
index bae1cffbd24e7..fa77749020d2b 100644
--- a/examples/model_selection/plot_cv_predict.py
+++ b/examples/model_selection/plot_cv_predict.py
@@ -9,6 +9,9 @@
 errors.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # We will load the diabetes dataset and create an instance of a linear
 # regression model.
diff --git a/examples/model_selection/plot_det.py b/examples/model_selection/plot_det.py
index 3e56b8bd35d31..bf72fc8ade61f 100644
--- a/examples/model_selection/plot_det.py
+++ b/examples/model_selection/plot_det.py
@@ -33,6 +33,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Generate synthetic data
 # -----------------------
diff --git a/examples/model_selection/plot_grid_search_digits.py b/examples/model_selection/plot_grid_search_digits.py
index ec4360692aaf3..f9d7adc2a404b 100644
--- a/examples/model_selection/plot_grid_search_digits.py
+++ b/examples/model_selection/plot_grid_search_digits.py
@@ -15,6 +15,9 @@
 sections on :ref:`cross_validation` and :ref:`grid_search`.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # The dataset
 # -----------
diff --git a/examples/model_selection/plot_grid_search_refit_callable.py b/examples/model_selection/plot_grid_search_refit_callable.py
index a851ee5f9bb19..9a8bf3c70d9cc 100644
--- a/examples/model_selection/plot_grid_search_refit_callable.py
+++ b/examples/model_selection/plot_grid_search_refit_callable.py
@@ -18,6 +18,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Author: Wenhao Zhang <wenhaoz@ucla.edu>
 
 import matplotlib.pyplot as plt
diff --git a/examples/model_selection/plot_grid_search_stats.py b/examples/model_selection/plot_grid_search_stats.py
index 9335b12055515..a4f1c8e1417ba 100644
--- a/examples/model_selection/plot_grid_search_stats.py
+++ b/examples/model_selection/plot_grid_search_stats.py
@@ -8,6 +8,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # We will start by simulating moon shaped data (where the ideal separation
 # between classes is non-linear), adding to it a moderate degree of noise.
diff --git a/examples/model_selection/plot_learning_curve.py b/examples/model_selection/plot_learning_curve.py
index 450392679095f..d8060c67cbe15 100644
--- a/examples/model_selection/plot_learning_curve.py
+++ b/examples/model_selection/plot_learning_curve.py
@@ -13,6 +13,9 @@
 accuracy.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Learning Curve
 # ==============
diff --git a/examples/model_selection/plot_likelihood_ratios.py b/examples/model_selection/plot_likelihood_ratios.py
index 9a3f29def9e98..2fc3ad3d040f5 100644
--- a/examples/model_selection/plot_likelihood_ratios.py
+++ b/examples/model_selection/plot_likelihood_ratios.py
@@ -25,6 +25,9 @@ class proportion than the target application.
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Authors:  Arturo Amor <david-arturo.amor-quiroz@inria.fr>
 #           Olivier Grisel <olivier.grisel@ensta.org>
 # %%
diff --git a/examples/model_selection/plot_nested_cross_validation_iris.py b/examples/model_selection/plot_nested_cross_validation_iris.py
index 38defdadf4165..15082123761af 100644
--- a/examples/model_selection/plot_nested_cross_validation_iris.py
+++ b/examples/model_selection/plot_nested_cross_validation_iris.py
@@ -44,6 +44,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import numpy as np
 from matplotlib import pyplot as plt
 
diff --git a/examples/model_selection/plot_precision_recall.py b/examples/model_selection/plot_precision_recall.py
index 19a93c7324cbb..bb7c79ff27da2 100644
--- a/examples/model_selection/plot_precision_recall.py
+++ b/examples/model_selection/plot_precision_recall.py
@@ -92,6 +92,9 @@
              :func:`sklearn.metrics.f1_score`
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # In binary classification settings
 # ---------------------------------
diff --git a/examples/model_selection/plot_randomized_search.py b/examples/model_selection/plot_randomized_search.py
index 140b359ff1934..7acd3a5550acf 100644
--- a/examples/model_selection/plot_randomized_search.py
+++ b/examples/model_selection/plot_randomized_search.py
@@ -20,6 +20,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from time import time
 
 import numpy as np
diff --git a/examples/model_selection/plot_roc.py b/examples/model_selection/plot_roc.py
index 5a94afcdf1edf..1b2a9760342a3 100644
--- a/examples/model_selection/plot_roc.py
+++ b/examples/model_selection/plot_roc.py
@@ -33,6 +33,9 @@
     curves and their respective AUC.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Load and prepare data
 # =====================
diff --git a/examples/model_selection/plot_roc_crossval.py b/examples/model_selection/plot_roc_crossval.py
index 962b39754f8bd..fb6432a71ed79 100644
--- a/examples/model_selection/plot_roc_crossval.py
+++ b/examples/model_selection/plot_roc_crossval.py
@@ -27,6 +27,9 @@
     generalize the metrics for multiclass classifiers.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Load and prepare data
 # =====================
diff --git a/examples/model_selection/plot_successive_halving_heatmap.py b/examples/model_selection/plot_successive_halving_heatmap.py
index 9b079e4b1351f..4d9b676443e5e 100644
--- a/examples/model_selection/plot_successive_halving_heatmap.py
+++ b/examples/model_selection/plot_successive_halving_heatmap.py
@@ -8,6 +8,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from time import time
 
 import matplotlib.pyplot as plt
diff --git a/examples/model_selection/plot_successive_halving_iterations.py b/examples/model_selection/plot_successive_halving_iterations.py
index 31805d308e269..31c1a0b9d5b34 100644
--- a/examples/model_selection/plot_successive_halving_iterations.py
+++ b/examples/model_selection/plot_successive_halving_iterations.py
@@ -10,6 +10,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
diff --git a/examples/model_selection/plot_tuned_decision_threshold.py b/examples/model_selection/plot_tuned_decision_threshold.py
index 7e997ee255e4d..59986a3910d00 100644
--- a/examples/model_selection/plot_tuned_decision_threshold.py
+++ b/examples/model_selection/plot_tuned_decision_threshold.py
@@ -14,6 +14,9 @@
 threshold, depending on a metric of interest.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # The diabetes dataset
 # --------------------
diff --git a/examples/model_selection/plot_underfitting_overfitting.py b/examples/model_selection/plot_underfitting_overfitting.py
index 412946fc9ca8b..a6151cd6b3c20 100644
--- a/examples/model_selection/plot_underfitting_overfitting.py
+++ b/examples/model_selection/plot_underfitting_overfitting.py
@@ -21,6 +21,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/model_selection/plot_validation_curve.py b/examples/model_selection/plot_validation_curve.py
index 947d8ac2b2fdb..44a382fed0c17 100644
--- a/examples/model_selection/plot_validation_curve.py
+++ b/examples/model_selection/plot_validation_curve.py
@@ -13,6 +13,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/multiclass/plot_multiclass_overview.py b/examples/multiclass/plot_multiclass_overview.py
index 9ef5405512b67..1a0fddc40571d 100644
--- a/examples/multiclass/plot_multiclass_overview.py
+++ b/examples/multiclass/plot_multiclass_overview.py
@@ -20,6 +20,9 @@
 will review them.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # The Yeast UCI dataset
 # ---------------------
diff --git a/examples/neighbors/plot_classification.py b/examples/neighbors/plot_classification.py
index 43c45558054cf..1754869943ac7 100644
--- a/examples/neighbors/plot_classification.py
+++ b/examples/neighbors/plot_classification.py
@@ -8,6 +8,9 @@
 decision boundary obtained with regards to the parameter `weights`.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Load the data
 # -------------
diff --git a/examples/neighbors/plot_digits_kde_sampling.py b/examples/neighbors/plot_digits_kde_sampling.py
index 045058eab09cc..d4860f117e4e9 100644
--- a/examples/neighbors/plot_digits_kde_sampling.py
+++ b/examples/neighbors/plot_digits_kde_sampling.py
@@ -11,6 +11,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/neighbors/plot_kde_1d.py b/examples/neighbors/plot_kde_1d.py
index fc5b1914f23de..83734ec70bcfc 100644
--- a/examples/neighbors/plot_kde_1d.py
+++ b/examples/neighbors/plot_kde_1d.py
@@ -28,6 +28,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Author: Jake Vanderplas <jakevdp@cs.washington.edu>
 #
 import matplotlib.pyplot as plt
diff --git a/examples/neighbors/plot_lof_novelty_detection.py b/examples/neighbors/plot_lof_novelty_detection.py
index 789efa66c7b5c..9743ec4e3df2f 100644
--- a/examples/neighbors/plot_lof_novelty_detection.py
+++ b/examples/neighbors/plot_lof_novelty_detection.py
@@ -25,6 +25,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib
 import matplotlib.lines as mlines
 import matplotlib.pyplot as plt
diff --git a/examples/neighbors/plot_lof_outlier_detection.py b/examples/neighbors/plot_lof_outlier_detection.py
index edb79294ce594..9b5e92579625b 100644
--- a/examples/neighbors/plot_lof_outlier_detection.py
+++ b/examples/neighbors/plot_lof_outlier_detection.py
@@ -22,6 +22,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Generate data with outliers
 # ---------------------------
diff --git a/examples/neighbors/plot_nca_classification.py b/examples/neighbors/plot_nca_classification.py
index 0703caf90056e..496038cec7f88 100644
--- a/examples/neighbors/plot_nca_classification.py
+++ b/examples/neighbors/plot_nca_classification.py
@@ -15,6 +15,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # SPDX-License-Identifier: BSD-3-Clause
 
 import matplotlib.pyplot as plt
diff --git a/examples/neighbors/plot_nca_dim_reduction.py b/examples/neighbors/plot_nca_dim_reduction.py
index 415618f6bc4c7..1e6305549a46c 100644
--- a/examples/neighbors/plot_nca_dim_reduction.py
+++ b/examples/neighbors/plot_nca_dim_reduction.py
@@ -28,6 +28,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # SPDX-License-Identifier: BSD-3-Clause
 
 import matplotlib.pyplot as plt
diff --git a/examples/neighbors/plot_nca_illustration.py b/examples/neighbors/plot_nca_illustration.py
index 7085817f4e9fa..8d847f28d6e4d 100644
--- a/examples/neighbors/plot_nca_illustration.py
+++ b/examples/neighbors/plot_nca_illustration.py
@@ -10,6 +10,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # SPDX-License-Identifier: BSD-3-Clause
 
 import matplotlib.pyplot as plt
diff --git a/examples/neighbors/plot_nearest_centroid.py b/examples/neighbors/plot_nearest_centroid.py
index c8f710d0a0377..1718e213f9252 100644
--- a/examples/neighbors/plot_nearest_centroid.py
+++ b/examples/neighbors/plot_nearest_centroid.py
@@ -8,6 +8,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 from matplotlib.colors import ListedColormap
diff --git a/examples/neural_networks/plot_mlp_training_curves.py b/examples/neural_networks/plot_mlp_training_curves.py
index 8ee285877caa8..7b63d0de1adfe 100644
--- a/examples/neural_networks/plot_mlp_training_curves.py
+++ b/examples/neural_networks/plot_mlp_training_curves.py
@@ -14,6 +14,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import warnings
 
 import matplotlib.pyplot as plt
diff --git a/examples/neural_networks/plot_mnist_filters.py b/examples/neural_networks/plot_mnist_filters.py
index f37452a757d20..889e78e2e5e5b 100644
--- a/examples/neural_networks/plot_mnist_filters.py
+++ b/examples/neural_networks/plot_mnist_filters.py
@@ -24,6 +24,9 @@
 to build this documentation on a regular basis.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import warnings
 
 import matplotlib.pyplot as plt
diff --git a/examples/preprocessing/plot_discretization_classification.py b/examples/preprocessing/plot_discretization_classification.py
index 04a461a61799f..310089e296883 100644
--- a/examples/preprocessing/plot_discretization_classification.py
+++ b/examples/preprocessing/plot_discretization_classification.py
@@ -28,6 +28,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Code source: Tom Dupré la Tour
 # Adapted from plot_classifier_comparison by Gaël Varoquaux and Andreas Müller
 #
diff --git a/examples/preprocessing/plot_target_encoder.py b/examples/preprocessing/plot_target_encoder.py
index 98b73a9529679..04f3222d4e512 100644
--- a/examples/preprocessing/plot_target_encoder.py
+++ b/examples/preprocessing/plot_target_encoder.py
@@ -16,6 +16,9 @@
     :ref:`User Guide <target_encoder>`. for details.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Loading Data from OpenML
 # ========================
diff --git a/examples/preprocessing/plot_target_encoder_cross_val.py b/examples/preprocessing/plot_target_encoder_cross_val.py
index 7244a1bf61cd6..3d51664710096 100644
--- a/examples/preprocessing/plot_target_encoder_cross_val.py
+++ b/examples/preprocessing/plot_target_encoder_cross_val.py
@@ -16,6 +16,9 @@
 fitting procedure to prevent overfitting.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Create Synthetic Dataset
 # ========================
diff --git a/examples/release_highlights/plot_release_highlights_0_22_0.py b/examples/release_highlights/plot_release_highlights_0_22_0.py
index 2e4c9185365a9..26763110402ec 100644
--- a/examples/release_highlights/plot_release_highlights_0_22_0.py
+++ b/examples/release_highlights/plot_release_highlights_0_22_0.py
@@ -20,6 +20,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # New plotting API
 # ----------------
diff --git a/examples/semi_supervised/plot_semi_supervised_newsgroups.py b/examples/semi_supervised/plot_semi_supervised_newsgroups.py
index 19bcb13c5a99b..1ad7bf85953e7 100644
--- a/examples/semi_supervised/plot_semi_supervised_newsgroups.py
+++ b/examples/semi_supervised/plot_semi_supervised_newsgroups.py
@@ -11,6 +11,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import numpy as np
 
 from sklearn.datasets import fetch_20newsgroups
diff --git a/examples/svm/plot_custom_kernel.py b/examples/svm/plot_custom_kernel.py
index cacd67ed056ac..d3816849f73b8 100644
--- a/examples/svm/plot_custom_kernel.py
+++ b/examples/svm/plot_custom_kernel.py
@@ -8,6 +8,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/svm/plot_iris_svc.py b/examples/svm/plot_iris_svc.py
index d13a9fe49c803..77259f9d1ea2c 100644
--- a/examples/svm/plot_iris_svc.py
+++ b/examples/svm/plot_iris_svc.py
@@ -34,6 +34,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 
 from sklearn import datasets, svm
diff --git a/examples/svm/plot_linearsvc_support_vectors.py b/examples/svm/plot_linearsvc_support_vectors.py
index 7f82b6c8bb0fe..021e1c6b55962 100644
--- a/examples/svm/plot_linearsvc_support_vectors.py
+++ b/examples/svm/plot_linearsvc_support_vectors.py
@@ -9,6 +9,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/svm/plot_oneclass.py b/examples/svm/plot_oneclass.py
index 4f44f42fe338e..0db71966db6a9 100644
--- a/examples/svm/plot_oneclass.py
+++ b/examples/svm/plot_oneclass.py
@@ -11,6 +11,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 import numpy as np
 
diff --git a/examples/svm/plot_rbf_parameters.py b/examples/svm/plot_rbf_parameters.py
index ba0154b477b46..356707e2d72b2 100644
--- a/examples/svm/plot_rbf_parameters.py
+++ b/examples/svm/plot_rbf_parameters.py
@@ -75,6 +75,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Utility class to move the midpoint of a colormap to be around
 # the values of interest.
diff --git a/examples/svm/plot_separating_hyperplane.py b/examples/svm/plot_separating_hyperplane.py
index 23f464169f516..842da314feb1a 100644
--- a/examples/svm/plot_separating_hyperplane.py
+++ b/examples/svm/plot_separating_hyperplane.py
@@ -9,6 +9,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 
 from sklearn import svm
diff --git a/examples/svm/plot_separating_hyperplane_unbalanced.py b/examples/svm/plot_separating_hyperplane_unbalanced.py
index f9c615cc43d4f..d0814e1af065f 100644
--- a/examples/svm/plot_separating_hyperplane_unbalanced.py
+++ b/examples/svm/plot_separating_hyperplane_unbalanced.py
@@ -25,6 +25,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.lines as mlines
 import matplotlib.pyplot as plt
 
diff --git a/examples/svm/plot_svm_anova.py b/examples/svm/plot_svm_anova.py
index 3d5a934bf4884..1c2a78e79fdb9 100644
--- a/examples/svm/plot_svm_anova.py
+++ b/examples/svm/plot_svm_anova.py
@@ -10,6 +10,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Load some data to play with
 # ---------------------------
diff --git a/examples/svm/plot_svm_kernels.py b/examples/svm/plot_svm_kernels.py
index 9d859864084f0..480293a03b9f0 100644
--- a/examples/svm/plot_svm_kernels.py
+++ b/examples/svm/plot_svm_kernels.py
@@ -36,6 +36,9 @@
 kernel (`"rbf"`) and the sigmoid kernel (`"sigmoid"`).
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Code source: Gaël Varoquaux
 # SPDX-License-Identifier: BSD-3-Clause
 
diff --git a/examples/svm/plot_svm_margin.py b/examples/svm/plot_svm_margin.py
index de293f61b848d..671b7f4902bc8 100644
--- a/examples/svm/plot_svm_margin.py
+++ b/examples/svm/plot_svm_margin.py
@@ -13,6 +13,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Code source: Gaël Varoquaux
 # Modified for documentation by Jaques Grobler
 # SPDX-License-Identifier: BSD-3-Clause
diff --git a/examples/svm/plot_svm_regression.py b/examples/svm/plot_svm_regression.py
index ab34528a37af6..5da00ef1f88b7 100644
--- a/examples/svm/plot_svm_regression.py
+++ b/examples/svm/plot_svm_regression.py
@@ -7,6 +7,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/svm/plot_svm_tie_breaking.py b/examples/svm/plot_svm_tie_breaking.py
index 0a43d4125aec5..a9276efd44572 100644
--- a/examples/svm/plot_svm_tie_breaking.py
+++ b/examples/svm/plot_svm_tie_breaking.py
@@ -14,6 +14,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Code source: Andreas Mueller, Adrin Jalali
 # SPDX-License-Identifier: BSD-3-Clause
 
diff --git a/examples/svm/plot_weighted_samples.py b/examples/svm/plot_weighted_samples.py
index c17742e091390..0fb5bedd6e59b 100644
--- a/examples/svm/plot_weighted_samples.py
+++ b/examples/svm/plot_weighted_samples.py
@@ -14,6 +14,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/tree/plot_cost_complexity_pruning.py b/examples/tree/plot_cost_complexity_pruning.py
index b232389ea9ded..bdd1a2b0c358f 100644
--- a/examples/tree/plot_cost_complexity_pruning.py
+++ b/examples/tree/plot_cost_complexity_pruning.py
@@ -17,6 +17,9 @@
 See also :ref:`minimal_cost_complexity_pruning` for details on pruning.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 
 from sklearn.datasets import load_breast_cancer
diff --git a/examples/tree/plot_iris_dtc.py b/examples/tree/plot_iris_dtc.py
index 61c3bec3460ed..9d4298919d515 100644
--- a/examples/tree/plot_iris_dtc.py
+++ b/examples/tree/plot_iris_dtc.py
@@ -15,6 +15,9 @@
 We also show the tree structure of a model built on all of the features.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # First load the copy of the Iris dataset shipped with scikit-learn:
 from sklearn.datasets import load_iris
diff --git a/examples/tree/plot_tree_regression.py b/examples/tree/plot_tree_regression.py
index 5a3da0b7b6d06..c499e95f428c4 100644
--- a/examples/tree/plot_tree_regression.py
+++ b/examples/tree/plot_tree_regression.py
@@ -14,6 +14,9 @@
 details of the training data and learn from the noise, i.e. they overfit.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Import the necessary modules and libraries
 import matplotlib.pyplot as plt
 import numpy as np
diff --git a/examples/tree/plot_tree_regression_multioutput.py b/examples/tree/plot_tree_regression_multioutput.py
index b6d2800d2732d..0fed498c0087e 100644
--- a/examples/tree/plot_tree_regression_multioutput.py
+++ b/examples/tree/plot_tree_regression_multioutput.py
@@ -15,6 +15,9 @@
 details of the training data and learn from the noise, i.e. they overfit.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/tree/plot_unveil_tree_structure.py b/examples/tree/plot_unveil_tree_structure.py
index 19b7c643ec0f7..e87c9278450ad 100644
--- a/examples/tree/plot_unveil_tree_structure.py
+++ b/examples/tree/plot_unveil_tree_structure.py
@@ -16,6 +16,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import numpy as np
 from matplotlib import pyplot as plt
 
diff --git a/setup.py b/setup.py
index 7e67210736305..97e7a67458180 100755
--- a/setup.py
+++ b/setup.py
@@ -1,3 +1,5 @@
 #! /usr/bin/env python
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 #
 # Authors: The scikit-learn developers
diff --git a/sklearn/__check_build/__init__.py b/sklearn/__check_build/__init__.py
index ad1a3a818b14d..0f6508ece1c4d 100644
--- a/sklearn/__check_build/__init__.py
+++ b/sklearn/__check_build/__init__.py
@@ -2,6 +2,9 @@
 compile scikit-learn properly.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import os
 
 INPLACE_MSG = """
diff --git a/sklearn/__init__.py b/sklearn/__init__.py
index a61a2afde8855..03b375a47335e 100644
--- a/sklearn/__init__.py
+++ b/sklearn/__init__.py
@@ -1,5 +1,8 @@
 """Configure global settings and get information about the working environment."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Machine learning module for Python
 # ==================================
 #
diff --git a/sklearn/_build_utils/__init__.py b/sklearn/_build_utils/__init__.py
index ceb72441000c3..4e08b5dc873d7 100644
--- a/sklearn/_build_utils/__init__.py
+++ b/sklearn/_build_utils/__init__.py
@@ -2,6 +2,9 @@
 Utilities useful during the build.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # author: Andy Mueller, Gael Varoquaux
 # license: BSD
 
diff --git a/sklearn/_build_utils/openmp_helpers.py b/sklearn/_build_utils/openmp_helpers.py
index 66e6089e33fef..6380258da391d 100644
--- a/sklearn/_build_utils/openmp_helpers.py
+++ b/sklearn/_build_utils/openmp_helpers.py
@@ -1,5 +1,8 @@
 """Helpers for OpenMP support during the build."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # This code is adapted for a large part from the astropy openmp helpers, which
 # can be found at: https://github.com/astropy/extension-helpers/blob/master/extension_helpers/_openmp_helpers.py  # noqa
 
diff --git a/sklearn/_build_utils/pre_build_helpers.py b/sklearn/_build_utils/pre_build_helpers.py
index 73adb26f5416b..ae05c8bf10957 100644
--- a/sklearn/_build_utils/pre_build_helpers.py
+++ b/sklearn/_build_utils/pre_build_helpers.py
@@ -1,5 +1,8 @@
 """Helpers to check build environment before actual build of scikit-learn"""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import glob
 import os
 import subprocess
diff --git a/sklearn/_build_utils/tempita.py b/sklearn/_build_utils/tempita.py
index 8da4b9c0e7ace..fd59fe51c7ba6 100644
--- a/sklearn/_build_utils/tempita.py
+++ b/sklearn/_build_utils/tempita.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 import argparse
 import os
 
diff --git a/sklearn/_build_utils/version.py b/sklearn/_build_utils/version.py
index 49a3cfb82bebd..0616f8c058c2b 100644
--- a/sklearn/_build_utils/version.py
+++ b/sklearn/_build_utils/version.py
@@ -1,6 +1,9 @@
 #!/usr/bin/env python
 """Extract version number from __init__.py"""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import os
 
 sklearn_init = os.path.join(os.path.dirname(__file__), "../__init__.py")
diff --git a/sklearn/_config.py b/sklearn/_config.py
index fc9392de68df6..05549c88a9ddc 100644
--- a/sklearn/_config.py
+++ b/sklearn/_config.py
@@ -1,5 +1,8 @@
 """Global configuration state and functions for management"""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import os
 import threading
 from contextlib import contextmanager as contextmanager
diff --git a/sklearn/_distributor_init.py b/sklearn/_distributor_init.py
index f0901034e83e4..d66d5d36955c1 100644
--- a/sklearn/_distributor_init.py
+++ b/sklearn/_distributor_init.py
@@ -8,3 +8,6 @@
 The scikit-learn standard source distribution will not put code in this file,
 so you can safely replace this file with your own version.
 """
+
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
diff --git a/sklearn/_loss/__init__.py b/sklearn/_loss/__init__.py
index ee15e693c16f6..bc348bbca8a15 100644
--- a/sklearn/_loss/__init__.py
+++ b/sklearn/_loss/__init__.py
@@ -3,6 +3,9 @@
 fitting classification and regression tasks.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from .loss import (
     AbsoluteError,
     HalfBinomialLoss,
diff --git a/sklearn/_loss/link.py b/sklearn/_loss/link.py
index a6560d58d91e6..1bb290aecc64a 100644
--- a/sklearn/_loss/link.py
+++ b/sklearn/_loss/link.py
@@ -2,6 +2,9 @@
 Module contains classes for invertible (and differentiable) link functions.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Author: Christian Lorentzen <lorentzen.ch@gmail.com>
 
 from abc import ABC, abstractmethod
diff --git a/sklearn/_loss/loss.py b/sklearn/_loss/loss.py
index 96863cc00fe01..b45ff3322699a 100644
--- a/sklearn/_loss/loss.py
+++ b/sklearn/_loss/loss.py
@@ -6,6 +6,9 @@
 classification.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Goals:
 # - Provide a common private module for loss functions/classes.
 # - To be used in:
diff --git a/sklearn/_min_dependencies.py b/sklearn/_min_dependencies.py
index 9c108791b45bc..f7df37bedda0c 100644
--- a/sklearn/_min_dependencies.py
+++ b/sklearn/_min_dependencies.py
@@ -1,5 +1,8 @@
 """All minimum dependencies for scikit-learn."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import argparse
 from collections import defaultdict
 
diff --git a/sklearn/cluster/__init__.py b/sklearn/cluster/__init__.py
index 5d497d2976ad1..a0545d3b90d56 100644
--- a/sklearn/cluster/__init__.py
+++ b/sklearn/cluster/__init__.py
@@ -1,5 +1,8 @@
 """Popular unsupervised clustering algorithms."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from ._affinity_propagation import AffinityPropagation, affinity_propagation
 from ._agglomerative import (
     AgglomerativeClustering,
diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index 55215d2a0312c..68fa315f11634 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -8,6 +8,9 @@
 License: BSD 3 clause
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import warnings
 from heapq import heapify, heappop, heappush, heappushpop
 from numbers import Integral, Real
diff --git a/sklearn/cluster/_bisect_k_means.py b/sklearn/cluster/_bisect_k_means.py
index 1d4a9e1d84c26..d615447d913eb 100644
--- a/sklearn/cluster/_bisect_k_means.py
+++ b/sklearn/cluster/_bisect_k_means.py
@@ -1,5 +1,8 @@
 """Bisecting K-means clustering."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Author: Michal Krawczyk <mkrwczyk.1@gmail.com>
 
 import warnings
diff --git a/sklearn/cluster/_hdbscan/__init__.py b/sklearn/cluster/_hdbscan/__init__.py
index e69de29bb2d1d..67dd18fb94b59 100644
--- a/sklearn/cluster/_hdbscan/__init__.py
+++ b/sklearn/cluster/_hdbscan/__init__.py
@@ -0,0 +1,2 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
diff --git a/sklearn/cluster/_hdbscan/hdbscan.py b/sklearn/cluster/_hdbscan/hdbscan.py
index 9933318313cc8..f999343dd875d 100644
--- a/sklearn/cluster/_hdbscan/hdbscan.py
+++ b/sklearn/cluster/_hdbscan/hdbscan.py
@@ -3,6 +3,9 @@
          of Applications with Noise
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Authors: Leland McInnes <leland.mcinnes@gmail.com>
 #          Steve Astels <sastels@gmail.com>
 #          John Healy <jchealy@gmail.com>
diff --git a/sklearn/cluster/_mean_shift.py b/sklearn/cluster/_mean_shift.py
index a99a607f3cf0d..db5c22d9650bf 100644
--- a/sklearn/cluster/_mean_shift.py
+++ b/sklearn/cluster/_mean_shift.py
@@ -9,6 +9,9 @@
 Seeding is performed using a binning technique for scalability.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Authors: Conrad Lee <conradlee@gmail.com>
 #          Alexandre Gramfort <alexandre.gramfort@inria.fr>
 #          Gael Varoquaux <gael.varoquaux@normalesup.org>
diff --git a/sklearn/cluster/_optics.py b/sklearn/cluster/_optics.py
index b2a0c4d642a00..46f795e94ffb2 100755
--- a/sklearn/cluster/_optics.py
+++ b/sklearn/cluster/_optics.py
@@ -10,6 +10,9 @@
 License: BSD 3 clause
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import warnings
 from numbers import Integral, Real
 
diff --git a/sklearn/compose/__init__.py b/sklearn/compose/__init__.py
index e6e8fc6fd2c1b..9f20bc9856074 100644
--- a/sklearn/compose/__init__.py
+++ b/sklearn/compose/__init__.py
@@ -5,6 +5,9 @@
 :class:`~sklearn.pipeline.FeatureUnion`.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from ._column_transformer import (
     ColumnTransformer,
     make_column_selector,
diff --git a/sklearn/conftest.py b/sklearn/conftest.py
index 203c524561fdd..6df1d5ba473f6 100644
--- a/sklearn/conftest.py
+++ b/sklearn/conftest.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 import builtins
 import platform
 import sys
diff --git a/sklearn/covariance/__init__.py b/sklearn/covariance/__init__.py
index 19ee1d8a09759..989f3372b42e0 100644
--- a/sklearn/covariance/__init__.py
+++ b/sklearn/covariance/__init__.py
@@ -5,6 +5,9 @@
 closely related to the theory of Gaussian graphical models.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from ._elliptic_envelope import EllipticEnvelope
 from ._empirical_covariance import (
     EmpiricalCovariance,
diff --git a/sklearn/cross_decomposition/__init__.py b/sklearn/cross_decomposition/__init__.py
index 15eb21467daee..cad873ed800c6 100644
--- a/sklearn/cross_decomposition/__init__.py
+++ b/sklearn/cross_decomposition/__init__.py
@@ -1,5 +1,8 @@
 """Algorithms for cross decomposition."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from ._pls import CCA, PLSSVD, PLSCanonical, PLSRegression
 
 __all__ = ["PLSCanonical", "PLSRegression", "PLSSVD", "CCA"]
diff --git a/sklearn/datasets/__init__.py b/sklearn/datasets/__init__.py
index 58cddb099faff..0ba2cbe88efc4 100644
--- a/sklearn/datasets/__init__.py
+++ b/sklearn/datasets/__init__.py
@@ -1,5 +1,8 @@
 """Utilities to load popular datasets and artificial data generators."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import textwrap
 
 from ._base import (
diff --git a/sklearn/datasets/_arff_parser.py b/sklearn/datasets/_arff_parser.py
index 86dfeb37a6ef5..fb6e629a73c8d 100644
--- a/sklearn/datasets/_arff_parser.py
+++ b/sklearn/datasets/_arff_parser.py
@@ -1,5 +1,8 @@
 """Implementation of ARFF parsers: via LIAC-ARFF and pandas."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import itertools
 import re
 from collections import OrderedDict
diff --git a/sklearn/datasets/_kddcup99.py b/sklearn/datasets/_kddcup99.py
index 597fb9c9dece3..ab4db0522ef20 100644
--- a/sklearn/datasets/_kddcup99.py
+++ b/sklearn/datasets/_kddcup99.py
@@ -8,6 +8,9 @@
 
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import errno
 import logging
 import os
diff --git a/sklearn/datasets/_openml.py b/sklearn/datasets/_openml.py
index a423928ffff40..e270676272b03 100644
--- a/sklearn/datasets/_openml.py
+++ b/sklearn/datasets/_openml.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 import gzip
 import hashlib
 import json
diff --git a/sklearn/datasets/data/__init__.py b/sklearn/datasets/data/__init__.py
index e69de29bb2d1d..67dd18fb94b59 100644
--- a/sklearn/datasets/data/__init__.py
+++ b/sklearn/datasets/data/__init__.py
@@ -0,0 +1,2 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
diff --git a/sklearn/datasets/descr/__init__.py b/sklearn/datasets/descr/__init__.py
index e69de29bb2d1d..67dd18fb94b59 100644
--- a/sklearn/datasets/descr/__init__.py
+++ b/sklearn/datasets/descr/__init__.py
@@ -0,0 +1,2 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
diff --git a/sklearn/datasets/images/__init__.py b/sklearn/datasets/images/__init__.py
index e69de29bb2d1d..67dd18fb94b59 100644
--- a/sklearn/datasets/images/__init__.py
+++ b/sklearn/datasets/images/__init__.py
@@ -0,0 +1,2 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
diff --git a/sklearn/decomposition/__init__.py b/sklearn/decomposition/__init__.py
index 4730bda31ebbd..cd013fe9c7a93 100644
--- a/sklearn/decomposition/__init__.py
+++ b/sklearn/decomposition/__init__.py
@@ -4,6 +4,9 @@
 regarded as dimensionality reduction techniques.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from ..utils.extmath import randomized_svd
 from ._dict_learning import (
     DictionaryLearning,
diff --git a/sklearn/decomposition/_lda.py b/sklearn/decomposition/_lda.py
index 4f91483a468a9..9b6f32f2e1926 100644
--- a/sklearn/decomposition/_lda.py
+++ b/sklearn/decomposition/_lda.py
@@ -8,6 +8,9 @@
 Link: https://github.com/blei-lab/onlineldavb
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Author: Chyi-Kwei Yau
 # Author: Matthew D. Hoffman (original onlineldavb implementation)
 from numbers import Integral, Real
diff --git a/sklearn/decomposition/_truncated_svd.py b/sklearn/decomposition/_truncated_svd.py
index d978191f104f7..b50ed239c6b19 100644
--- a/sklearn/decomposition/_truncated_svd.py
+++ b/sklearn/decomposition/_truncated_svd.py
@@ -1,5 +1,8 @@
 """Truncated SVD for sparse matrices, aka latent semantic analysis (LSA)."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Author: Lars Buitinck
 #         Olivier Grisel <olivier.grisel@ensta.org>
 #         Michael Becker <mike@beckerfuffle.com>
diff --git a/sklearn/ensemble/__init__.py b/sklearn/ensemble/__init__.py
index e49d744ed6391..2a8cf413be9da 100644
--- a/sklearn/ensemble/__init__.py
+++ b/sklearn/ensemble/__init__.py
@@ -1,5 +1,8 @@
 """Ensemble-based methods for classification, regression and anomaly detection."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from ._bagging import BaggingClassifier, BaggingRegressor
 from ._base import BaseEnsemble
 from ._forest import (
diff --git a/sklearn/ensemble/_hist_gradient_boosting/__init__.py b/sklearn/ensemble/_hist_gradient_boosting/__init__.py
index 879fae1189f87..5939d83c84838 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/__init__.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/__init__.py
@@ -3,3 +3,6 @@
 The implementation is a port from pygbm which is itself strongly inspired
 from LightGBM.
 """
+
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
diff --git a/sklearn/ensemble/_hist_gradient_boosting/binning.py b/sklearn/ensemble/_hist_gradient_boosting/binning.py
index d23f6e7b00a82..f5bfa7d531094 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/binning.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/binning.py
@@ -6,6 +6,9 @@
 approximately the same number of samples.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Author: Nicolas Hug
 
 import numpy as np
diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
index 78f8456e969de..043321de569a8 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
@@ -1,5 +1,8 @@
 """Fast Gradient Boosting decision trees for classification and regression."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Author: Nicolas Hug
 
 import itertools
diff --git a/sklearn/ensemble/_hist_gradient_boosting/grower.py b/sklearn/ensemble/_hist_gradient_boosting/grower.py
index 419e2f26c2653..98de6cd428e67 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/grower.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/grower.py
@@ -5,6 +5,9 @@
 the gradients and hessians of the training data.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Author: Nicolas Hug
 
 import numbers
diff --git a/sklearn/ensemble/_hist_gradient_boosting/predictor.py b/sklearn/ensemble/_hist_gradient_boosting/predictor.py
index 799c25aadcec3..7ab9542ef6e0f 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/predictor.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/predictor.py
@@ -2,6 +2,9 @@
 This module contains the TreePredictor class which is used for prediction.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Author: Nicolas Hug
 
 import numpy as np
diff --git a/sklearn/ensemble/_hist_gradient_boosting/utils.py b/sklearn/ensemble/_hist_gradient_boosting/utils.py
index 1ff17217164c8..429fbed611c22 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/utils.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/utils.py
@@ -1,5 +1,8 @@
 """This module contains utility routines."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from ...base import is_classifier
 from .binning import _BinMapper
 
diff --git a/sklearn/exceptions.py b/sklearn/exceptions.py
index 7f6b26163cc87..caba4e174817a 100644
--- a/sklearn/exceptions.py
+++ b/sklearn/exceptions.py
@@ -1,5 +1,8 @@
 """Custom warnings and errors used across scikit-learn."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 __all__ = [
     "NotFittedError",
     "ConvergenceWarning",
diff --git a/sklearn/experimental/__init__.py b/sklearn/experimental/__init__.py
index 205a11a3d524b..593d247e5bc40 100644
--- a/sklearn/experimental/__init__.py
+++ b/sklearn/experimental/__init__.py
@@ -5,3 +5,6 @@
     The features and estimators that are experimental aren't subject to
     deprecation cycles. Use them at your own risks!
 """
+
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
diff --git a/sklearn/experimental/enable_halving_search_cv.py b/sklearn/experimental/enable_halving_search_cv.py
index dd399ef35b6f7..85f93b26459d0 100644
--- a/sklearn/experimental/enable_halving_search_cv.py
+++ b/sklearn/experimental/enable_halving_search_cv.py
@@ -19,6 +19,9 @@
 flake8 to ignore the import, which appears as unused.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from .. import model_selection
 from ..model_selection._search_successive_halving import (
     HalvingGridSearchCV,
diff --git a/sklearn/experimental/enable_hist_gradient_boosting.py b/sklearn/experimental/enable_hist_gradient_boosting.py
index 6fa4512ce39c6..9269b2d0b6d6c 100644
--- a/sklearn/experimental/enable_hist_gradient_boosting.py
+++ b/sklearn/experimental/enable_hist_gradient_boosting.py
@@ -7,6 +7,9 @@
 normally from `sklearn.ensemble`.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Don't remove this file, we don't want to break users code just because the
 # feature isn't experimental anymore.
 
diff --git a/sklearn/experimental/enable_iterative_imputer.py b/sklearn/experimental/enable_iterative_imputer.py
index 0b906961ca184..544e0d60eea28 100644
--- a/sklearn/experimental/enable_iterative_imputer.py
+++ b/sklearn/experimental/enable_iterative_imputer.py
@@ -12,6 +12,9 @@
     >>> from sklearn.impute import IterativeImputer
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from .. import impute
 from ..impute._iterative import IterativeImputer
 
diff --git a/sklearn/feature_extraction/__init__.py b/sklearn/feature_extraction/__init__.py
index dced9d46b4cc7..621c8683f441e 100644
--- a/sklearn/feature_extraction/__init__.py
+++ b/sklearn/feature_extraction/__init__.py
@@ -1,5 +1,8 @@
 """Feature extraction from raw data."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from . import text
 from ._dict_vectorizer import DictVectorizer
 from ._hash import FeatureHasher
diff --git a/sklearn/feature_extraction/_stop_words.py b/sklearn/feature_extraction/_stop_words.py
index 37ae02a0f36c5..ac5c9f495ae84 100644
--- a/sklearn/feature_extraction/_stop_words.py
+++ b/sklearn/feature_extraction/_stop_words.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 # This list of English stop words is taken from the "Glasgow Information
 # Retrieval Group". The original list can be found at
 # http://ir.dcs.gla.ac.uk/resources/linguistic_utils/stop_words
diff --git a/sklearn/feature_selection/__init__.py b/sklearn/feature_selection/__init__.py
index d431a5b016807..fbb8f54350630 100644
--- a/sklearn/feature_selection/__init__.py
+++ b/sklearn/feature_selection/__init__.py
@@ -4,6 +4,9 @@
 algorithm.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from ._base import SelectorMixin
 from ._from_model import SelectFromModel
 from ._mutual_info import mutual_info_classif, mutual_info_regression
diff --git a/sklearn/feature_selection/_mutual_info.py b/sklearn/feature_selection/_mutual_info.py
index f3808068f46a5..42afdfb10dc3a 100644
--- a/sklearn/feature_selection/_mutual_info.py
+++ b/sklearn/feature_selection/_mutual_info.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 # Author: Nikolay Mayorov <n59_ru@hotmail.com>
 # License: 3-clause BSD
 
diff --git a/sklearn/feature_selection/_sequential.py b/sklearn/feature_selection/_sequential.py
index 9c393724f9cea..471f9a373a3da 100644
--- a/sklearn/feature_selection/_sequential.py
+++ b/sklearn/feature_selection/_sequential.py
@@ -2,6 +2,9 @@
 Sequential feature selection
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from numbers import Integral, Real
 
 import numpy as np
diff --git a/sklearn/feature_selection/_variance_threshold.py b/sklearn/feature_selection/_variance_threshold.py
index f97c75db1e34b..a9b360037590f 100644
--- a/sklearn/feature_selection/_variance_threshold.py
+++ b/sklearn/feature_selection/_variance_threshold.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 # Author: Lars Buitinck
 # License: 3-clause BSD
 from numbers import Real
diff --git a/sklearn/impute/__init__.py b/sklearn/impute/__init__.py
index a40c6b432d6d5..2f9ed9017c6cb 100644
--- a/sklearn/impute/__init__.py
+++ b/sklearn/impute/__init__.py
@@ -1,5 +1,8 @@
 """Transformers for missing value imputation."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import typing
 
 from ._base import MissingIndicator, SimpleImputer
diff --git a/sklearn/impute/_iterative.py b/sklearn/impute/_iterative.py
index 41f903061c34d..22cede929ebb3 100644
--- a/sklearn/impute/_iterative.py
+++ b/sklearn/impute/_iterative.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 import warnings
 from collections import namedtuple
 from numbers import Integral, Real
diff --git a/sklearn/inspection/__init__.py b/sklearn/inspection/__init__.py
index 312a19d2c9bce..8bb2b5dc575e9 100644
--- a/sklearn/inspection/__init__.py
+++ b/sklearn/inspection/__init__.py
@@ -1,5 +1,8 @@
 """Tools for model inspection."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from ._partial_dependence import partial_dependence
 from ._permutation_importance import permutation_importance
 from ._plot.decision_boundary import DecisionBoundaryDisplay
diff --git a/sklearn/inspection/_pd_utils.py b/sklearn/inspection/_pd_utils.py
index 76f4d626fd53c..4d890212e2838 100644
--- a/sklearn/inspection/_pd_utils.py
+++ b/sklearn/inspection/_pd_utils.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 def _check_feature_names(X, feature_names=None):
     """Check feature names.
 
diff --git a/sklearn/inspection/_permutation_importance.py b/sklearn/inspection/_permutation_importance.py
index 659db143153cc..8ecd7237b077d 100644
--- a/sklearn/inspection/_permutation_importance.py
+++ b/sklearn/inspection/_permutation_importance.py
@@ -1,5 +1,8 @@
 """Permutation importance for estimators."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import numbers
 
 import numpy as np
diff --git a/sklearn/inspection/_plot/__init__.py b/sklearn/inspection/_plot/__init__.py
index e69de29bb2d1d..67dd18fb94b59 100644
--- a/sklearn/inspection/_plot/__init__.py
+++ b/sklearn/inspection/_plot/__init__.py
@@ -0,0 +1,2 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
diff --git a/sklearn/inspection/_plot/decision_boundary.py b/sklearn/inspection/_plot/decision_boundary.py
index 92e1a2527400e..b87316f670cc9 100644
--- a/sklearn/inspection/_plot/decision_boundary.py
+++ b/sklearn/inspection/_plot/decision_boundary.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 import numpy as np
 
 from ...base import is_regressor
diff --git a/sklearn/inspection/_plot/partial_dependence.py b/sklearn/inspection/_plot/partial_dependence.py
index 3d516d727192e..8a5118df6862b 100644
--- a/sklearn/inspection/_plot/partial_dependence.py
+++ b/sklearn/inspection/_plot/partial_dependence.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 import numbers
 from itertools import chain
 from math import ceil
diff --git a/sklearn/linear_model/__init__.py b/sklearn/linear_model/__init__.py
index a2b0ffba0e728..182cd94b0260d 100644
--- a/sklearn/linear_model/__init__.py
+++ b/sklearn/linear_model/__init__.py
@@ -1,5 +1,8 @@
 """A variety of linear models."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # See http://scikit-learn.sourceforge.net/modules/sgd.html and
 # http://scikit-learn.sourceforge.net/modules/linear_model.html for
 # complete documentation.
diff --git a/sklearn/linear_model/_linear_loss.py b/sklearn/linear_model/_linear_loss.py
index e8c1466b30623..cfac0a2739115 100644
--- a/sklearn/linear_model/_linear_loss.py
+++ b/sklearn/linear_model/_linear_loss.py
@@ -2,6 +2,9 @@
 Loss functions for linear models with raw_prediction = X @ coef
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import numpy as np
 from scipy import sparse
 
diff --git a/sklearn/linear_model/_logistic.py b/sklearn/linear_model/_logistic.py
index 23daa86f2146d..6362a7fa45395 100644
--- a/sklearn/linear_model/_logistic.py
+++ b/sklearn/linear_model/_logistic.py
@@ -2,6 +2,9 @@
 Logistic Regression
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Author: Gael Varoquaux <gael.varoquaux@normalesup.org>
 #         Fabian Pedregosa <f@bianp.net>
 #         Alexandre Gramfort <alexandre.gramfort@telecom-paristech.fr>
diff --git a/sklearn/manifold/__init__.py b/sklearn/manifold/__init__.py
index b3f53e84b04fb..2266b6e08af88 100644
--- a/sklearn/manifold/__init__.py
+++ b/sklearn/manifold/__init__.py
@@ -1,5 +1,8 @@
 """Data embedding techniques."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from ._isomap import Isomap
 from ._locally_linear import LocallyLinearEmbedding, locally_linear_embedding
 from ._mds import MDS, smacof
diff --git a/sklearn/manifold/_mds.py b/sklearn/manifold/_mds.py
index f57ce6c4c8d2c..db8ae2ff40444 100644
--- a/sklearn/manifold/_mds.py
+++ b/sklearn/manifold/_mds.py
@@ -2,6 +2,9 @@
 Multi-dimensional Scaling (MDS).
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # author: Nelle Varoquaux <nelle.varoquaux@gmail.com>
 # SPDX-License-Identifier: BSD-3-Clause
 
diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py
index 81a67d9793bb0..787df39a21979 100644
--- a/sklearn/metrics/__init__.py
+++ b/sklearn/metrics/__init__.py
@@ -1,5 +1,8 @@
 """Score functions, performance metrics, pairwise metrics and distance computations."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from . import cluster
 from ._classification import (
     accuracy_score,
diff --git a/sklearn/metrics/_pairwise_distances_reduction/__init__.py b/sklearn/metrics/_pairwise_distances_reduction/__init__.py
index 73d291995c31b..6c8deb34dc448 100644
--- a/sklearn/metrics/_pairwise_distances_reduction/__init__.py
+++ b/sklearn/metrics/_pairwise_distances_reduction/__init__.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 #
 # Pairwise Distances Reductions
 # =============================
diff --git a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py
index 956de3577bcee..83bb8cd2e0d5c 100644
--- a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py
+++ b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 from abc import abstractmethod
 from typing import List
 
diff --git a/sklearn/metrics/_plot/__init__.py b/sklearn/metrics/_plot/__init__.py
index e69de29bb2d1d..67dd18fb94b59 100644
--- a/sklearn/metrics/_plot/__init__.py
+++ b/sklearn/metrics/_plot/__init__.py
@@ -0,0 +1,2 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
diff --git a/sklearn/metrics/_plot/confusion_matrix.py b/sklearn/metrics/_plot/confusion_matrix.py
index 01783367649f5..0d5f5d84719ad 100644
--- a/sklearn/metrics/_plot/confusion_matrix.py
+++ b/sklearn/metrics/_plot/confusion_matrix.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 from itertools import product
 
 import numpy as np
diff --git a/sklearn/metrics/_plot/det_curve.py b/sklearn/metrics/_plot/det_curve.py
index e7336b10f5bb6..712a87237c35a 100644
--- a/sklearn/metrics/_plot/det_curve.py
+++ b/sklearn/metrics/_plot/det_curve.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 import scipy as sp
 
 from ...utils._plotting import _BinaryClassifierCurveDisplayMixin
diff --git a/sklearn/metrics/_plot/precision_recall_curve.py b/sklearn/metrics/_plot/precision_recall_curve.py
index 852dbf3981b2c..869c8bc95e675 100644
--- a/sklearn/metrics/_plot/precision_recall_curve.py
+++ b/sklearn/metrics/_plot/precision_recall_curve.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 from collections import Counter
 
 from ...utils._plotting import _BinaryClassifierCurveDisplayMixin
diff --git a/sklearn/metrics/_plot/regression.py b/sklearn/metrics/_plot/regression.py
index 1a3dfa0127931..b079dc02524b3 100644
--- a/sklearn/metrics/_plot/regression.py
+++ b/sklearn/metrics/_plot/regression.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 import numbers
 
 import numpy as np
diff --git a/sklearn/metrics/_plot/roc_curve.py b/sklearn/metrics/_plot/roc_curve.py
index 292fb6e2e2f69..cb8718705e831 100644
--- a/sklearn/metrics/_plot/roc_curve.py
+++ b/sklearn/metrics/_plot/roc_curve.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 from ...utils._plotting import _BinaryClassifierCurveDisplayMixin
 from .._ranking import auc, roc_curve
 
diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index c1a916aa0b5f3..6a67e7392cb1a 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -13,6 +13,9 @@
 ground truth labeling (or ``None`` in the case of unsupervised models).
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Authors: Andreas Mueller <amueller@ais.uni-bonn.de>
 #          Lars Buitinck
 #          Arnaud Joly <arnaud.v.joly@gmail.com>
diff --git a/sklearn/metrics/cluster/__init__.py b/sklearn/metrics/cluster/__init__.py
index 776b2f2dbd063..47c7ae161edf2 100644
--- a/sklearn/metrics/cluster/__init__.py
+++ b/sklearn/metrics/cluster/__init__.py
@@ -5,6 +5,9 @@
   model itself.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from ._bicluster import consensus_score
 from ._supervised import (
     adjusted_mutual_info_score,
diff --git a/sklearn/metrics/cluster/_bicluster.py b/sklearn/metrics/cluster/_bicluster.py
index 713d0bee8fa2e..c58f0e6047ad1 100644
--- a/sklearn/metrics/cluster/_bicluster.py
+++ b/sklearn/metrics/cluster/_bicluster.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 import numpy as np
 from scipy.optimize import linear_sum_assignment
 
diff --git a/sklearn/mixture/__init__.py b/sklearn/mixture/__init__.py
index 9eb449226a9cb..6832f110e4cc6 100644
--- a/sklearn/mixture/__init__.py
+++ b/sklearn/mixture/__init__.py
@@ -1,5 +1,8 @@
 """Mixture modeling algorithms."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from ._bayesian_mixture import BayesianGaussianMixture
 from ._gaussian_mixture import GaussianMixture
 
diff --git a/sklearn/model_selection/__init__.py b/sklearn/model_selection/__init__.py
index 8a50d032dc164..55b548ce45814 100644
--- a/sklearn/model_selection/__init__.py
+++ b/sklearn/model_selection/__init__.py
@@ -1,5 +1,8 @@
 """Tools for model selection, such as cross validation and hyper-parameter tuning."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import typing
 
 from ._classification_threshold import (
diff --git a/sklearn/model_selection/_classification_threshold.py b/sklearn/model_selection/_classification_threshold.py
index 1d221d3388434..d29ad5d28c322 100644
--- a/sklearn/model_selection/_classification_threshold.py
+++ b/sklearn/model_selection/_classification_threshold.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 from collections.abc import MutableMapping
 from numbers import Integral, Real
 
diff --git a/sklearn/model_selection/_plot.py b/sklearn/model_selection/_plot.py
index 08518cf2482d4..0565675e87ee7 100644
--- a/sklearn/model_selection/_plot.py
+++ b/sklearn/model_selection/_plot.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 import numpy as np
 
 from ..utils._optional_dependencies import check_matplotlib_support
diff --git a/sklearn/model_selection/_search_successive_halving.py b/sklearn/model_selection/_search_successive_halving.py
index b1cf5ee50965c..373dbfac22be5 100644
--- a/sklearn/model_selection/_search_successive_halving.py
+++ b/sklearn/model_selection/_search_successive_halving.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 from abc import abstractmethod
 from copy import deepcopy
 from math import ceil, floor, log
diff --git a/sklearn/neighbors/__init__.py b/sklearn/neighbors/__init__.py
index e2c1a50632564..02c4a28b9a6c4 100644
--- a/sklearn/neighbors/__init__.py
+++ b/sklearn/neighbors/__init__.py
@@ -1,5 +1,8 @@
 """The k-nearest neighbors algorithms."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from ._ball_tree import BallTree
 from ._base import VALID_METRICS, VALID_METRICS_SPARSE, sort_graph_by_row_values
 from ._classification import KNeighborsClassifier, RadiusNeighborsClassifier
diff --git a/sklearn/neighbors/_kde.py b/sklearn/neighbors/_kde.py
index a9e5fe011150a..0e4b45cc77e36 100644
--- a/sklearn/neighbors/_kde.py
+++ b/sklearn/neighbors/_kde.py
@@ -3,6 +3,9 @@
 -------------------------
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Author: Jake Vanderplas <jakevdp@cs.washington.edu>
 import itertools
 from numbers import Integral, Real
diff --git a/sklearn/neighbors/_unsupervised.py b/sklearn/neighbors/_unsupervised.py
index 4185bbe15826b..8888fe18483c6 100644
--- a/sklearn/neighbors/_unsupervised.py
+++ b/sklearn/neighbors/_unsupervised.py
@@ -1,5 +1,8 @@
 """Unsupervised nearest neighbors learner"""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from ..base import _fit_context
 from ._base import KNeighborsMixin, NeighborsBase, RadiusNeighborsMixin
 
diff --git a/sklearn/neural_network/__init__.py b/sklearn/neural_network/__init__.py
index 7bf5f1241de69..fa580a8b40642 100644
--- a/sklearn/neural_network/__init__.py
+++ b/sklearn/neural_network/__init__.py
@@ -1,5 +1,8 @@
 """Models based on neural networks."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # SPDX-License-Identifier: BSD-3-Clause
 
 from ._multilayer_perceptron import MLPClassifier, MLPRegressor
diff --git a/sklearn/preprocessing/__init__.py b/sklearn/preprocessing/__init__.py
index 9e49379c9122a..d5ea1fe15f036 100644
--- a/sklearn/preprocessing/__init__.py
+++ b/sklearn/preprocessing/__init__.py
@@ -1,5 +1,8 @@
 """Methods for scaling, centering, normalization, binarization, and more."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from ._data import (
     Binarizer,
     KernelCenterer,
diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py
index c49684d0ebfbc..3f5d7ba4ad205 100644
--- a/sklearn/preprocessing/_function_transformer.py
+++ b/sklearn/preprocessing/_function_transformer.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 import warnings
 
 import numpy as np
diff --git a/sklearn/preprocessing/_polynomial.py b/sklearn/preprocessing/_polynomial.py
index f4c9fb032cfb0..2175e93e38694 100644
--- a/sklearn/preprocessing/_polynomial.py
+++ b/sklearn/preprocessing/_polynomial.py
@@ -2,6 +2,9 @@
 This file contains preprocessing tools based on polynomials.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import collections
 from itertools import chain, combinations
 from itertools import combinations_with_replacement as combinations_w_r
diff --git a/sklearn/preprocessing/_target_encoder.py b/sklearn/preprocessing/_target_encoder.py
index b3b7c3d5e7bd9..779618026def1 100644
--- a/sklearn/preprocessing/_target_encoder.py
+++ b/sklearn/preprocessing/_target_encoder.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 from numbers import Integral, Real
 
 import numpy as np
diff --git a/sklearn/semi_supervised/__init__.py b/sklearn/semi_supervised/__init__.py
index 055c3f431b2c1..fba2488a753df 100644
--- a/sklearn/semi_supervised/__init__.py
+++ b/sklearn/semi_supervised/__init__.py
@@ -4,6 +4,9 @@
 data for classification tasks.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from ._label_propagation import LabelPropagation, LabelSpreading
 from ._self_training import SelfTrainingClassifier
 
diff --git a/sklearn/svm/_base.py b/sklearn/svm/_base.py
index 47d4027c50754..22adea71cd27c 100644
--- a/sklearn/svm/_base.py
+++ b/sklearn/svm/_base.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 import warnings
 from abc import ABCMeta, abstractmethod
 from numbers import Integral, Real
diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py
index 5b547fcb98cd6..8ecdca6f47b56 100644
--- a/sklearn/svm/_classes.py
+++ b/sklearn/svm/_classes.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 from numbers import Integral, Real
 
 import numpy as np
diff --git a/sklearn/tree/__init__.py b/sklearn/tree/__init__.py
index 23ab17aa0bbbe..c961a811fe05c 100644
--- a/sklearn/tree/__init__.py
+++ b/sklearn/tree/__init__.py
@@ -1,5 +1,8 @@
 """Decision tree based models for classification and regression."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from ._classes import (
     BaseDecisionTree,
     DecisionTreeClassifier,
diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index 011347cb2d443..cb06d90572c7e 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -1,5 +1,8 @@
 """Various utilities to help with development."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import platform
 import warnings
 from collections.abc import Sequence
diff --git a/sklearn/utils/_arpack.py b/sklearn/utils/_arpack.py
index 3465ac98c2e81..3087a0f4dbf0a 100644
--- a/sklearn/utils/_arpack.py
+++ b/sklearn/utils/_arpack.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 from .validation import check_random_state
 
 
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index c222e26fcc82c..49da1a3b3d5db 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -1,5 +1,8 @@
 """Tools to support array_api."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import itertools
 import math
 from functools import wraps
diff --git a/sklearn/utils/_available_if.py b/sklearn/utils/_available_if.py
index 2d9598df9de7e..9230a35ec9ab0 100644
--- a/sklearn/utils/_available_if.py
+++ b/sklearn/utils/_available_if.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 from functools import update_wrapper, wraps
 from types import MethodType
 
diff --git a/sklearn/utils/_bunch.py b/sklearn/utils/_bunch.py
index d90aeb7d93c74..4d474e88eb80e 100644
--- a/sklearn/utils/_bunch.py
+++ b/sklearn/utils/_bunch.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 import warnings
 
 
diff --git a/sklearn/utils/_chunking.py b/sklearn/utils/_chunking.py
index 7bf53d0626c85..e2462fa7f7eda 100644
--- a/sklearn/utils/_chunking.py
+++ b/sklearn/utils/_chunking.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 import warnings
 from itertools import islice
 from numbers import Integral
diff --git a/sklearn/utils/_encode.py b/sklearn/utils/_encode.py
index 3fd4d45f522e6..897980e43ce11 100644
--- a/sklearn/utils/_encode.py
+++ b/sklearn/utils/_encode.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 from collections import Counter
 from contextlib import suppress
 from typing import NamedTuple
diff --git a/sklearn/utils/_estimator_html_repr.py b/sklearn/utils/_estimator_html_repr.py
index 5e465234f516b..1b1eab6054e30 100644
--- a/sklearn/utils/_estimator_html_repr.py
+++ b/sklearn/utils/_estimator_html_repr.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 import html
 import itertools
 from contextlib import closing
diff --git a/sklearn/utils/_indexing.py b/sklearn/utils/_indexing.py
index ca2327f2bb109..8923d568cbf33 100644
--- a/sklearn/utils/_indexing.py
+++ b/sklearn/utils/_indexing.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 import numbers
 import sys
 import warnings
diff --git a/sklearn/utils/_joblib.py b/sklearn/utils/_joblib.py
index 7638a30e7b5fa..837dd5102e918 100644
--- a/sklearn/utils/_joblib.py
+++ b/sklearn/utils/_joblib.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 # TODO(1.7): remove this file
 
 import warnings as _warnings
diff --git a/sklearn/utils/_mask.py b/sklearn/utils/_mask.py
index 0a66dc5a20a81..95fefaca6e67c 100644
--- a/sklearn/utils/_mask.py
+++ b/sklearn/utils/_mask.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 from contextlib import suppress
 
 import numpy as np
diff --git a/sklearn/utils/_missing.py b/sklearn/utils/_missing.py
index b48381cfcf3bb..f2e024df887be 100644
--- a/sklearn/utils/_missing.py
+++ b/sklearn/utils/_missing.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 import math
 import numbers
 from contextlib import suppress
diff --git a/sklearn/utils/_mocking.py b/sklearn/utils/_mocking.py
index 0afed8c08cfaa..f4606d2a98c04 100644
--- a/sklearn/utils/_mocking.py
+++ b/sklearn/utils/_mocking.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 import numpy as np
 
 from ..base import BaseEstimator, ClassifierMixin
diff --git a/sklearn/utils/_optional_dependencies.py b/sklearn/utils/_optional_dependencies.py
index 14ffeb1d5b6ee..b2e970502c123 100644
--- a/sklearn/utils/_optional_dependencies.py
+++ b/sklearn/utils/_optional_dependencies.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 def check_matplotlib_support(caller_name):
     """Raise ImportError with detailed error message if mpl is not installed.
 
diff --git a/sklearn/utils/_param_validation.py b/sklearn/utils/_param_validation.py
index 56b7d0ee1fe4c..e1de052bd01d3 100644
--- a/sklearn/utils/_param_validation.py
+++ b/sklearn/utils/_param_validation.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 import functools
 import math
 import operator
diff --git a/sklearn/utils/_plotting.py b/sklearn/utils/_plotting.py
index 2db38baa9abfa..ab3e21bafa134 100644
--- a/sklearn/utils/_plotting.py
+++ b/sklearn/utils/_plotting.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 import numpy as np
 
 from . import check_consistent_length
diff --git a/sklearn/utils/_pprint.py b/sklearn/utils/_pprint.py
index 9b33cd617a5fc..00d8391b3af64 100644
--- a/sklearn/utils/_pprint.py
+++ b/sklearn/utils/_pprint.py
@@ -1,6 +1,9 @@
 """This module contains the _EstimatorPrettyPrinter class used in
 BaseEstimator.__repr__ for pretty-printing estimators"""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
 # 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 Python Software Foundation;
 # All Rights Reserved
diff --git a/sklearn/utils/_response.py b/sklearn/utils/_response.py
index 0381c872a94b0..86c430dbd23f2 100644
--- a/sklearn/utils/_response.py
+++ b/sklearn/utils/_response.py
@@ -3,6 +3,9 @@
 It allows to make uniform checks and validation.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import numpy as np
 
 from ..base import is_classifier
diff --git a/sklearn/utils/_set_output.py b/sklearn/utils/_set_output.py
index 42757dbb00fae..510be7469b96f 100644
--- a/sklearn/utils/_set_output.py
+++ b/sklearn/utils/_set_output.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 import importlib
 from functools import wraps
 from typing import Protocol, runtime_checkable
diff --git a/sklearn/utils/_show_versions.py b/sklearn/utils/_show_versions.py
index a947e35838504..43d2c15dbd7a5 100644
--- a/sklearn/utils/_show_versions.py
+++ b/sklearn/utils/_show_versions.py
@@ -4,6 +4,9 @@
 adapted from :func:`pandas.show_versions`
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # SPDX-License-Identifier: BSD-3-Clause
 
 import platform
diff --git a/sklearn/utils/_tags.py b/sklearn/utils/_tags.py
index c8f6ffb651a0d..af2ca444b5d33 100644
--- a/sklearn/utils/_tags.py
+++ b/sklearn/utils/_tags.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 import numpy as np
 
 _DEFAULT_TAGS = {
diff --git a/sklearn/utils/_user_interface.py b/sklearn/utils/_user_interface.py
index 09e6f2b7bf849..f48b934dc7c8b 100644
--- a/sklearn/utils/_user_interface.py
+++ b/sklearn/utils/_user_interface.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 import timeit
 from contextlib import contextmanager
 
diff --git a/sklearn/utils/deprecation.py b/sklearn/utils/deprecation.py
index a3225597701c7..e6ad56264e1c3 100644
--- a/sklearn/utils/deprecation.py
+++ b/sklearn/utils/deprecation.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 import functools
 import warnings
 
diff --git a/sklearn/utils/discovery.py b/sklearn/utils/discovery.py
index 7a6c73997ef8c..40d5b5f8cf714 100644
--- a/sklearn/utils/discovery.py
+++ b/sklearn/utils/discovery.py
@@ -1,5 +1,8 @@
 """Utilities to discover scikit-learn objects."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import inspect
 import pkgutil
 from importlib import import_module
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 5ba1540094588..5264ec0a7e913 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -1,5 +1,8 @@
 """Various utilities to check the compatibility of estimators with scikit-learn API."""
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import pickle
 import re
 import warnings
diff --git a/sklearn/utils/optimize.py b/sklearn/utils/optimize.py
index ac91110651d94..ba2ddee605440 100644
--- a/sklearn/utils/optimize.py
+++ b/sklearn/utils/optimize.py
@@ -9,6 +9,9 @@
 significant speedups.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # This is a modified file from scipy.optimize
 # Original authors: Travis Oliphant, Eric Jones
 # Modifications by Gael Varoquaux, Mathieu Blondel and Tom Dupre la Tour
diff --git a/sklearn/utils/parallel.py b/sklearn/utils/parallel.py
index 7d7349ebe2816..da7ad69ffc3bf 100644
--- a/sklearn/utils/parallel.py
+++ b/sklearn/utils/parallel.py
@@ -2,6 +2,9 @@
 usage.
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 import functools
 import warnings
 from functools import update_wrapper
diff --git a/sklearn/utils/stats.py b/sklearn/utils/stats.py
index d0e22ea3694f4..b931cb6b6f90d 100644
--- a/sklearn/utils/stats.py
+++ b/sklearn/utils/stats.py
@@ -1,3 +1,5 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 import numpy as np
 
 from .extmath import stable_cumsum

From b2d1fc274a694014927bd1deac3a69d4d9687c6c Mon Sep 17 00:00:00 2001
From: adrinjalali <adrin.jalali@gmail.com>
Date: Tue, 18 Jun 2024 21:12:12 +0200
Subject: [PATCH 03/17] minor fixes

---
 examples/applications/plot_time_series_lagged_features.py    | 2 +-
 examples/applications/plot_topics_extraction_with_nmf_lda.py | 2 +-
 examples/model_selection/plot_likelihood_ratios.py           | 2 +-
 examples/model_selection/plot_roc.py                         | 4 ++--
 pyproject.toml                                               | 2 ++
 setup.py                                                     | 2 +-
 sklearn/linear_model/_glm/_newton_solver.py                  | 2 +-
 sklearn/mixture/tests/test_bayesian_mixture.py               | 2 +-
 sklearn/model_selection/_validation.py                       | 4 ++--
 9 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/examples/applications/plot_time_series_lagged_features.py b/examples/applications/plot_time_series_lagged_features.py
index 53b29fa940958..95d339aa0a1c1 100644
--- a/examples/applications/plot_time_series_lagged_features.py
+++ b/examples/applications/plot_time_series_lagged_features.py
@@ -255,7 +255,7 @@ def consolidate_scores(cv_results, scores, metric):
     time = cv_results["fit_time"]
     scores["fit_time"].append(f"{time.mean():.2f} ± {time.std():.2f} s")
 
-    scores["loss"].append(f"quantile {int(quantile*100)}")
+    scores["loss"].append(f"quantile {int(quantile * 100)}")
     for key, value in cv_results.items():
         if key.startswith("test_"):
             metric = key.split("test_")[1]
diff --git a/examples/applications/plot_topics_extraction_with_nmf_lda.py b/examples/applications/plot_topics_extraction_with_nmf_lda.py
index faeef5ae15a11..a6f774d01e2de 100644
--- a/examples/applications/plot_topics_extraction_with_nmf_lda.py
+++ b/examples/applications/plot_topics_extraction_with_nmf_lda.py
@@ -50,7 +50,7 @@ def plot_top_words(model, feature_names, n_top_words, title):
 
         ax = axes[topic_idx]
         ax.barh(top_features, weights, height=0.7)
-        ax.set_title(f"Topic {topic_idx +1}", fontdict={"fontsize": 30})
+        ax.set_title(f"Topic {topic_idx + 1}", fontdict={"fontsize": 30})
         ax.tick_params(axis="both", which="major", labelsize=20)
         for i in "top right left".split():
             ax.spines[i].set_visible(False)
diff --git a/examples/model_selection/plot_likelihood_ratios.py b/examples/model_selection/plot_likelihood_ratios.py
index 2fc3ad3d040f5..260bab2c90bf0 100644
--- a/examples/model_selection/plot_likelihood_ratios.py
+++ b/examples/model_selection/plot_likelihood_ratios.py
@@ -42,7 +42,7 @@ class proportion than the target application.
 from sklearn.datasets import make_classification
 
 X, y = make_classification(n_samples=10_000, weights=[0.9, 0.1], random_state=0)
-print(f"Percentage of people carrying the disease: {100*y.mean():.2f}%")
+print(f"Percentage of people carrying the disease: {100 * y.mean():.2f}%")
 
 # %%
 # A machine learning model is built to diagnose if a person with some given
diff --git a/examples/model_selection/plot_roc.py b/examples/model_selection/plot_roc.py
index 1b2a9760342a3..fbc3463a112e5 100644
--- a/examples/model_selection/plot_roc.py
+++ b/examples/model_selection/plot_roc.py
@@ -151,9 +151,9 @@
 #
 # We can briefly demo the effect of :func:`numpy.ravel`:
 
-print(f"y_score:\n{y_score[0:2,:]}")
+print(f"y_score:\n{y_score[0:2, :]}")
 print()
-print(f"y_score.ravel():\n{y_score[0:2,:].ravel()}")
+print(f"y_score.ravel():\n{y_score[0:2, :].ravel()}")
 
 # %%
 # In a multi-class classification setup with highly imbalanced classes,
diff --git a/pyproject.toml b/pyproject.toml
index 4f2a33a12b38f..c96e3cfb51ccc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -151,6 +151,8 @@ ignore=[
     "E731",
     # do not use variables named 'l', 'O', or 'I'
     "E741",
+    # ignore assigned but unused variables
+    "F841",
 ]
 
 [tool.ruff.lint.flake8-copyright]
diff --git a/setup.py b/setup.py
index 97e7a67458180..4b7c298998a82 100755
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
-#! /usr/bin/env python
+# ! /usr/bin/env python
 #
 # Authors: The scikit-learn developers
 # License: 3-clause BSD
diff --git a/sklearn/linear_model/_glm/_newton_solver.py b/sklearn/linear_model/_glm/_newton_solver.py
index b2be604d931c5..870f90c9b6255 100644
--- a/sklearn/linear_model/_glm/_newton_solver.py
+++ b/sklearn/linear_model/_glm/_newton_solver.py
@@ -253,7 +253,7 @@ def line_search(self, X, y, sample_weight):
             check = loss_improvement <= t * armijo_term
             if is_verbose:
                 print(
-                    f"    line search iteration={i+1}, step size={t}\n"
+                    f"    line search iteration={i + 1}, step size={t}\n"
                     f"      check loss improvement <= armijo term: {loss_improvement} "
                     f"<= {t * armijo_term} {check}"
                 )
diff --git a/sklearn/mixture/tests/test_bayesian_mixture.py b/sklearn/mixture/tests/test_bayesian_mixture.py
index 925cd2fdb615b..b9123c3a37c26 100644
--- a/sklearn/mixture/tests/test_bayesian_mixture.py
+++ b/sklearn/mixture/tests/test_bayesian_mixture.py
@@ -118,7 +118,7 @@ def test_bayesian_mixture_precisions_prior_initialisation():
     )
     msg = (
         "The parameter 'degrees_of_freedom_prior' should be greater than"
-        f" {n_features -1}, but got {bad_degrees_of_freedom_prior_:.3f}."
+        f" {n_features - 1}, but got {bad_degrees_of_freedom_prior_:.3f}."
     )
     with pytest.raises(ValueError, match=msg):
         bgmm.fit(X)
diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index 92c93193010ff..4a3e352d6c705 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -844,9 +844,9 @@ def _fit_and_score(
     progress_msg = ""
     if verbose > 2:
         if split_progress is not None:
-            progress_msg = f" {split_progress[0]+1}/{split_progress[1]}"
+            progress_msg = f" {split_progress[0] + 1}/{split_progress[1]}"
         if candidate_progress and verbose > 9:
-            progress_msg += f"; {candidate_progress[0]+1}/{candidate_progress[1]}"
+            progress_msg += f"; {candidate_progress[0] + 1}/{candidate_progress[1]}"
 
     if verbose > 1:
         if parameters is None:

From 6c1d14478e84650d639337bdf88d47aee5814078 Mon Sep 17 00:00:00 2001
From: adrinjalali <adrin.jalali@gmail.com>
Date: Tue, 18 Jun 2024 21:42:16 +0200
Subject: [PATCH 04/17] CLN fix the remaining ruff issues

---
 .../plot_tweedie_regression_insurance_claims.py           | 2 +-
 pyproject.toml                                            | 1 +
 sklearn/cluster/_optics.py                                | 2 +-
 sklearn/cluster/tests/test_dbscan.py                      | 2 +-
 sklearn/linear_model/tests/test_ridge.py                  | 4 ++--
 sklearn/metrics/pairwise.py                               | 4 +++-
 sklearn/model_selection/_search.py                        | 4 ++--
 sklearn/model_selection/_split.py                         | 2 +-
 sklearn/model_selection/tests/test_validation.py          | 8 ++++----
 sklearn/utils/estimator_checks.py                         | 2 +-
 sklearn/utils/tests/test_validation.py                    | 2 +-
 sklearn/utils/validation.py                               | 2 +-
 12 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py
index 31a91fb37c766..2c3a1e6f4cea6 100644
--- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py
+++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py
@@ -79,7 +79,7 @@ def load_mtpl2(n_samples=None):
     df["ClaimAmount"] = df["ClaimAmount"].fillna(0)
 
     # unquote string fields
-    for column_name in df.columns[df.dtypes.values == object]:
+    for column_name in df.columns[df.dtypes.values == object]:  # noqa: E721
         df[column_name] = df[column_name].str.strip("'")
     return df.iloc[:n_samples]
 
diff --git a/pyproject.toml b/pyproject.toml
index c96e3cfb51ccc..2c551d4c72984 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -152,6 +152,7 @@ ignore=[
     # do not use variables named 'l', 'O', or 'I'
     "E741",
     # ignore assigned but unused variables
+    # this comes up with preview=true
     "F841",
 ]
 
diff --git a/sklearn/cluster/_optics.py b/sklearn/cluster/_optics.py
index 46f795e94ffb2..347c33869aaf4 100755
--- a/sklearn/cluster/_optics.py
+++ b/sklearn/cluster/_optics.py
@@ -327,7 +327,7 @@ def fit(self, X, y=None):
             Returns a fitted instance of self.
         """
         dtype = bool if self.metric in PAIRWISE_BOOLEAN_FUNCTIONS else float
-        if dtype == bool and X.dtype != bool:
+        if dtype is bool and X.dtype != bool:  # noqa: E721
             msg = (
                 "Data will be converted to boolean for"
                 f" metric {self.metric}, to avoid this warning,"
diff --git a/sklearn/cluster/tests/test_dbscan.py b/sklearn/cluster/tests/test_dbscan.py
index d42cc2b17d518..556f89312d2fc 100644
--- a/sklearn/cluster/tests/test_dbscan.py
+++ b/sklearn/cluster/tests/test_dbscan.py
@@ -291,7 +291,7 @@ def test_input_validation():
 def test_pickle():
     obj = DBSCAN()
     s = pickle.dumps(obj)
-    assert type(pickle.loads(s)) == obj.__class__
+    assert type(pickle.loads(s)) is obj.__class__
 
 
 def test_boundaries():
diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py
index 167ce0bac4cba..9be28cac141b1 100644
--- a/sklearn/linear_model/tests/test_ridge.py
+++ b/sklearn/linear_model/tests/test_ridge.py
@@ -1020,7 +1020,7 @@ def _test_ridge_cv(sparse_container):
     ridge_cv.predict(X)
 
     assert len(ridge_cv.coef_.shape) == 1
-    assert type(ridge_cv.intercept_) == np.float64
+    assert type(ridge_cv.intercept_) is np.float64
 
     cv = KFold(5)
     ridge_cv.set_params(cv=cv)
@@ -1028,7 +1028,7 @@ def _test_ridge_cv(sparse_container):
     ridge_cv.predict(X)
 
     assert len(ridge_cv.coef_.shape) == 1
-    assert type(ridge_cv.intercept_) == np.float64
+    assert type(ridge_cv.intercept_) is np.float64
 
 
 @pytest.mark.parametrize(
diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py
index ddf871d30816c..5234e0583cc1d 100644
--- a/sklearn/metrics/pairwise.py
+++ b/sklearn/metrics/pairwise.py
@@ -2361,7 +2361,9 @@ def pairwise_distances(
 
         dtype = bool if metric in PAIRWISE_BOOLEAN_FUNCTIONS else "infer_float"
 
-        if dtype == bool and (X.dtype != bool or (Y is not None and Y.dtype != bool)):
+        if dtype is bool and (
+            X.dtype != bool or (Y is not None and Y.dtype != bool)  # noqa: E721
+        ):
             msg = "Data was converted to boolean for metric %s" % metric
             warnings.warn(msg, DataConversionWarning)
 
diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py
index 110db2c39a4a2..f4c13d6b5dd26 100644
--- a/sklearn/model_selection/_search.py
+++ b/sklearn/model_selection/_search.py
@@ -1097,14 +1097,14 @@ def _store(key_name, array, weights=None, splits=False, rank=False):
             except (TypeError, ValueError):
                 arr_dtype = np.dtype(object)
             else:
-                if any(np.min_scalar_type(x) == object for x in param_list):
+                if any(np.min_scalar_type(x) == object for x in param_list):  # noqa: E721
                     # `np.result_type` might get thrown off by `.dtype` properties
                     # (which some estimators have).
                     # If finding the result dtype this way would give object,
                     # then we use object.
                     # https://github.com/scikit-learn/scikit-learn/issues/29157
                     arr_dtype = np.dtype(object)
-            if len(param_list) == n_candidates and arr_dtype != object:
+            if len(param_list) == n_candidates and arr_dtype != object:  # noqa: E721
                 # Exclude `object` else the numpy constructor might infer a list of
                 # tuples to be a 2d array.
                 results[key] = MaskedArray(param_list, mask=False, dtype=arr_dtype)
diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
index 9508e9237c9a2..1382ad063f43e 100644
--- a/sklearn/model_selection/_split.py
+++ b/sklearn/model_selection/_split.py
@@ -2896,7 +2896,7 @@ def _build_repr(self):
                 value = getattr(self, key, None)
                 if value is None and hasattr(self, "cvargs"):
                     value = self.cvargs.get(key, None)
-            if len(w) and w[0].category == FutureWarning:
+            if len(w) and w[0].category is FutureWarning:
                 # if the parameter is deprecated, don't show it
                 continue
         finally:
diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py
index d94d3f054bba2..0996eca367164 100644
--- a/sklearn/model_selection/tests/test_validation.py
+++ b/sklearn/model_selection/tests/test_validation.py
@@ -586,10 +586,10 @@ def custom_scorer(clf, X, y):
             )
 
             # Make sure all the arrays are of np.ndarray type
-            assert type(cv_results["test_r2"]) == np.ndarray
-            assert type(cv_results["test_neg_mean_squared_error"]) == np.ndarray
-            assert type(cv_results["fit_time"]) == np.ndarray
-            assert type(cv_results["score_time"]) == np.ndarray
+            assert type(cv_results["test_r2"]) is np.ndarray
+            assert type(cv_results["test_neg_mean_squared_error"]) is np.ndarray
+            assert type(cv_results["fit_time"]) is np.ndarray
+            assert type(cv_results["score_time"]) is np.ndarray
 
             # Ensure all the times are within sane limits
             assert np.all(cv_results["fit_time"] >= 0)
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 5264ec0a7e913..7fbbbfe7c15ec 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -1509,7 +1509,7 @@ def _apply_on_subsets(func, X):
     result_by_batch = [func(batch.reshape(1, n_features)) for batch in X]
 
     # func can output tuple (e.g. score_samples)
-    if type(result_full) == tuple:
+    if type(result_full) is tuple:
         result_full = result_full[0]
         result_by_batch = list(map(lambda x: x[0], result_by_batch))
 
diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index 92fff950e875e..3ab70a0680db6 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -1340,7 +1340,7 @@ def test_check_scalar_invalid(
             include_boundaries=include_boundaries,
         )
     assert str(raised_error.value) == str(err_msg)
-    assert type(raised_error.value) == type(err_msg)
+    assert type(raised_error.value) is type(err_msg)
 
 
 _psd_cases_valid = {
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index d632abb77280d..ec93678efbbe2 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -871,7 +871,7 @@ def is_sparse(dtype):
         )
         if all(isinstance(dtype_iter, np.dtype) for dtype_iter in dtypes_orig):
             dtype_orig = np.result_type(*dtypes_orig)
-        elif pandas_requires_conversion and any(d == object for d in dtypes_orig):
+        elif pandas_requires_conversion and any(d == object for d in dtypes_orig):  # noqa: E721
             # Force object if any of the dtypes is an object
             dtype_orig = object
 

From ede3462e14435f5117af65f60f6f1d6a63a1283e Mon Sep 17 00:00:00 2001
From: adrinjalali <adrin.jalali@gmail.com>
Date: Fri, 12 Jul 2024 13:24:19 +0200
Subject: [PATCH 05/17] fix merge conflict

---
 .../plot_time_series_lagged_features.py       | 25 -------------------
 1 file changed, 25 deletions(-)

diff --git a/examples/applications/plot_time_series_lagged_features.py b/examples/applications/plot_time_series_lagged_features.py
index 7000176aca980..83c34b76e05b2 100644
--- a/examples/applications/plot_time_series_lagged_features.py
+++ b/examples/applications/plot_time_series_lagged_features.py
@@ -22,27 +22,6 @@
 # Analyzing the Bike Sharing Demand dataset
 # -----------------------------------------
 #
-# We start by loading the data from the OpenML repository
-# as a pandas dataframe. This will be replaced with Polars
-# once `fetch_openml` adds a native support for it.
-# We convert to Polars for feature engineering, as it automatically caches
-# common subexpressions which are reused in multiple expressions
-# (like `pl.col("count").shift(1)` below). See
-# https://docs.pola.rs/user-guide/lazy/optimizations/ for more information.
-
-import numpy as np
-import polars as pl
-
-from sklearn.datasets import fetch_openml
-
-pl.Config.set_fmt_str_lengths(20)
-
-bike_sharing = fetch_openml(
-    "Bike_Sharing_Demand", version=2, as_frame=True, parser="pandas"
-)
-df = bike_sharing.frame
-df = pl.DataFrame({col: df[col].to_numpy() for col in df.columns})
-== == == =
 # We start by loading the data from the OpenML repository as a raw parquet file
 # to illustrate how to work with an arbitrary parquet file instead of hiding this
 # step in a convenience tool such as `sklearn.datasets.fetch_openml`.
@@ -287,11 +266,7 @@ def consolidate_scores(cv_results, scores, metric):
     time = cv_results["fit_time"]
     scores["fit_time"].append(f"{time.mean():.2f} ± {time.std():.2f} s")
 
-<< << << < HEAD
     scores["loss"].append(f"quantile {int(quantile * 100)}")
-== == == =
-    scores["loss"].append(f"quantile {int(quantile * 100)}")
->> >> >> > upstream / main
     for key, value in cv_results.items():
         if key.startswith("test_"):
             metric = key.split("test_")[1]

From 6afe10099a2c2f3e23669e97c17dec1fe786c6a4 Mon Sep 17 00:00:00 2001
From: adrinjalali <adrin.jalali@gmail.com>
Date: Fri, 12 Jul 2024 14:17:39 +0200
Subject: [PATCH 06/17] make preview explicit selection

---
 pyproject.toml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 2c551d4c72984..707926d193f3a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -142,6 +142,8 @@ exclude=[
 [tool.ruff.lint]
 # This enables us to use CPY001: copyright header check
 preview = true
+# This enables us to use the explicit preview rules that we want only
+explicit-preview-rules = true
 # all rules can be found here: https://beta.ruff.rs/docs/rules/
 select = ["E", "F", "W", "I", "CPY001"]
 ignore=[

From 6a394a380bc2ca49934c7645a5e50f3df09ac1df Mon Sep 17 00:00:00 2001
From: adrinjalali <adrin.jalali@gmail.com>
Date: Fri, 12 Jul 2024 14:20:21 +0200
Subject: [PATCH 07/17] revert some changes

---
 examples/applications/plot_time_series_lagged_features.py    | 2 +-
 examples/applications/plot_topics_extraction_with_nmf_lda.py | 2 +-
 examples/model_selection/plot_likelihood_ratios.py           | 2 +-
 examples/model_selection/plot_roc.py                         | 4 ++--
 pyproject.toml                                               | 3 ---
 sklearn/linear_model/_glm/_newton_solver.py                  | 2 +-
 sklearn/mixture/tests/test_bayesian_mixture.py               | 2 +-
 sklearn/model_selection/_validation.py                       | 4 ++--
 8 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/examples/applications/plot_time_series_lagged_features.py b/examples/applications/plot_time_series_lagged_features.py
index 83c34b76e05b2..539ff24a6c1da 100644
--- a/examples/applications/plot_time_series_lagged_features.py
+++ b/examples/applications/plot_time_series_lagged_features.py
@@ -266,7 +266,7 @@ def consolidate_scores(cv_results, scores, metric):
     time = cv_results["fit_time"]
     scores["fit_time"].append(f"{time.mean():.2f} ± {time.std():.2f} s")
 
-    scores["loss"].append(f"quantile {int(quantile * 100)}")
+    scores["loss"].append(f"quantile {int(quantile*100)}")
     for key, value in cv_results.items():
         if key.startswith("test_"):
             metric = key.split("test_")[1]
diff --git a/examples/applications/plot_topics_extraction_with_nmf_lda.py b/examples/applications/plot_topics_extraction_with_nmf_lda.py
index a6f774d01e2de..faeef5ae15a11 100644
--- a/examples/applications/plot_topics_extraction_with_nmf_lda.py
+++ b/examples/applications/plot_topics_extraction_with_nmf_lda.py
@@ -50,7 +50,7 @@ def plot_top_words(model, feature_names, n_top_words, title):
 
         ax = axes[topic_idx]
         ax.barh(top_features, weights, height=0.7)
-        ax.set_title(f"Topic {topic_idx + 1}", fontdict={"fontsize": 30})
+        ax.set_title(f"Topic {topic_idx +1}", fontdict={"fontsize": 30})
         ax.tick_params(axis="both", which="major", labelsize=20)
         for i in "top right left".split():
             ax.spines[i].set_visible(False)
diff --git a/examples/model_selection/plot_likelihood_ratios.py b/examples/model_selection/plot_likelihood_ratios.py
index 260bab2c90bf0..2fc3ad3d040f5 100644
--- a/examples/model_selection/plot_likelihood_ratios.py
+++ b/examples/model_selection/plot_likelihood_ratios.py
@@ -42,7 +42,7 @@ class proportion than the target application.
 from sklearn.datasets import make_classification
 
 X, y = make_classification(n_samples=10_000, weights=[0.9, 0.1], random_state=0)
-print(f"Percentage of people carrying the disease: {100 * y.mean():.2f}%")
+print(f"Percentage of people carrying the disease: {100*y.mean():.2f}%")
 
 # %%
 # A machine learning model is built to diagnose if a person with some given
diff --git a/examples/model_selection/plot_roc.py b/examples/model_selection/plot_roc.py
index fbc3463a112e5..1b2a9760342a3 100644
--- a/examples/model_selection/plot_roc.py
+++ b/examples/model_selection/plot_roc.py
@@ -151,9 +151,9 @@
 #
 # We can briefly demo the effect of :func:`numpy.ravel`:
 
-print(f"y_score:\n{y_score[0:2, :]}")
+print(f"y_score:\n{y_score[0:2,:]}")
 print()
-print(f"y_score.ravel():\n{y_score[0:2, :].ravel()}")
+print(f"y_score.ravel():\n{y_score[0:2,:].ravel()}")
 
 # %%
 # In a multi-class classification setup with highly imbalanced classes,
diff --git a/pyproject.toml b/pyproject.toml
index 707926d193f3a..1af573efb7527 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -153,9 +153,6 @@ ignore=[
     "E731",
     # do not use variables named 'l', 'O', or 'I'
     "E741",
-    # ignore assigned but unused variables
-    # this comes up with preview=true
-    "F841",
 ]
 
 [tool.ruff.lint.flake8-copyright]
diff --git a/sklearn/linear_model/_glm/_newton_solver.py b/sklearn/linear_model/_glm/_newton_solver.py
index 870f90c9b6255..b2be604d931c5 100644
--- a/sklearn/linear_model/_glm/_newton_solver.py
+++ b/sklearn/linear_model/_glm/_newton_solver.py
@@ -253,7 +253,7 @@ def line_search(self, X, y, sample_weight):
             check = loss_improvement <= t * armijo_term
             if is_verbose:
                 print(
-                    f"    line search iteration={i + 1}, step size={t}\n"
+                    f"    line search iteration={i+1}, step size={t}\n"
                     f"      check loss improvement <= armijo term: {loss_improvement} "
                     f"<= {t * armijo_term} {check}"
                 )
diff --git a/sklearn/mixture/tests/test_bayesian_mixture.py b/sklearn/mixture/tests/test_bayesian_mixture.py
index b9123c3a37c26..925cd2fdb615b 100644
--- a/sklearn/mixture/tests/test_bayesian_mixture.py
+++ b/sklearn/mixture/tests/test_bayesian_mixture.py
@@ -118,7 +118,7 @@ def test_bayesian_mixture_precisions_prior_initialisation():
     )
     msg = (
         "The parameter 'degrees_of_freedom_prior' should be greater than"
-        f" {n_features - 1}, but got {bad_degrees_of_freedom_prior_:.3f}."
+        f" {n_features -1}, but got {bad_degrees_of_freedom_prior_:.3f}."
     )
     with pytest.raises(ValueError, match=msg):
         bgmm.fit(X)
diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index 7b6f73646e5ce..ddc9b542b0a5e 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -844,9 +844,9 @@ def _fit_and_score(
     progress_msg = ""
     if verbose > 2:
         if split_progress is not None:
-            progress_msg = f" {split_progress[0] + 1}/{split_progress[1]}"
+            progress_msg = f" {split_progress[0]+1}/{split_progress[1]}"
         if candidate_progress and verbose > 9:
-            progress_msg += f"; {candidate_progress[0] + 1}/{candidate_progress[1]}"
+            progress_msg += f"; {candidate_progress[0]+1}/{candidate_progress[1]}"
 
     if verbose > 1:
         if parameters is None:

From 575e2e0d3990f5e842d7a6d626730f32e4f801c2 Mon Sep 17 00:00:00 2001
From: adrinjalali <adrin.jalali@gmail.com>
Date: Fri, 12 Jul 2024 14:22:37 +0200
Subject: [PATCH 08/17] revert some changes

---
 .../plot_tweedie_regression_insurance_claims.py           | 2 +-
 sklearn/cluster/_optics.py                                | 2 +-
 sklearn/cluster/tests/test_dbscan.py                      | 2 +-
 sklearn/linear_model/tests/test_ridge.py                  | 4 ++--
 sklearn/metrics/pairwise.py                               | 4 +---
 sklearn/model_selection/_split.py                         | 2 +-
 sklearn/model_selection/tests/test_validation.py          | 8 ++++----
 sklearn/utils/estimator_checks.py                         | 2 +-
 sklearn/utils/tests/test_validation.py                    | 2 +-
 sklearn/utils/validation.py                               | 2 +-
 10 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py
index 2c3a1e6f4cea6..31a91fb37c766 100644
--- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py
+++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py
@@ -79,7 +79,7 @@ def load_mtpl2(n_samples=None):
     df["ClaimAmount"] = df["ClaimAmount"].fillna(0)
 
     # unquote string fields
-    for column_name in df.columns[df.dtypes.values is object]:
+    for column_name in df.columns[df.dtypes.values == object]:
         df[column_name] = df[column_name].str.strip("'")
     return df.iloc[:n_samples]
 
diff --git a/sklearn/cluster/_optics.py b/sklearn/cluster/_optics.py
index 347c33869aaf4..46f795e94ffb2 100755
--- a/sklearn/cluster/_optics.py
+++ b/sklearn/cluster/_optics.py
@@ -327,7 +327,7 @@ def fit(self, X, y=None):
             Returns a fitted instance of self.
         """
         dtype = bool if self.metric in PAIRWISE_BOOLEAN_FUNCTIONS else float
-        if dtype is bool and X.dtype is not bool:
+        if dtype == bool and X.dtype != bool:
             msg = (
                 "Data will be converted to boolean for"
                 f" metric {self.metric}, to avoid this warning,"
diff --git a/sklearn/cluster/tests/test_dbscan.py b/sklearn/cluster/tests/test_dbscan.py
index 556f89312d2fc..d42cc2b17d518 100644
--- a/sklearn/cluster/tests/test_dbscan.py
+++ b/sklearn/cluster/tests/test_dbscan.py
@@ -291,7 +291,7 @@ def test_input_validation():
 def test_pickle():
     obj = DBSCAN()
     s = pickle.dumps(obj)
-    assert type(pickle.loads(s)) is obj.__class__
+    assert type(pickle.loads(s)) == obj.__class__
 
 
 def test_boundaries():
diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py
index 9be28cac141b1..167ce0bac4cba 100644
--- a/sklearn/linear_model/tests/test_ridge.py
+++ b/sklearn/linear_model/tests/test_ridge.py
@@ -1020,7 +1020,7 @@ def _test_ridge_cv(sparse_container):
     ridge_cv.predict(X)
 
     assert len(ridge_cv.coef_.shape) == 1
-    assert type(ridge_cv.intercept_) is np.float64
+    assert type(ridge_cv.intercept_) == np.float64
 
     cv = KFold(5)
     ridge_cv.set_params(cv=cv)
@@ -1028,7 +1028,7 @@ def _test_ridge_cv(sparse_container):
     ridge_cv.predict(X)
 
     assert len(ridge_cv.coef_.shape) == 1
-    assert type(ridge_cv.intercept_) is np.float64
+    assert type(ridge_cv.intercept_) == np.float64
 
 
 @pytest.mark.parametrize(
diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py
index aa000c850f047..9382d585a5fe7 100644
--- a/sklearn/metrics/pairwise.py
+++ b/sklearn/metrics/pairwise.py
@@ -2364,9 +2364,7 @@ def pairwise_distances(
 
         dtype = bool if metric in PAIRWISE_BOOLEAN_FUNCTIONS else "infer_float"
 
-        if dtype is bool and (
-            X.dtype is not bool or (Y is not None and Y.dtype is not bool)
-        ):
+        if dtype == bool and (X.dtype != bool or (Y is not None and Y.dtype != bool)):
             msg = "Data was converted to boolean for metric %s" % metric
             warnings.warn(msg, DataConversionWarning)
 
diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
index af35f903e4832..bfd741eee5811 100644
--- a/sklearn/model_selection/_split.py
+++ b/sklearn/model_selection/_split.py
@@ -2935,7 +2935,7 @@ def _build_repr(self):
                 value = getattr(self, key, None)
                 if value is None and hasattr(self, "cvargs"):
                     value = self.cvargs.get(key, None)
-            if len(w) and w[0].category is FutureWarning:
+            if len(w) and w[0].category == FutureWarning:
                 # if the parameter is deprecated, don't show it
                 continue
         finally:
diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py
index 911a3bac2d672..33d4d366bf17a 100644
--- a/sklearn/model_selection/tests/test_validation.py
+++ b/sklearn/model_selection/tests/test_validation.py
@@ -586,10 +586,10 @@ def custom_scorer(clf, X, y):
             )
 
             # Make sure all the arrays are of np.ndarray type
-            assert type(cv_results["test_r2"]) is np.ndarray
-            assert type(cv_results["test_neg_mean_squared_error"]) is np.ndarray
-            assert type(cv_results["fit_time"]) is np.ndarray
-            assert type(cv_results["score_time"]) is np.ndarray
+            assert type(cv_results["test_r2"]) == np.ndarray
+            assert type(cv_results["test_neg_mean_squared_error"]) == np.ndarray
+            assert type(cv_results["fit_time"]) == np.ndarray
+            assert type(cv_results["score_time"]) == np.ndarray
 
             # Ensure all the times are within sane limits
             assert np.all(cv_results["fit_time"] >= 0)
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 62d9973f004d4..ae7a8737ff2bb 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -1509,7 +1509,7 @@ def _apply_on_subsets(func, X):
     result_by_batch = [func(batch.reshape(1, n_features)) for batch in X]
 
     # func can output tuple (e.g. score_samples)
-    if type(result_full) is tuple:
+    if type(result_full) == tuple:
         result_full = result_full[0]
         result_by_batch = list(map(lambda x: x[0], result_by_batch))
 
diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index c567cafbac624..5bde51ae514d9 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -1341,7 +1341,7 @@ def test_check_scalar_invalid(
             include_boundaries=include_boundaries,
         )
     assert str(raised_error.value) == str(err_msg)
-    assert type(raised_error.value) is type(err_msg)
+    assert type(raised_error.value) == type(err_msg)
 
 
 _psd_cases_valid = {
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 612d93f1b21aa..af9fdb4a79cba 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -879,7 +879,7 @@ def is_sparse(dtype):
         )
         if all(isinstance(dtype_iter, np.dtype) for dtype_iter in dtypes_orig):
             dtype_orig = np.result_type(*dtypes_orig)
-        elif pandas_requires_conversion and any(d is object for d in dtypes_orig):
+        elif pandas_requires_conversion and any(d == object for d in dtypes_orig):
             # Force object if any of the dtypes is an object
             dtype_orig = object
 

From b6be6b9eb38334f1f965b7859228160e83ca51b9 Mon Sep 17 00:00:00 2001
From: adrinjalali <adrin.jalali@gmail.com>
Date: Tue, 16 Jul 2024 11:25:10 +0200
Subject: [PATCH 09/17] working

---
 .../plot_time_series_lagged_features.py             |  1 -
 pyproject.toml                                      | 13 +++++++++++++
 sklearn/feature_extraction/__init__.py              |  2 +-
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/examples/applications/plot_time_series_lagged_features.py b/examples/applications/plot_time_series_lagged_features.py
index 539ff24a6c1da..edb27ade48007 100644
--- a/examples/applications/plot_time_series_lagged_features.py
+++ b/examples/applications/plot_time_series_lagged_features.py
@@ -52,7 +52,6 @@
 # https://docs.pola.rs/user-guide/lazy/optimizations/ for more information.
 
 df = pl.read_parquet(bike_sharing_data_file)
->> >> >> > upstream / main
 
 # %%
 # Next, we take a look at the statistical summary of the dataset
diff --git a/pyproject.toml b/pyproject.toml
index 1af573efb7527..5bd410388e35f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -153,6 +153,13 @@ ignore=[
     "E731",
     # do not use variables named 'l', 'O', or 'I'
     "E741",
+    # E721 is in preview (july 2024) and gives many false positives.
+    # Use `is` and `is not` for type comparisons, or `isinstance()` for
+    # isinstance checks
+    "E721",
+    # F841 is in preview (july 2024), and we don't care much about it.
+    # Local variable ... is assigned to but never used
+    "F841",
 ]
 
 [tool.ruff.lint.flake8-copyright]
@@ -168,8 +175,14 @@ notice-rgx = "\\#\\ Authors:\\ The\\ scikit\\-learn\\ developers\\\n\\#\\ SPDX\\
 "benchmarks/*"=["CPY001"]
 "doc/*"=["CPY001"]
 "build_tools/*"=["CPY001"]
+"sklearn/_build_utils/*"=["CPY001"]
 "maint_tools/*"=["CPY001"]
 ".github/*"=["CPY001"]
+# __doc__ is too long (>4096 chars) and therefore false positive on copyright check
+"examples/model_selection/plot_precision_recall.py"=["CPY001"]
+"examples/svm/plot_rbf_parameters.py"=["CPY001"]
+# __all__ has un-imported names
+"sklearn/__init__.py"=["F822"]
 
 
 [tool.cython-lint]
diff --git a/sklearn/feature_extraction/__init__.py b/sklearn/feature_extraction/__init__.py
index 621c8683f441e..3ca86d86bee68 100644
--- a/sklearn/feature_extraction/__init__.py
+++ b/sklearn/feature_extraction/__init__.py
@@ -3,7 +3,7 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-from . import text
+from . import image, text
 from ._dict_vectorizer import DictVectorizer
 from ._hash import FeatureHasher
 from .image import grid_to_graph, img_to_graph

From 2691838f691878a978303e63acc0bd7300f6e8a9 Mon Sep 17 00:00:00 2001
From: adrinjalali <adrin.jalali@gmail.com>
Date: Tue, 16 Jul 2024 11:29:09 +0200
Subject: [PATCH 10/17] bump ruff version

---
 .pre-commit-config.yaml      | 2 +-
 pyproject.toml               | 2 +-
 sklearn/_min_dependencies.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index abe14acc7778c..e8730b679a5d6 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -7,7 +7,7 @@ repos:
     -   id: trailing-whitespace
 -   repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.2.1
+    rev: v0.5.1
     hooks:
     -   id: ruff
         args: ["--fix", "--output-format=full"]
diff --git a/pyproject.toml b/pyproject.toml
index 5bd410388e35f..589ee326ef37e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -81,7 +81,7 @@ tests = [
     "pandas>=1.1.5",
     "pytest>=7.1.2",
     "pytest-cov>=2.9.0",
-    "ruff>=0.2.1",
+    "ruff>=0.5.1",
     "black>=24.3.0",
     "mypy>=1.9",
     "pyamg>=4.0.0",
diff --git a/sklearn/_min_dependencies.py b/sklearn/_min_dependencies.py
index f7df37bedda0c..eefaac0f76411 100644
--- a/sklearn/_min_dependencies.py
+++ b/sklearn/_min_dependencies.py
@@ -32,7 +32,7 @@
     "memory_profiler": ("0.57.0", "benchmark, docs"),
     "pytest": (PYTEST_MIN_VERSION, "tests"),
     "pytest-cov": ("2.9.0", "tests"),
-    "ruff": ("0.2.1", "tests"),
+    "ruff": ("0.5.1", "tests"),
     "black": ("24.3.0", "tests"),
     "mypy": ("1.9", "tests"),
     "pyamg": ("4.0.0", "tests"),

From b8edc15b26f1fa7143d96420e96091be9f263f85 Mon Sep 17 00:00:00 2001
From: adrinjalali <adrin.jalali@gmail.com>
Date: Wed, 17 Jul 2024 16:24:25 +0200
Subject: [PATCH 11/17] remove leftover authors

---
 examples/linear_model/plot_ard.py                         | 2 --
 examples/linear_model/plot_bayesian_ridge_curvefit.py     | 2 --
 examples/linear_model/plot_lasso_and_elasticnet.py        | 2 --
 examples/linear_model/plot_ridge_coeffs.py                | 2 --
 .../plot_sparse_logistic_regression_20newsgroups.py       | 2 --
 examples/manifold/plot_compare_methods.py                 | 2 --
 examples/mixture/plot_gmm_init.py                         | 3 ---
 .../model_selection/plot_grid_search_refit_callable.py    | 2 --
 examples/neighbors/plot_kde_1d.py                         | 2 --
 sklearn/_loss/link.py                                     | 2 --
 sklearn/cluster/_bisect_k_means.py                        | 2 --
 sklearn/decomposition/_lda.py                             | 2 --
 sklearn/decomposition/_truncated_svd.py                   | 5 -----
 sklearn/ensemble/_hist_gradient_boosting/binning.py       | 2 --
 .../ensemble/_hist_gradient_boosting/gradient_boosting.py | 2 --
 sklearn/ensemble/_hist_gradient_boosting/grower.py        | 2 --
 sklearn/ensemble/_hist_gradient_boosting/predictor.py     | 2 --
 sklearn/feature_selection/_mutual_info.py                 | 2 --
 sklearn/feature_selection/_variance_threshold.py          | 3 +--
 sklearn/linear_model/_logistic.py                         | 8 --------
 sklearn/manifold/_mds.py                                  | 3 ---
 sklearn/neighbors/_kde.py                                 | 1 -
 22 files changed, 1 insertion(+), 54 deletions(-)

diff --git a/examples/linear_model/plot_ard.py b/examples/linear_model/plot_ard.py
index 9b1c355ef0ef5..46cc619718afb 100644
--- a/examples/linear_model/plot_ard.py
+++ b/examples/linear_model/plot_ard.py
@@ -22,8 +22,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Author: Arturo Amor <david-arturo.amor-quiroz@inria.fr>
-
 # %%
 # Models robustness to recover the ground truth weights
 # =====================================================
diff --git a/examples/linear_model/plot_bayesian_ridge_curvefit.py b/examples/linear_model/plot_bayesian_ridge_curvefit.py
index f1c86a196c2a3..45679580dbf34 100644
--- a/examples/linear_model/plot_bayesian_ridge_curvefit.py
+++ b/examples/linear_model/plot_bayesian_ridge_curvefit.py
@@ -29,8 +29,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Author: Yoshihiro Uchida <nimbus1after2a1sun7shower@gmail.com>
-
 # %%
 # Generate sinusoidal data with noise
 # -----------------------------------
diff --git a/examples/linear_model/plot_lasso_and_elasticnet.py b/examples/linear_model/plot_lasso_and_elasticnet.py
index f0eb2400c95ef..2db597fac96ac 100644
--- a/examples/linear_model/plot_lasso_and_elasticnet.py
+++ b/examples/linear_model/plot_lasso_and_elasticnet.py
@@ -24,8 +24,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Author: Arturo Amor <david-arturo.amor-quiroz@inria.fr>
-
 # %%
 # Generate synthetic dataset
 # --------------------------
diff --git a/examples/linear_model/plot_ridge_coeffs.py b/examples/linear_model/plot_ridge_coeffs.py
index 0cff8c79a55bb..1ad7962f8bfa3 100644
--- a/examples/linear_model/plot_ridge_coeffs.py
+++ b/examples/linear_model/plot_ridge_coeffs.py
@@ -54,8 +54,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Author: Kornel Kielczewski -- <kornel.k@plusnet.pl>
-
 # %%
 # Purpose of this example
 # -----------------------
diff --git a/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py b/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py
index fc6b1c57d7ad7..fdf914f3a7ab2 100644
--- a/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py
+++ b/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py
@@ -23,8 +23,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Author: Arthur Mensch
-
 import timeit
 import warnings
 
diff --git a/examples/manifold/plot_compare_methods.py b/examples/manifold/plot_compare_methods.py
index 9c123aadda8ea..30ce4e5d8d897 100644
--- a/examples/manifold/plot_compare_methods.py
+++ b/examples/manifold/plot_compare_methods.py
@@ -23,8 +23,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Author: Jake Vanderplas -- <vanderplas@astro.washington.edu>
-
 # %%
 # Dataset preparation
 # -------------------
diff --git a/examples/mixture/plot_gmm_init.py b/examples/mixture/plot_gmm_init.py
index dd3d1c8a22692..0178d4a07af11 100644
--- a/examples/mixture/plot_gmm_init.py
+++ b/examples/mixture/plot_gmm_init.py
@@ -36,9 +36,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Author: Gordon Walsh <gordon.p.walsh@gmail.com>
-# Data generation code from Jake Vanderplas <vanderplas@astro.washington.edu>
-
 from timeit import default_timer as timer
 
 import matplotlib.pyplot as plt
diff --git a/examples/model_selection/plot_grid_search_refit_callable.py b/examples/model_selection/plot_grid_search_refit_callable.py
index 9a8bf3c70d9cc..2b13ee5ad584c 100644
--- a/examples/model_selection/plot_grid_search_refit_callable.py
+++ b/examples/model_selection/plot_grid_search_refit_callable.py
@@ -21,8 +21,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Author: Wenhao Zhang <wenhaoz@ucla.edu>
-
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/neighbors/plot_kde_1d.py b/examples/neighbors/plot_kde_1d.py
index 83734ec70bcfc..ed5a454e476ad 100644
--- a/examples/neighbors/plot_kde_1d.py
+++ b/examples/neighbors/plot_kde_1d.py
@@ -31,8 +31,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Author: Jake Vanderplas <jakevdp@cs.washington.edu>
-#
 import matplotlib.pyplot as plt
 import numpy as np
 from scipy.stats import norm
diff --git a/sklearn/_loss/link.py b/sklearn/_loss/link.py
index 1bb290aecc64a..53dff6c2e9285 100644
--- a/sklearn/_loss/link.py
+++ b/sklearn/_loss/link.py
@@ -5,8 +5,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Author: Christian Lorentzen <lorentzen.ch@gmail.com>
-
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 
diff --git a/sklearn/cluster/_bisect_k_means.py b/sklearn/cluster/_bisect_k_means.py
index d615447d913eb..134d998c90a92 100644
--- a/sklearn/cluster/_bisect_k_means.py
+++ b/sklearn/cluster/_bisect_k_means.py
@@ -3,8 +3,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Author: Michal Krawczyk <mkrwczyk.1@gmail.com>
-
 import warnings
 
 import numpy as np
diff --git a/sklearn/decomposition/_lda.py b/sklearn/decomposition/_lda.py
index f987449f1d057..ed48eb8befa63 100644
--- a/sklearn/decomposition/_lda.py
+++ b/sklearn/decomposition/_lda.py
@@ -11,8 +11,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Author: Chyi-Kwei Yau
-# Author: Matthew D. Hoffman (original onlineldavb implementation)
 from numbers import Integral, Real
 
 import numpy as np
diff --git a/sklearn/decomposition/_truncated_svd.py b/sklearn/decomposition/_truncated_svd.py
index b50ed239c6b19..a68e96fdd0c03 100644
--- a/sklearn/decomposition/_truncated_svd.py
+++ b/sklearn/decomposition/_truncated_svd.py
@@ -3,11 +3,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Author: Lars Buitinck
-#         Olivier Grisel <olivier.grisel@ensta.org>
-#         Michael Becker <mike@beckerfuffle.com>
-# License: 3-clause BSD.
-
 from numbers import Integral, Real
 
 import numpy as np
diff --git a/sklearn/ensemble/_hist_gradient_boosting/binning.py b/sklearn/ensemble/_hist_gradient_boosting/binning.py
index 551b8ffe92b06..ed1bca8558f81 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/binning.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/binning.py
@@ -9,8 +9,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Author: Nicolas Hug
-
 import numpy as np
 
 from ...base import BaseEstimator, TransformerMixin
diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
index 0735cd70b134e..8db6f7e4d5ff4 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
@@ -3,8 +3,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Author: Nicolas Hug
-
 import itertools
 import warnings
 from abc import ABC, abstractmethod
diff --git a/sklearn/ensemble/_hist_gradient_boosting/grower.py b/sklearn/ensemble/_hist_gradient_boosting/grower.py
index 98de6cd428e67..a71e564056f8f 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/grower.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/grower.py
@@ -8,8 +8,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Author: Nicolas Hug
-
 import numbers
 from heapq import heappop, heappush
 from timeit import default_timer as time
diff --git a/sklearn/ensemble/_hist_gradient_boosting/predictor.py b/sklearn/ensemble/_hist_gradient_boosting/predictor.py
index 7ab9542ef6e0f..59bb6499c4501 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/predictor.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/predictor.py
@@ -5,8 +5,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Author: Nicolas Hug
-
 import numpy as np
 
 from ._predictor import (
diff --git a/sklearn/feature_selection/_mutual_info.py b/sklearn/feature_selection/_mutual_info.py
index 42afdfb10dc3a..ede6fa9a21c34 100644
--- a/sklearn/feature_selection/_mutual_info.py
+++ b/sklearn/feature_selection/_mutual_info.py
@@ -1,7 +1,5 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
-# Author: Nikolay Mayorov <n59_ru@hotmail.com>
-# License: 3-clause BSD
 
 from numbers import Integral
 
diff --git a/sklearn/feature_selection/_variance_threshold.py b/sklearn/feature_selection/_variance_threshold.py
index a9b360037590f..7494b72c1acb8 100644
--- a/sklearn/feature_selection/_variance_threshold.py
+++ b/sklearn/feature_selection/_variance_threshold.py
@@ -1,7 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
-# Author: Lars Buitinck
-# License: 3-clause BSD
+
 from numbers import Real
 
 import numpy as np
diff --git a/sklearn/linear_model/_logistic.py b/sklearn/linear_model/_logistic.py
index 6362a7fa45395..fd34daed115e2 100644
--- a/sklearn/linear_model/_logistic.py
+++ b/sklearn/linear_model/_logistic.py
@@ -5,14 +5,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Author: Gael Varoquaux <gael.varoquaux@normalesup.org>
-#         Fabian Pedregosa <f@bianp.net>
-#         Alexandre Gramfort <alexandre.gramfort@telecom-paristech.fr>
-#         Manoj Kumar <manojkumarsivaraj334@gmail.com>
-#         Lars Buitinck
-#         Simon Wu <s8wu@uwaterloo.ca>
-#         Arthur Mensch <arthur.mensch@m4x.org
-
 import numbers
 import warnings
 from numbers import Integral, Real
diff --git a/sklearn/manifold/_mds.py b/sklearn/manifold/_mds.py
index db8ae2ff40444..67e8b3f913f06 100644
--- a/sklearn/manifold/_mds.py
+++ b/sklearn/manifold/_mds.py
@@ -5,9 +5,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# author: Nelle Varoquaux <nelle.varoquaux@gmail.com>
-# SPDX-License-Identifier: BSD-3-Clause
-
 import warnings
 from numbers import Integral, Real
 
diff --git a/sklearn/neighbors/_kde.py b/sklearn/neighbors/_kde.py
index 0e4b45cc77e36..73c50e848ae2b 100644
--- a/sklearn/neighbors/_kde.py
+++ b/sklearn/neighbors/_kde.py
@@ -6,7 +6,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Author: Jake Vanderplas <jakevdp@cs.washington.edu>
 import itertools
 from numbers import Integral, Real
 

From 96f80c7ad25cc411aeb7b674bc23d340d08163f3 Mon Sep 17 00:00:00 2001
From: adrinjalali <adrin.jalali@gmail.com>
Date: Wed, 17 Jul 2024 16:39:45 +0200
Subject: [PATCH 12/17] Jeremie's comments

---
 pyproject.toml                  | 2 +-
 sklearn/utils/_show_versions.py | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 40c04d3840552..2e26327324b9d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -163,7 +163,7 @@ ignore=[
 ]
 
 [tool.ruff.lint.flake8-copyright]
-notice-rgx = "\\#\\ Authors:\\ The\\ scikit\\-learn\\ developers\\\n\\#\\ SPDX\\-License\\-Identifier:\\ BSD\\-3\\-Clause"
+notice-rgx = "\\#\\ Authors:\\ The\\ scikit\\-learn\\ developers\\\n\\#\\ SPDX\\-License\\-Identifier:\\ BSD\\-3\\-Clause\\\n"
 
 [tool.ruff.lint.per-file-ignores]
 # It's fine not to put the import at the top of the file in the examples
diff --git a/sklearn/utils/_show_versions.py b/sklearn/utils/_show_versions.py
index 43d2c15dbd7a5..cbdece30db326 100644
--- a/sklearn/utils/_show_versions.py
+++ b/sklearn/utils/_show_versions.py
@@ -7,8 +7,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# SPDX-License-Identifier: BSD-3-Clause
-
 import platform
 import sys
 

From b7bb8284d6485e10029fcbe3ad23dd55b9eb93d1 Mon Sep 17 00:00:00 2001
From: adrinjalali <adrin.jalali@gmail.com>
Date: Fri, 19 Jul 2024 17:29:19 +0200
Subject: [PATCH 13/17] remove more leftover authors

---
 benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py | 2 --
 examples/applications/plot_digits_denoising.py              | 3 ---
 examples/cluster/plot_inductive_clustering.py               | 3 ---
 examples/compose/plot_compare_reduction.py                  | 4 ----
 examples/model_selection/plot_likelihood_ratios.py          | 2 --
 sklearn/cluster/_mean_shift.py                              | 5 -----
 sklearn/cluster/tests/test_feature_agglomeration.py         | 1 -
 sklearn/metrics/_scorer.py                                  | 5 -----
 sklearn/preprocessing/_csr_polynomial_expansion.pyx         | 5 +++--
 9 files changed, 3 insertions(+), 27 deletions(-)

diff --git a/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py b/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py
index 26789c173688f..a468f7b3e1abf 100644
--- a/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py
+++ b/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py
@@ -36,8 +36,6 @@
 of components (this takes more time).
 """
 
-# Authors: Sylvain MARIE, Schneider Electric
-
 import time
 
 import matplotlib.pyplot as plt
diff --git a/examples/applications/plot_digits_denoising.py b/examples/applications/plot_digits_denoising.py
index 73b5a8034f8d6..8ca31da6a74d2 100644
--- a/examples/applications/plot_digits_denoising.py
+++ b/examples/applications/plot_digits_denoising.py
@@ -24,9 +24,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Authors: Guillaume Lemaitre <guillaume.lemaitre@inria.fr>
-# Licence: BSD 3 clause
-
 # %%
 # Load the dataset via OpenML
 # ---------------------------
diff --git a/examples/cluster/plot_inductive_clustering.py b/examples/cluster/plot_inductive_clustering.py
index b92a814cd4ef3..29846b15cdb60 100644
--- a/examples/cluster/plot_inductive_clustering.py
+++ b/examples/cluster/plot_inductive_clustering.py
@@ -23,9 +23,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Authors: Chirag Nagpal
-#          Christos Aridas
-
 import matplotlib.pyplot as plt
 
 from sklearn.base import BaseEstimator, clone
diff --git a/examples/compose/plot_compare_reduction.py b/examples/compose/plot_compare_reduction.py
index 2aeb9fae4af5e..cfe22d646244c 100644
--- a/examples/compose/plot_compare_reduction.py
+++ b/examples/compose/plot_compare_reduction.py
@@ -23,10 +23,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Authors: Robert McGibbon
-#          Joel Nothman
-#          Guillaume Lemaitre
-
 # %%
 # Illustration of ``Pipeline`` and ``GridSearchCV``
 ###############################################################################
diff --git a/examples/model_selection/plot_likelihood_ratios.py b/examples/model_selection/plot_likelihood_ratios.py
index 2fc3ad3d040f5..b5a68eb79810f 100644
--- a/examples/model_selection/plot_likelihood_ratios.py
+++ b/examples/model_selection/plot_likelihood_ratios.py
@@ -28,8 +28,6 @@ class proportion than the target application.
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Authors:  Arturo Amor <david-arturo.amor-quiroz@inria.fr>
-#           Olivier Grisel <olivier.grisel@ensta.org>
 # %%
 # Pre-test vs. post-test analysis
 # ===============================
diff --git a/sklearn/cluster/_mean_shift.py b/sklearn/cluster/_mean_shift.py
index db5c22d9650bf..900494536381a 100644
--- a/sklearn/cluster/_mean_shift.py
+++ b/sklearn/cluster/_mean_shift.py
@@ -12,11 +12,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Authors: Conrad Lee <conradlee@gmail.com>
-#          Alexandre Gramfort <alexandre.gramfort@inria.fr>
-#          Gael Varoquaux <gael.varoquaux@normalesup.org>
-#          Martino Sorbaro <martino.sorbaro@ed.ac.uk>
-
 import warnings
 from collections import defaultdict
 from numbers import Integral, Real
diff --git a/sklearn/cluster/tests/test_feature_agglomeration.py b/sklearn/cluster/tests/test_feature_agglomeration.py
index 488dd638ad125..ef8596c0813f8 100644
--- a/sklearn/cluster/tests/test_feature_agglomeration.py
+++ b/sklearn/cluster/tests/test_feature_agglomeration.py
@@ -2,7 +2,6 @@
 Tests for sklearn.cluster._feature_agglomeration
 """
 
-# Authors: Sergul Aydore 2017
 import warnings
 
 import numpy as np
diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index 385ed1564bc58..b735da25d577c 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -16,11 +16,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Authors: Andreas Mueller <amueller@ais.uni-bonn.de>
-#          Lars Buitinck
-#          Arnaud Joly <arnaud.v.joly@gmail.com>
-# License: Simplified BSD
-
 import copy
 import warnings
 from collections import Counter
diff --git a/sklearn/preprocessing/_csr_polynomial_expansion.pyx b/sklearn/preprocessing/_csr_polynomial_expansion.pyx
index 017af83f035b2..38e5c3069d252 100644
--- a/sklearn/preprocessing/_csr_polynomial_expansion.pyx
+++ b/sklearn/preprocessing/_csr_polynomial_expansion.pyx
@@ -1,5 +1,6 @@
-# Authors: Andrew nystrom <awnystrom@gmail.com>
-#          Meekail Zain <zainmeekail@gmail.com>
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 from ..utils._typedefs cimport uint8_t, int64_t, intp_t
 
 ctypedef uint8_t FLAG_t

From 1c0d9e65c03b035684b671532e14147bec029286 Mon Sep 17 00:00:00 2001
From: adrinjalali <adrin.jalali@gmail.com>
Date: Fri, 19 Jul 2024 17:33:58 +0200
Subject: [PATCH 14/17] remove Code Source instances

---
 examples/classification/plot_classifier_comparison.py        | 5 -----
 examples/cluster/plot_cluster_iris.py                        | 4 ----
 examples/cluster/plot_digits_agglomeration.py                | 4 ----
 examples/compose/plot_digits_pipe.py                         | 4 ----
 examples/datasets/plot_digits_last_image.py                  | 4 ----
 examples/datasets/plot_iris_dataset.py                       | 4 ----
 examples/decomposition/plot_pca_iris.py                      | 3 ---
 examples/linear_model/plot_iris_logistic.py                  | 4 ----
 examples/linear_model/plot_logistic.py                       | 3 ---
 examples/linear_model/plot_ols.py                            | 3 ---
 examples/linear_model/plot_ols_3d.py                         | 4 ----
 examples/linear_model/plot_ols_ridge_variance.py             | 5 -----
 examples/preprocessing/plot_discretization_classification.py | 5 -----
 examples/svm/plot_svm_kernels.py                             | 3 ---
 examples/svm/plot_svm_margin.py                              | 4 ----
 examples/svm/plot_svm_tie_breaking.py                        | 3 ---
 16 files changed, 62 deletions(-)

diff --git a/examples/classification/plot_classifier_comparison.py b/examples/classification/plot_classifier_comparison.py
index 3108618091b4f..5747d00ba7950 100644
--- a/examples/classification/plot_classifier_comparison.py
+++ b/examples/classification/plot_classifier_comparison.py
@@ -22,11 +22,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Code source: Gaël Varoquaux
-#              Andreas Müller
-# Modified for documentation by Jaques Grobler
-# SPDX-License-Identifier: BSD-3-Clause
-
 import matplotlib.pyplot as plt
 import numpy as np
 from matplotlib.colors import ListedColormap
diff --git a/examples/cluster/plot_cluster_iris.py b/examples/cluster/plot_cluster_iris.py
index e469eeb3a86ff..1a34a9b3534bc 100644
--- a/examples/cluster/plot_cluster_iris.py
+++ b/examples/cluster/plot_cluster_iris.py
@@ -21,10 +21,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Code source: Gaël Varoquaux
-# Modified for documentation by Jaques Grobler
-# SPDX-License-Identifier: BSD-3-Clause
-
 import matplotlib.pyplot as plt
 
 # Though the following import is not directly being used, it is required
diff --git a/examples/cluster/plot_digits_agglomeration.py b/examples/cluster/plot_digits_agglomeration.py
index d4a427905d91f..8de14b0729f53 100644
--- a/examples/cluster/plot_digits_agglomeration.py
+++ b/examples/cluster/plot_digits_agglomeration.py
@@ -11,10 +11,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Code source: Gaël Varoquaux
-# Modified for documentation by Jaques Grobler
-# SPDX-License-Identifier: BSD-3-Clause
-
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/compose/plot_digits_pipe.py b/examples/compose/plot_digits_pipe.py
index c23e9b3b96a08..8a202bb5bd74c 100644
--- a/examples/compose/plot_digits_pipe.py
+++ b/examples/compose/plot_digits_pipe.py
@@ -13,10 +13,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Code source: Gaël Varoquaux
-# Modified for documentation by Jaques Grobler
-# SPDX-License-Identifier: BSD-3-Clause
-
 import matplotlib.pyplot as plt
 import numpy as np
 import polars as pl
diff --git a/examples/datasets/plot_digits_last_image.py b/examples/datasets/plot_digits_last_image.py
index 836db4b37758c..eada3b12588da 100644
--- a/examples/datasets/plot_digits_last_image.py
+++ b/examples/datasets/plot_digits_last_image.py
@@ -17,10 +17,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Code source: Gaël Varoquaux
-# Modified for documentation by Jaques Grobler
-# SPDX-License-Identifier: BSD-3-Clause
-
 import matplotlib.pyplot as plt
 
 from sklearn import datasets
diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py
index ea9c89bbb6082..d9560e51ef245 100644
--- a/examples/datasets/plot_iris_dataset.py
+++ b/examples/datasets/plot_iris_dataset.py
@@ -18,10 +18,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Code source: Gaël Varoquaux
-# Modified for documentation by Jaques Grobler
-# SPDX-License-Identifier: BSD-3-Clause
-
 # %%
 # Loading the iris dataset
 # ------------------------
diff --git a/examples/decomposition/plot_pca_iris.py b/examples/decomposition/plot_pca_iris.py
index 66c7bc7994129..1ceecc0058b67 100644
--- a/examples/decomposition/plot_pca_iris.py
+++ b/examples/decomposition/plot_pca_iris.py
@@ -13,9 +13,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Code source: Gaël Varoquaux
-# SPDX-License-Identifier: BSD-3-Clause
-
 import matplotlib.pyplot as plt
 
 # unused but required import for doing 3d projections with matplotlib < 3.2
diff --git a/examples/linear_model/plot_iris_logistic.py b/examples/linear_model/plot_iris_logistic.py
index 288de6a886550..481312c94c789 100644
--- a/examples/linear_model/plot_iris_logistic.py
+++ b/examples/linear_model/plot_iris_logistic.py
@@ -13,10 +13,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Code source: Gaël Varoquaux
-# Modified for documentation by Jaques Grobler
-# SPDX-License-Identifier: BSD-3-Clause
-
 import matplotlib.pyplot as plt
 
 from sklearn import datasets
diff --git a/examples/linear_model/plot_logistic.py b/examples/linear_model/plot_logistic.py
index 8ddd4deef2a8f..b54c1fbf1340d 100644
--- a/examples/linear_model/plot_logistic.py
+++ b/examples/linear_model/plot_logistic.py
@@ -12,9 +12,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Code source: Gael Varoquaux
-# SPDX-License-Identifier: BSD-3-Clause
-
 import matplotlib.pyplot as plt
 import numpy as np
 from scipy.special import expit
diff --git a/examples/linear_model/plot_ols.py b/examples/linear_model/plot_ols.py
index bb4a242cdbad2..8aaa35ed8d899 100644
--- a/examples/linear_model/plot_ols.py
+++ b/examples/linear_model/plot_ols.py
@@ -17,9 +17,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Code source: Jaques Grobler
-# SPDX-License-Identifier: BSD-3-Clause
-
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/linear_model/plot_ols_3d.py b/examples/linear_model/plot_ols_3d.py
index 38fbcca546297..cd848f659e8d8 100644
--- a/examples/linear_model/plot_ols_3d.py
+++ b/examples/linear_model/plot_ols_3d.py
@@ -12,10 +12,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Code source: Gaël Varoquaux
-# Modified for documentation by Jaques Grobler
-# SPDX-License-Identifier: BSD-3-Clause
-
 # %%
 # First we load the diabetes dataset.
 
diff --git a/examples/linear_model/plot_ols_ridge_variance.py b/examples/linear_model/plot_ols_ridge_variance.py
index 86c28b4d3ae23..a65cc6eb7b7d1 100644
--- a/examples/linear_model/plot_ols_ridge_variance.py
+++ b/examples/linear_model/plot_ols_ridge_variance.py
@@ -22,11 +22,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Code source: Gaël Varoquaux
-# Modified for documentation by Jaques Grobler
-# SPDX-License-Identifier: BSD-3-Clause
-
-
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/preprocessing/plot_discretization_classification.py b/examples/preprocessing/plot_discretization_classification.py
index 310089e296883..1eeb9f169bf3b 100644
--- a/examples/preprocessing/plot_discretization_classification.py
+++ b/examples/preprocessing/plot_discretization_classification.py
@@ -31,11 +31,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Code source: Tom Dupré la Tour
-# Adapted from plot_classifier_comparison by Gaël Varoquaux and Andreas Müller
-#
-# SPDX-License-Identifier: BSD-3-Clause
-
 import matplotlib.pyplot as plt
 import numpy as np
 from matplotlib.colors import ListedColormap
diff --git a/examples/svm/plot_svm_kernels.py b/examples/svm/plot_svm_kernels.py
index 480293a03b9f0..798e62bbb7b4e 100644
--- a/examples/svm/plot_svm_kernels.py
+++ b/examples/svm/plot_svm_kernels.py
@@ -39,9 +39,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Code source: Gaël Varoquaux
-# SPDX-License-Identifier: BSD-3-Clause
-
 # %%
 # Creating a dataset
 # ------------------
diff --git a/examples/svm/plot_svm_margin.py b/examples/svm/plot_svm_margin.py
index 671b7f4902bc8..f38858bb714a9 100644
--- a/examples/svm/plot_svm_margin.py
+++ b/examples/svm/plot_svm_margin.py
@@ -16,10 +16,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Code source: Gaël Varoquaux
-# Modified for documentation by Jaques Grobler
-# SPDX-License-Identifier: BSD-3-Clause
-
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/svm/plot_svm_tie_breaking.py b/examples/svm/plot_svm_tie_breaking.py
index a9276efd44572..b5f4fb8dd18c3 100644
--- a/examples/svm/plot_svm_tie_breaking.py
+++ b/examples/svm/plot_svm_tie_breaking.py
@@ -17,9 +17,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Code source: Andreas Mueller, Adrin Jalali
-# SPDX-License-Identifier: BSD-3-Clause
-
 import matplotlib.pyplot as plt
 import numpy as np
 

From b5db517f0c810254ab60c9a676824b1b44e1faf8 Mon Sep 17 00:00:00 2001
From: adrinjalali <adrin.jalali@gmail.com>
Date: Fri, 19 Jul 2024 17:37:31 +0200
Subject: [PATCH 15/17] remove duplicate license statements

---
 examples/ensemble/plot_adaboost_multiclass.py | 3 ---
 examples/neighbors/plot_nca_classification.py | 2 --
 examples/neighbors/plot_nca_dim_reduction.py  | 2 --
 examples/neighbors/plot_nca_illustration.py   | 2 --
 sklearn/neural_network/__init__.py            | 2 --
 sklearn/utils/optimize.py                     | 2 --
 6 files changed, 13 deletions(-)

diff --git a/examples/ensemble/plot_adaboost_multiclass.py b/examples/ensemble/plot_adaboost_multiclass.py
index db0200997d9e0..a18ff4e09c7bb 100644
--- a/examples/ensemble/plot_adaboost_multiclass.py
+++ b/examples/ensemble/plot_adaboost_multiclass.py
@@ -28,9 +28,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Noel Dawe <noel.dawe@gmail.com>
-# SPDX-License-Identifier: BSD-3-Clause
-
 # %%
 # Creating the dataset
 # --------------------
diff --git a/examples/neighbors/plot_nca_classification.py b/examples/neighbors/plot_nca_classification.py
index 496038cec7f88..b8d69b82fec42 100644
--- a/examples/neighbors/plot_nca_classification.py
+++ b/examples/neighbors/plot_nca_classification.py
@@ -18,8 +18,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# SPDX-License-Identifier: BSD-3-Clause
-
 import matplotlib.pyplot as plt
 from matplotlib.colors import ListedColormap
 
diff --git a/examples/neighbors/plot_nca_dim_reduction.py b/examples/neighbors/plot_nca_dim_reduction.py
index 1e6305549a46c..fcf2b0f602d20 100644
--- a/examples/neighbors/plot_nca_dim_reduction.py
+++ b/examples/neighbors/plot_nca_dim_reduction.py
@@ -31,8 +31,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# SPDX-License-Identifier: BSD-3-Clause
-
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/examples/neighbors/plot_nca_illustration.py b/examples/neighbors/plot_nca_illustration.py
index 8d847f28d6e4d..e67bdb4b2d4d7 100644
--- a/examples/neighbors/plot_nca_illustration.py
+++ b/examples/neighbors/plot_nca_illustration.py
@@ -13,8 +13,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# SPDX-License-Identifier: BSD-3-Clause
-
 import matplotlib.pyplot as plt
 import numpy as np
 from matplotlib import cm
diff --git a/sklearn/neural_network/__init__.py b/sklearn/neural_network/__init__.py
index fa580a8b40642..fa5980ce24f5c 100644
--- a/sklearn/neural_network/__init__.py
+++ b/sklearn/neural_network/__init__.py
@@ -3,8 +3,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# SPDX-License-Identifier: BSD-3-Clause
-
 from ._multilayer_perceptron import MLPClassifier, MLPRegressor
 from ._rbm import BernoulliRBM
 
diff --git a/sklearn/utils/optimize.py b/sklearn/utils/optimize.py
index 519f87470343c..fe60136c12d09 100644
--- a/sklearn/utils/optimize.py
+++ b/sklearn/utils/optimize.py
@@ -14,8 +14,6 @@
 
 # This is a modified file from scipy.optimize
 # Original authors: Travis Oliphant, Eric Jones
-# Modifications by Gael Varoquaux, Mathieu Blondel and Tom Dupre la Tour
-# SPDX-License-Identifier: BSD-3-Clause
 
 import warnings
 

From 3c71f31837a07db63eb0096f8a182f4c6bf40202 Mon Sep 17 00:00:00 2001
From: adrinjalali <adrin.jalali@gmail.com>
Date: Mon, 22 Jul 2024 13:59:21 +0200
Subject: [PATCH 16/17] ensure the newline after the copyright notice

---
 benchmarks/bench_plot_parallel_pairwise.py                      | 1 +
 examples/calibration/plot_calibration_curve.py                  | 1 +
 examples/calibration/plot_compare_calibration.py                | 1 +
 examples/cluster/plot_agglomerative_dendrogram.py               | 1 +
 examples/cluster/plot_hdbscan.py                                | 1 +
 .../linear_model/plot_poisson_regression_non_normal_loss.py     | 1 +
 .../linear_model/plot_tweedie_regression_insurance_claims.py    | 1 +
 examples/neighbors/plot_caching_nearest_neighbors.py            | 1 +
 sklearn/_build_utils/tempita.py                                 | 1 +
 sklearn/cluster/tests/test_hierarchical.py                      | 1 +
 sklearn/cluster/tests/test_optics.py                            | 1 +
 sklearn/compose/_column_transformer.py                          | 1 +
 sklearn/conftest.py                                             | 1 +
 sklearn/covariance/_graph_lasso.py                              | 1 +
 sklearn/datasets/_base.py                                       | 1 +
 sklearn/datasets/_openml.py                                     | 1 +
 sklearn/ensemble/tests/test_bagging.py                          | 1 +
 sklearn/feature_extraction/_stop_words.py                       | 1 +
 sklearn/impute/_iterative.py                                    | 1 +
 sklearn/inspection/_pd_utils.py                                 | 2 ++
 sklearn/inspection/_plot/decision_boundary.py                   | 1 +
 sklearn/inspection/_plot/partial_dependence.py                  | 1 +
 sklearn/kernel_ridge.py                                         | 1 +
 sklearn/linear_model/_glm/__init__.py                           | 1 +
 sklearn/linear_model/_glm/_newton_solver.py                     | 1 +
 sklearn/linear_model/_glm/glm.py                                | 1 +
 sklearn/linear_model/_glm/tests/test_glm.py                     | 1 +
 sklearn/linear_model/_passive_aggressive.py                     | 1 +
 sklearn/linear_model/_perceptron.py                             | 1 +
 sklearn/linear_model/_quantile.py                               | 1 +
 sklearn/linear_model/_sgd_fast.pxd                              | 1 +
 sklearn/linear_model/_stochastic_gradient.py                    | 1 +
 sklearn/linear_model/tests/test_theil_sen.py                    | 1 +
 sklearn/manifold/_isomap.py                                     | 1 +
 sklearn/metrics/_pairwise_distances_reduction/__init__.py       | 1 +
 sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py    | 1 +
 sklearn/metrics/_plot/confusion_matrix.py                       | 1 +
 sklearn/metrics/_plot/det_curve.py                              | 1 +
 sklearn/metrics/_plot/precision_recall_curve.py                 | 1 +
 sklearn/metrics/_plot/regression.py                             | 1 +
 sklearn/metrics/_plot/roc_curve.py                              | 1 +
 sklearn/metrics/cluster/_bicluster.py                           | 1 +
 sklearn/mixture/tests/test_bayesian_mixture.py                  | 1 +
 sklearn/model_selection/_classification_threshold.py            | 1 +
 sklearn/model_selection/_plot.py                                | 1 +
 sklearn/model_selection/_search_successive_halving.py           | 1 +
 sklearn/naive_bayes.py                                          | 1 +
 sklearn/neighbors/_base.py                                      | 1 +
 sklearn/neighbors/_classification.py                            | 1 +
 sklearn/neighbors/_graph.py                                     | 1 +
 sklearn/neighbors/_regression.py                                | 1 -
 sklearn/preprocessing/_function_transformer.py                  | 1 +
 sklearn/preprocessing/_target_encoder.py                        | 1 +
 sklearn/semi_supervised/_label_propagation.py                   | 1 +
 sklearn/svm/_base.py                                            | 1 +
 sklearn/svm/_classes.py                                         | 1 +
 sklearn/tree/_export.py                                         | 1 +
 sklearn/utils/_arpack.py                                        | 1 +
 sklearn/utils/_available_if.py                                  | 1 +
 sklearn/utils/_bunch.py                                         | 1 +
 sklearn/utils/_chunking.py                                      | 1 +
 sklearn/utils/_encode.py                                        | 1 +
 sklearn/utils/_estimator_html_repr.py                           | 1 +
 sklearn/utils/_fast_dict.pxd                                    | 1 +
 sklearn/utils/_indexing.py                                      | 1 +
 sklearn/utils/_joblib.py                                        | 1 +
 sklearn/utils/_mask.py                                          | 1 +
 sklearn/utils/_missing.py                                       | 1 +
 sklearn/utils/_mocking.py                                       | 1 +
 sklearn/utils/_optional_dependencies.py                         | 2 ++
 sklearn/utils/_param_validation.py                              | 1 +
 sklearn/utils/_plotting.py                                      | 1 +
 sklearn/utils/_random.pyx                                       | 1 +
 sklearn/utils/_set_output.py                                    | 1 +
 sklearn/utils/_tags.py                                          | 1 +
 sklearn/utils/_testing.py                                       | 1 +
 sklearn/utils/_user_interface.py                                | 1 +
 sklearn/utils/deprecation.py                                    | 1 +
 sklearn/utils/metaestimators.py                                 | 1 +
 sklearn/utils/random.py                                         | 1 +
 sklearn/utils/sparsefuncs.py                                    | 1 +
 sklearn/utils/stats.py                                          | 1 +
 sklearn/utils/tests/test_extmath.py                             | 1 +
 83 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/benchmarks/bench_plot_parallel_pairwise.py b/benchmarks/bench_plot_parallel_pairwise.py
index a6eb6c04bffab..5b7cf81f8fce4 100644
--- a/benchmarks/bench_plot_parallel_pairwise.py
+++ b/benchmarks/bench_plot_parallel_pairwise.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import time
 
 import matplotlib.pyplot as plt
diff --git a/examples/calibration/plot_calibration_curve.py b/examples/calibration/plot_calibration_curve.py
index 03bc28e25740a..1c5e297026ae7 100644
--- a/examples/calibration/plot_calibration_curve.py
+++ b/examples/calibration/plot_calibration_curve.py
@@ -14,6 +14,7 @@
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 # Dataset
 # -------
diff --git a/examples/calibration/plot_compare_calibration.py b/examples/calibration/plot_compare_calibration.py
index 33898d8f151f3..aa60de1032765 100644
--- a/examples/calibration/plot_compare_calibration.py
+++ b/examples/calibration/plot_compare_calibration.py
@@ -19,6 +19,7 @@
 # %%
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 #
 # Dataset
 # -------
diff --git a/examples/cluster/plot_agglomerative_dendrogram.py b/examples/cluster/plot_agglomerative_dendrogram.py
index 2acb3675ff7c1..bea2a5e84653a 100644
--- a/examples/cluster/plot_agglomerative_dendrogram.py
+++ b/examples/cluster/plot_agglomerative_dendrogram.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 """
 =========================================
 Plot Hierarchical Clustering Dendrogram
diff --git a/examples/cluster/plot_hdbscan.py b/examples/cluster/plot_hdbscan.py
index 07cea76ae072a..64d4936694bf3 100644
--- a/examples/cluster/plot_hdbscan.py
+++ b/examples/cluster/plot_hdbscan.py
@@ -15,6 +15,7 @@
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 # %%
 import matplotlib.pyplot as plt
 import numpy as np
diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py
index 180ee3b70671c..741a92767e953 100644
--- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py
+++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 """
 ======================================
 Poisson regression and non-normal loss
diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py
index 31a91fb37c766..1e987bfaa6dc2 100644
--- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py
+++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 """
 ======================================
 Tweedie regression on insurance claims
diff --git a/examples/neighbors/plot_caching_nearest_neighbors.py b/examples/neighbors/plot_caching_nearest_neighbors.py
index f5c3ea200943f..ea6a884c3d486 100644
--- a/examples/neighbors/plot_caching_nearest_neighbors.py
+++ b/examples/neighbors/plot_caching_nearest_neighbors.py
@@ -20,6 +20,7 @@
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 from tempfile import TemporaryDirectory
 
 import matplotlib.pyplot as plt
diff --git a/sklearn/_build_utils/tempita.py b/sklearn/_build_utils/tempita.py
index fd59fe51c7ba6..c92ea17d2a9b9 100644
--- a/sklearn/_build_utils/tempita.py
+++ b/sklearn/_build_utils/tempita.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import argparse
 import os
 
diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py
index a36e5537a3636..65f8184e3f444 100644
--- a/sklearn/cluster/tests/test_hierarchical.py
+++ b/sklearn/cluster/tests/test_hierarchical.py
@@ -5,6 +5,7 @@
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import itertools
 import shutil
 from functools import partial
diff --git a/sklearn/cluster/tests/test_optics.py b/sklearn/cluster/tests/test_optics.py
index 9c1c13a9e12e0..95324704f6371 100644
--- a/sklearn/cluster/tests/test_optics.py
+++ b/sklearn/cluster/tests/test_optics.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import warnings
 
 import numpy as np
diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py
index a5aa7db17d4ae..5e08a68b63c16 100644
--- a/sklearn/compose/_column_transformer.py
+++ b/sklearn/compose/_column_transformer.py
@@ -6,6 +6,7 @@
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import warnings
 from collections import Counter, UserList
 from itertools import chain
diff --git a/sklearn/conftest.py b/sklearn/conftest.py
index 3dbca84a33ea9..a22c2ec5d39f6 100644
--- a/sklearn/conftest.py
+++ b/sklearn/conftest.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import builtins
 import platform
 import sys
diff --git a/sklearn/covariance/_graph_lasso.py b/sklearn/covariance/_graph_lasso.py
index 2007aa4085bd2..1de2aeee6c590 100644
--- a/sklearn/covariance/_graph_lasso.py
+++ b/sklearn/covariance/_graph_lasso.py
@@ -4,6 +4,7 @@
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import operator
 import sys
 import time
diff --git a/sklearn/datasets/_base.py b/sklearn/datasets/_base.py
index 62055d296402b..aaf3d738f85f4 100644
--- a/sklearn/datasets/_base.py
+++ b/sklearn/datasets/_base.py
@@ -4,6 +4,7 @@
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import csv
 import gzip
 import hashlib
diff --git a/sklearn/datasets/_openml.py b/sklearn/datasets/_openml.py
index e270676272b03..4790431506bce 100644
--- a/sklearn/datasets/_openml.py
+++ b/sklearn/datasets/_openml.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import gzip
 import hashlib
 import json
diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py
index e0558917c59ba..4b1c4323d509a 100644
--- a/sklearn/ensemble/tests/test_bagging.py
+++ b/sklearn/ensemble/tests/test_bagging.py
@@ -4,6 +4,7 @@
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 from itertools import cycle, product
 
 import joblib
diff --git a/sklearn/feature_extraction/_stop_words.py b/sklearn/feature_extraction/_stop_words.py
index ac5c9f495ae84..6bc8e6d2f37dc 100644
--- a/sklearn/feature_extraction/_stop_words.py
+++ b/sklearn/feature_extraction/_stop_words.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 # This list of English stop words is taken from the "Glasgow Information
 # Retrieval Group". The original list can be found at
 # http://ir.dcs.gla.ac.uk/resources/linguistic_utils/stop_words
diff --git a/sklearn/impute/_iterative.py b/sklearn/impute/_iterative.py
index 22cede929ebb3..e2d06844611c9 100644
--- a/sklearn/impute/_iterative.py
+++ b/sklearn/impute/_iterative.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import warnings
 from collections import namedtuple
 from numbers import Integral, Real
diff --git a/sklearn/inspection/_pd_utils.py b/sklearn/inspection/_pd_utils.py
index 4d890212e2838..a48ba4d9a4490 100644
--- a/sklearn/inspection/_pd_utils.py
+++ b/sklearn/inspection/_pd_utils.py
@@ -1,5 +1,7 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
+
 def _check_feature_names(X, feature_names=None):
     """Check feature names.
 
diff --git a/sklearn/inspection/_plot/decision_boundary.py b/sklearn/inspection/_plot/decision_boundary.py
index b87316f670cc9..3a9cc17df72a2 100644
--- a/sklearn/inspection/_plot/decision_boundary.py
+++ b/sklearn/inspection/_plot/decision_boundary.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import numpy as np
 
 from ...base import is_regressor
diff --git a/sklearn/inspection/_plot/partial_dependence.py b/sklearn/inspection/_plot/partial_dependence.py
index 8a5118df6862b..ecb421ccdd68a 100644
--- a/sklearn/inspection/_plot/partial_dependence.py
+++ b/sklearn/inspection/_plot/partial_dependence.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import numbers
 from itertools import chain
 from math import ceil
diff --git a/sklearn/kernel_ridge.py b/sklearn/kernel_ridge.py
index 31eade5255e75..443dc7cc9e483 100644
--- a/sklearn/kernel_ridge.py
+++ b/sklearn/kernel_ridge.py
@@ -2,6 +2,7 @@
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 from numbers import Real
 
 import numpy as np
diff --git a/sklearn/linear_model/_glm/__init__.py b/sklearn/linear_model/_glm/__init__.py
index 199b938b023d0..d0a51e65d3211 100644
--- a/sklearn/linear_model/_glm/__init__.py
+++ b/sklearn/linear_model/_glm/__init__.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 from .glm import (
     GammaRegressor,
     PoissonRegressor,
diff --git a/sklearn/linear_model/_glm/_newton_solver.py b/sklearn/linear_model/_glm/_newton_solver.py
index b2be604d931c5..400ac79c7c55c 100644
--- a/sklearn/linear_model/_glm/_newton_solver.py
+++ b/sklearn/linear_model/_glm/_newton_solver.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 """
 Newton solver for Generalized Linear Models
 """
diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py
index 14caa4fd733c2..b8f8d9b11f99b 100644
--- a/sklearn/linear_model/_glm/glm.py
+++ b/sklearn/linear_model/_glm/glm.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 """
 Generalized Linear Models with Exponential Dispersion Family
 """
diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py
index 7f6ec64c15ad4..bf30b071e290b 100644
--- a/sklearn/linear_model/_glm/tests/test_glm.py
+++ b/sklearn/linear_model/_glm/tests/test_glm.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import itertools
 import warnings
 from functools import partial
diff --git a/sklearn/linear_model/_passive_aggressive.py b/sklearn/linear_model/_passive_aggressive.py
index 8d4dc2d3a2c23..61eb06edae85f 100644
--- a/sklearn/linear_model/_passive_aggressive.py
+++ b/sklearn/linear_model/_passive_aggressive.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 from numbers import Real
 
 from ..base import _fit_context
diff --git a/sklearn/linear_model/_perceptron.py b/sklearn/linear_model/_perceptron.py
index d15668bebfef1..e93200ba385fa 100644
--- a/sklearn/linear_model/_perceptron.py
+++ b/sklearn/linear_model/_perceptron.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 from numbers import Real
 
 from ..utils._param_validation import Interval, StrOptions
diff --git a/sklearn/linear_model/_quantile.py b/sklearn/linear_model/_quantile.py
index 79d2e6b67ca5e..d70d69a7d45ff 100644
--- a/sklearn/linear_model/_quantile.py
+++ b/sklearn/linear_model/_quantile.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import warnings
 from numbers import Real
 
diff --git a/sklearn/linear_model/_sgd_fast.pxd b/sklearn/linear_model/_sgd_fast.pxd
index c6991b8aa5bc2..bf21557b61e81 100644
--- a/sklearn/linear_model/_sgd_fast.pxd
+++ b/sklearn/linear_model/_sgd_fast.pxd
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: BSD-3-Clause
+
 """Helper to load LossFunction from sgd_fast.pyx to sag_fast.pyx"""
 
 cdef class LossFunction:
diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py
index 8c7bf6179de5b..ae1db0a49a442 100644
--- a/sklearn/linear_model/_stochastic_gradient.py
+++ b/sklearn/linear_model/_stochastic_gradient.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 """Classification, regression and One-Class SVM using Stochastic Gradient
 Descent (SGD).
 """
diff --git a/sklearn/linear_model/tests/test_theil_sen.py b/sklearn/linear_model/tests/test_theil_sen.py
index b59ad639f1dcc..216415f2ee927 100644
--- a/sklearn/linear_model/tests/test_theil_sen.py
+++ b/sklearn/linear_model/tests/test_theil_sen.py
@@ -4,6 +4,7 @@
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import os
 import re
 import sys
diff --git a/sklearn/manifold/_isomap.py b/sklearn/manifold/_isomap.py
index dee67461d7fcb..f0102a1aaa979 100644
--- a/sklearn/manifold/_isomap.py
+++ b/sklearn/manifold/_isomap.py
@@ -2,6 +2,7 @@
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import warnings
 from numbers import Integral, Real
 
diff --git a/sklearn/metrics/_pairwise_distances_reduction/__init__.py b/sklearn/metrics/_pairwise_distances_reduction/__init__.py
index 6c8deb34dc448..926d54ea74217 100644
--- a/sklearn/metrics/_pairwise_distances_reduction/__init__.py
+++ b/sklearn/metrics/_pairwise_distances_reduction/__init__.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 #
 # Pairwise Distances Reductions
 # =============================
diff --git a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py
index 83bb8cd2e0d5c..d8307cbe84eaa 100644
--- a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py
+++ b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 from abc import abstractmethod
 from typing import List
 
diff --git a/sklearn/metrics/_plot/confusion_matrix.py b/sklearn/metrics/_plot/confusion_matrix.py
index 0d5f5d84719ad..f1c9a8a3e1db5 100644
--- a/sklearn/metrics/_plot/confusion_matrix.py
+++ b/sklearn/metrics/_plot/confusion_matrix.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 from itertools import product
 
 import numpy as np
diff --git a/sklearn/metrics/_plot/det_curve.py b/sklearn/metrics/_plot/det_curve.py
index 712a87237c35a..7a9b68fb2e7e9 100644
--- a/sklearn/metrics/_plot/det_curve.py
+++ b/sklearn/metrics/_plot/det_curve.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import scipy as sp
 
 from ...utils._plotting import _BinaryClassifierCurveDisplayMixin
diff --git a/sklearn/metrics/_plot/precision_recall_curve.py b/sklearn/metrics/_plot/precision_recall_curve.py
index 869c8bc95e675..95698ee43c22b 100644
--- a/sklearn/metrics/_plot/precision_recall_curve.py
+++ b/sklearn/metrics/_plot/precision_recall_curve.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 from collections import Counter
 
 from ...utils._plotting import _BinaryClassifierCurveDisplayMixin
diff --git a/sklearn/metrics/_plot/regression.py b/sklearn/metrics/_plot/regression.py
index b079dc02524b3..11450c8311799 100644
--- a/sklearn/metrics/_plot/regression.py
+++ b/sklearn/metrics/_plot/regression.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import numbers
 
 import numpy as np
diff --git a/sklearn/metrics/_plot/roc_curve.py b/sklearn/metrics/_plot/roc_curve.py
index cb8718705e831..e9d4ca5d5672d 100644
--- a/sklearn/metrics/_plot/roc_curve.py
+++ b/sklearn/metrics/_plot/roc_curve.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 from ...utils._plotting import _BinaryClassifierCurveDisplayMixin
 from .._ranking import auc, roc_curve
 
diff --git a/sklearn/metrics/cluster/_bicluster.py b/sklearn/metrics/cluster/_bicluster.py
index c6fa0775bde54..b718a5a226598 100644
--- a/sklearn/metrics/cluster/_bicluster.py
+++ b/sklearn/metrics/cluster/_bicluster.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import numpy as np
 from scipy.optimize import linear_sum_assignment
 
diff --git a/sklearn/mixture/tests/test_bayesian_mixture.py b/sklearn/mixture/tests/test_bayesian_mixture.py
index 925cd2fdb615b..3ef292d523f83 100644
--- a/sklearn/mixture/tests/test_bayesian_mixture.py
+++ b/sklearn/mixture/tests/test_bayesian_mixture.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import copy
 
 import numpy as np
diff --git a/sklearn/model_selection/_classification_threshold.py b/sklearn/model_selection/_classification_threshold.py
index d29ad5d28c322..820b90f238723 100644
--- a/sklearn/model_selection/_classification_threshold.py
+++ b/sklearn/model_selection/_classification_threshold.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 from collections.abc import MutableMapping
 from numbers import Integral, Real
 
diff --git a/sklearn/model_selection/_plot.py b/sklearn/model_selection/_plot.py
index 0565675e87ee7..b16e0f4c1019a 100644
--- a/sklearn/model_selection/_plot.py
+++ b/sklearn/model_selection/_plot.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import numpy as np
 
 from ..utils._optional_dependencies import check_matplotlib_support
diff --git a/sklearn/model_selection/_search_successive_halving.py b/sklearn/model_selection/_search_successive_halving.py
index 373dbfac22be5..5980d40cb8e40 100644
--- a/sklearn/model_selection/_search_successive_halving.py
+++ b/sklearn/model_selection/_search_successive_halving.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 from abc import abstractmethod
 from copy import deepcopy
 from math import ceil, floor, log
diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py
index 7992a911c1be1..e2d5a3dc24de8 100644
--- a/sklearn/naive_bayes.py
+++ b/sklearn/naive_bayes.py
@@ -6,6 +6,7 @@
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import warnings
 from abc import ABCMeta, abstractmethod
 from numbers import Integral, Real
diff --git a/sklearn/neighbors/_base.py b/sklearn/neighbors/_base.py
index 750dd485ed586..3dfd2df16fabd 100644
--- a/sklearn/neighbors/_base.py
+++ b/sklearn/neighbors/_base.py
@@ -2,6 +2,7 @@
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import itertools
 import numbers
 import warnings
diff --git a/sklearn/neighbors/_classification.py b/sklearn/neighbors/_classification.py
index c45be05b5fe04..9c88f5f456e0a 100644
--- a/sklearn/neighbors/_classification.py
+++ b/sklearn/neighbors/_classification.py
@@ -2,6 +2,7 @@
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import warnings
 from numbers import Integral
 
diff --git a/sklearn/neighbors/_graph.py b/sklearn/neighbors/_graph.py
index d22b25f785e38..7e378b20cbadc 100644
--- a/sklearn/neighbors/_graph.py
+++ b/sklearn/neighbors/_graph.py
@@ -2,6 +2,7 @@
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import itertools
 
 from ..base import ClassNamePrefixFeaturesOutMixin, TransformerMixin, _fit_context
diff --git a/sklearn/neighbors/_regression.py b/sklearn/neighbors/_regression.py
index 98a5734c5140b..5b96a64a8bc28 100644
--- a/sklearn/neighbors/_regression.py
+++ b/sklearn/neighbors/_regression.py
@@ -2,7 +2,6 @@
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
-#                           University of Copenhagen
 
 import warnings
 
diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py
index 9855924b61537..4dbe1e833322c 100644
--- a/sklearn/preprocessing/_function_transformer.py
+++ b/sklearn/preprocessing/_function_transformer.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import warnings
 from functools import partial
 
diff --git a/sklearn/preprocessing/_target_encoder.py b/sklearn/preprocessing/_target_encoder.py
index 779618026def1..1855f340c624d 100644
--- a/sklearn/preprocessing/_target_encoder.py
+++ b/sklearn/preprocessing/_target_encoder.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 from numbers import Integral, Real
 
 import numpy as np
diff --git a/sklearn/semi_supervised/_label_propagation.py b/sklearn/semi_supervised/_label_propagation.py
index 9b8f4b8da7b37..ef32313c3c75f 100644
--- a/sklearn/semi_supervised/_label_propagation.py
+++ b/sklearn/semi_supervised/_label_propagation.py
@@ -54,6 +54,7 @@
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import warnings
 from abc import ABCMeta, abstractmethod
 from numbers import Integral, Real
diff --git a/sklearn/svm/_base.py b/sklearn/svm/_base.py
index 22adea71cd27c..fb61c407e1fb8 100644
--- a/sklearn/svm/_base.py
+++ b/sklearn/svm/_base.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import warnings
 from abc import ABCMeta, abstractmethod
 from numbers import Integral, Real
diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py
index 8ecdca6f47b56..9ce346780a8eb 100644
--- a/sklearn/svm/_classes.py
+++ b/sklearn/svm/_classes.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 from numbers import Integral, Real
 
 import numpy as np
diff --git a/sklearn/tree/_export.py b/sklearn/tree/_export.py
index 14b9be332f676..9cb55f7aa1aa4 100644
--- a/sklearn/tree/_export.py
+++ b/sklearn/tree/_export.py
@@ -4,6 +4,7 @@
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 from collections.abc import Iterable
 from io import StringIO
 from numbers import Integral
diff --git a/sklearn/utils/_arpack.py b/sklearn/utils/_arpack.py
index 3087a0f4dbf0a..ba82127f98c43 100644
--- a/sklearn/utils/_arpack.py
+++ b/sklearn/utils/_arpack.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 from .validation import check_random_state
 
 
diff --git a/sklearn/utils/_available_if.py b/sklearn/utils/_available_if.py
index 9230a35ec9ab0..b0da84189d1f3 100644
--- a/sklearn/utils/_available_if.py
+++ b/sklearn/utils/_available_if.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 from functools import update_wrapper, wraps
 from types import MethodType
 
diff --git a/sklearn/utils/_bunch.py b/sklearn/utils/_bunch.py
index 4d474e88eb80e..a11e80e366135 100644
--- a/sklearn/utils/_bunch.py
+++ b/sklearn/utils/_bunch.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import warnings
 
 
diff --git a/sklearn/utils/_chunking.py b/sklearn/utils/_chunking.py
index e2462fa7f7eda..6cb5bb819cec7 100644
--- a/sklearn/utils/_chunking.py
+++ b/sklearn/utils/_chunking.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import warnings
 from itertools import islice
 from numbers import Integral
diff --git a/sklearn/utils/_encode.py b/sklearn/utils/_encode.py
index 897980e43ce11..479b11e0f59a2 100644
--- a/sklearn/utils/_encode.py
+++ b/sklearn/utils/_encode.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 from collections import Counter
 from contextlib import suppress
 from typing import NamedTuple
diff --git a/sklearn/utils/_estimator_html_repr.py b/sklearn/utils/_estimator_html_repr.py
index 1e5db459a04e3..5a9cd4186ffa8 100644
--- a/sklearn/utils/_estimator_html_repr.py
+++ b/sklearn/utils/_estimator_html_repr.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import html
 import itertools
 from contextlib import closing
diff --git a/sklearn/utils/_fast_dict.pxd b/sklearn/utils/_fast_dict.pxd
index f9f9bf31db1ee..e37f254661ce6 100644
--- a/sklearn/utils/_fast_dict.pxd
+++ b/sklearn/utils/_fast_dict.pxd
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 """
 Uses C++ map containers for fast dict-like behavior with keys being
 integers, and values float.
diff --git a/sklearn/utils/_indexing.py b/sklearn/utils/_indexing.py
index fd9ed46c3b506..6b4b4779db269 100644
--- a/sklearn/utils/_indexing.py
+++ b/sklearn/utils/_indexing.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import numbers
 import sys
 import warnings
diff --git a/sklearn/utils/_joblib.py b/sklearn/utils/_joblib.py
index 837dd5102e918..03c10397eea1c 100644
--- a/sklearn/utils/_joblib.py
+++ b/sklearn/utils/_joblib.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 # TODO(1.7): remove this file
 
 import warnings as _warnings
diff --git a/sklearn/utils/_mask.py b/sklearn/utils/_mask.py
index 95fefaca6e67c..da21c8e68b72d 100644
--- a/sklearn/utils/_mask.py
+++ b/sklearn/utils/_mask.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 from contextlib import suppress
 
 import numpy as np
diff --git a/sklearn/utils/_missing.py b/sklearn/utils/_missing.py
index f2e024df887be..daeb9ba68cc1c 100644
--- a/sklearn/utils/_missing.py
+++ b/sklearn/utils/_missing.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import math
 import numbers
 from contextlib import suppress
diff --git a/sklearn/utils/_mocking.py b/sklearn/utils/_mocking.py
index f4606d2a98c04..6653a4a17a45e 100644
--- a/sklearn/utils/_mocking.py
+++ b/sklearn/utils/_mocking.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import numpy as np
 
 from ..base import BaseEstimator, ClassifierMixin
diff --git a/sklearn/utils/_optional_dependencies.py b/sklearn/utils/_optional_dependencies.py
index b2e970502c123..1de7f4479b242 100644
--- a/sklearn/utils/_optional_dependencies.py
+++ b/sklearn/utils/_optional_dependencies.py
@@ -1,5 +1,7 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
+
 def check_matplotlib_support(caller_name):
     """Raise ImportError with detailed error message if mpl is not installed.
 
diff --git a/sklearn/utils/_param_validation.py b/sklearn/utils/_param_validation.py
index e1de052bd01d3..53c9eeee65af4 100644
--- a/sklearn/utils/_param_validation.py
+++ b/sklearn/utils/_param_validation.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import functools
 import math
 import operator
diff --git a/sklearn/utils/_plotting.py b/sklearn/utils/_plotting.py
index ab3e21bafa134..8d2c7d3bf101b 100644
--- a/sklearn/utils/_plotting.py
+++ b/sklearn/utils/_plotting.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import numpy as np
 
 from . import check_consistent_length
diff --git a/sklearn/utils/_random.pyx b/sklearn/utils/_random.pyx
index 838db540a210d..f0e649e60fe7c 100644
--- a/sklearn/utils/_random.pyx
+++ b/sklearn/utils/_random.pyx
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 """
 Random utility function
 =======================
diff --git a/sklearn/utils/_set_output.py b/sklearn/utils/_set_output.py
index 510be7469b96f..9f1504bd513d9 100644
--- a/sklearn/utils/_set_output.py
+++ b/sklearn/utils/_set_output.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import importlib
 from functools import wraps
 from typing import Protocol, runtime_checkable
diff --git a/sklearn/utils/_tags.py b/sklearn/utils/_tags.py
index af2ca444b5d33..db8473721d2b6 100644
--- a/sklearn/utils/_tags.py
+++ b/sklearn/utils/_tags.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import numpy as np
 
 _DEFAULT_TAGS = {
diff --git a/sklearn/utils/_testing.py b/sklearn/utils/_testing.py
index 961091e4af71a..d75ca9e19cdff 100644
--- a/sklearn/utils/_testing.py
+++ b/sklearn/utils/_testing.py
@@ -2,6 +2,7 @@
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import atexit
 import contextlib
 import functools
diff --git a/sklearn/utils/_user_interface.py b/sklearn/utils/_user_interface.py
index f48b934dc7c8b..8e7550b09be2c 100644
--- a/sklearn/utils/_user_interface.py
+++ b/sklearn/utils/_user_interface.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import timeit
 from contextlib import contextmanager
 
diff --git a/sklearn/utils/deprecation.py b/sklearn/utils/deprecation.py
index e6ad56264e1c3..df218029829c1 100644
--- a/sklearn/utils/deprecation.py
+++ b/sklearn/utils/deprecation.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import functools
 import warnings
 
diff --git a/sklearn/utils/metaestimators.py b/sklearn/utils/metaestimators.py
index 000722e253d14..f962acb48f74e 100644
--- a/sklearn/utils/metaestimators.py
+++ b/sklearn/utils/metaestimators.py
@@ -2,6 +2,7 @@
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 from abc import ABCMeta, abstractmethod
 from contextlib import suppress
 from typing import Any, List
diff --git a/sklearn/utils/random.py b/sklearn/utils/random.py
index e7408de304b89..aad8b84828514 100644
--- a/sklearn/utils/random.py
+++ b/sklearn/utils/random.py
@@ -2,6 +2,7 @@
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import array
 
 import numpy as np
diff --git a/sklearn/utils/sparsefuncs.py b/sklearn/utils/sparsefuncs.py
index 8e721c6b3852c..fb29de8ad7c6e 100644
--- a/sklearn/utils/sparsefuncs.py
+++ b/sklearn/utils/sparsefuncs.py
@@ -2,6 +2,7 @@
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import numpy as np
 import scipy.sparse as sp
 from scipy.sparse.linalg import LinearOperator
diff --git a/sklearn/utils/stats.py b/sklearn/utils/stats.py
index b931cb6b6f90d..0fc3fae8a88f0 100644
--- a/sklearn/utils/stats.py
+++ b/sklearn/utils/stats.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import numpy as np
 
 from .extmath import stable_cumsum
diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py
index 5b27a52d29ccf..66d3ec74f5490 100644
--- a/sklearn/utils/tests/test_extmath.py
+++ b/sklearn/utils/tests/test_extmath.py
@@ -1,5 +1,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
+
 import numpy as np
 import pytest
 from scipy import linalg, sparse

From 660d1a30d7a1d90ee490df3c6e8ef55f61405c74 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=A9mie=20du=20Boisberranger?=
 <jeremie@probabl.ai>
Date: Mon, 22 Jul 2024 18:00:48 +0200
Subject: [PATCH 17/17] Update pyproject.toml

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index f51fc07a4ba1e..5cd7cd9b54391 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -177,6 +177,7 @@ notice-rgx = "\\#\\ Authors:\\ The\\ scikit\\-learn\\ developers\\\n\\#\\ SPDX\\
 "build_tools/*"=["CPY001"]
 "sklearn/_build_utils/*"=["CPY001"]
 "maint_tools/*"=["CPY001"]
+".spin/*"=["CPY001"]
 ".github/*"=["CPY001"]
 # __doc__ is too long (>4096 chars) and therefore false positive on copyright check
 "examples/model_selection/plot_precision_recall.py"=["CPY001"]