From f05d7c4ca5f2afb2dfefc7a1255610ae8dc199f7 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 18 Jun 2024 20:37:49 +0200 Subject: [PATCH 01/17] MNT enable ruff-copyright-check --- pyproject.toml | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ff7df45c1d843..4f2a33a12b38f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -140,8 +140,10 @@ exclude=[ ] [tool.ruff.lint] +# This enables us to use CPY001: copyright header check +preview = true # all rules can be found here: https://beta.ruff.rs/docs/rules/ -select = ["E", "F", "W", "I"] +select = ["E", "F", "W", "I", "CPY001"] ignore=[ # space before : (needed for how black formats slicing) "E203", @@ -151,11 +153,21 @@ ignore=[ "E741", ] +[tool.ruff.lint.flake8-copyright] +notice-rgx = "\\#\\ Authors:\\ The\\ scikit\\-learn\\ developers\\\n\\#\\ SPDX\\-License\\-Identifier:\\ BSD\\-3\\-Clause" + [tool.ruff.lint.per-file-ignores] # It's fine not to put the import at the top of the file in the examples # folder. 
"examples/*"=["E402"] "doc/conf.py"=["E402"] +"**/tests/*"=["CPY001"] +"asv_benchmarks/*"=["CPY001"] +"benchmarks/*"=["CPY001"] +"doc/*"=["CPY001"] +"build_tools/*"=["CPY001"] +"maint_tools/*"=["CPY001"] +".github/*"=["CPY001"] [tool.cython-lint] From ed17f9664dbe843e7dc90292c986599ce93927a9 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 18 Jun 2024 21:05:49 +0200 Subject: [PATCH 02/17] add missing notes --- .../plot_cyclical_feature_engineering.py | 3 + .../applications/plot_digits_denoising.py | 3 + .../applications/plot_face_recognition.py | 3 + .../plot_time_series_lagged_features.py | 853 +++++++++--------- .../bicluster/plot_bicluster_newsgroups.py | 3 + .../plot_classifier_comparison.py | 3 + examples/classification/plot_lda.py | 3 + examples/classification/plot_lda_qda.py | 3 + examples/cluster/plot_affinity_propagation.py | 3 + examples/cluster/plot_bisect_kmeans.py | 3 + examples/cluster/plot_cluster_comparison.py | 3 + examples/cluster/plot_cluster_iris.py | 3 + examples/cluster/plot_dbscan.py | 3 + examples/cluster/plot_dict_face_patches.py | 3 + examples/cluster/plot_digits_agglomeration.py | 3 + examples/cluster/plot_hdbscan.py | 3 + examples/cluster/plot_inductive_clustering.py | 3 + examples/cluster/plot_kmeans_digits.py | 3 + examples/cluster/plot_kmeans_plusplus.py | 3 + .../plot_kmeans_silhouette_analysis.py | 3 + examples/cluster/plot_linkage_comparison.py | 3 + examples/cluster/plot_mean_shift.py | 3 + examples/cluster/plot_mini_batch_kmeans.py | 3 + examples/compose/plot_compare_reduction.py | 3 + examples/compose/plot_digits_pipe.py | 3 + .../covariance/plot_covariance_estimation.py | 3 + examples/covariance/plot_lw_vs_oas.py | 3 + .../covariance/plot_mahalanobis_distances.py | 3 + .../plot_robust_vs_empirical_covariance.py | 3 + .../plot_compare_cross_decomposition.py | 3 + .../cross_decomposition/plot_pcr_vs_pls.py | 3 + examples/datasets/plot_digits_last_image.py | 3 + examples/datasets/plot_iris_dataset.py | 3 + 
examples/datasets/plot_random_dataset.py | 3 + .../plot_random_multilabel_dataset.py | 3 + .../decomposition/plot_faces_decomposition.py | 3 + .../plot_ica_blind_source_separation.py | 3 + .../decomposition/plot_image_denoising.py | 3 + examples/decomposition/plot_pca_iris.py | 3 + examples/decomposition/plot_pca_vs_lda.py | 3 + examples/decomposition/plot_sparse_coding.py | 3 + examples/ensemble/plot_adaboost_multiclass.py | 3 + examples/ensemble/plot_forest_importances.py | 3 + .../ensemble/plot_forest_importances_faces.py | 3 + examples/ensemble/plot_forest_iris.py | 3 + .../plot_gradient_boosting_categorical.py | 3 + .../plot_gradient_boosting_early_stopping.py | 3 + .../plot_gradient_boosting_quantile.py | 3 + examples/ensemble/plot_isolation_forest.py | 3 + .../ensemble/plot_monotonic_constraints.py | 3 + .../ensemble/plot_random_forest_embedding.py | 3 + .../ensemble/plot_voting_decision_regions.py | 3 + examples/ensemble/plot_voting_probas.py | 3 + examples/ensemble/plot_voting_regressor.py | 3 + examples/exercises/plot_cv_diabetes.py | 3 + .../plot_digits_classification_exercise.py | 3 + examples/exercises/plot_iris_exercise.py | 3 + .../feature_selection/plot_f_test_vs_mi.py | 3 + .../plot_feature_selection.py | 3 + .../plot_feature_selection_pipeline.py | 3 + examples/feature_selection/plot_rfe_digits.py | 3 + .../plot_rfe_with_cross_validation.py | 3 + .../plot_select_from_model_diabetes.py | 3 + examples/gaussian_process/plot_gpc_iris.py | 3 + .../plot_gpr_on_structured_data.py | 3 + ...t_iterative_imputer_variants_comparison.py | 3 + .../inspection/plot_causal_interpretation.py | 3 + ...linear_model_coefficient_interpretation.py | 3 + .../inspection/plot_partial_dependence.py | 3 + .../inspection/plot_permutation_importance.py | 3 + ...t_permutation_importance_multicollinear.py | 3 + examples/linear_model/plot_ard.py | 3 + .../plot_bayesian_ridge_curvefit.py | 3 + ...puted_gram_matrix_with_weighted_samples.py | 3 + 
examples/linear_model/plot_iris_logistic.py | 3 + .../linear_model/plot_lasso_and_elasticnet.py | 3 + .../plot_lasso_dense_vs_sparse_data.py | 3 + examples/linear_model/plot_logistic.py | 3 + examples/linear_model/plot_nnls.py | 3 + examples/linear_model/plot_ols.py | 3 + examples/linear_model/plot_ols_3d.py | 3 + .../linear_model/plot_ols_ridge_variance.py | 3 + examples/linear_model/plot_omp.py | 3 + examples/linear_model/plot_ransac.py | 3 + examples/linear_model/plot_ridge_coeffs.py | 3 + examples/linear_model/plot_robust_fit.py | 3 + examples/linear_model/plot_sgd_iris.py | 3 + .../linear_model/plot_sgd_loss_functions.py | 3 + examples/linear_model/plot_sgd_penalties.py | 3 + .../plot_sgd_separating_hyperplane.py | 3 + .../linear_model/plot_sgd_weighted_samples.py | 3 + .../linear_model/plot_sgdocsvm_vs_ocsvm.py | 3 + ...sparse_logistic_regression_20newsgroups.py | 3 + examples/manifold/plot_compare_methods.py | 3 + examples/manifold/plot_swissroll.py | 3 + .../plot_display_object_visualization.py | 3 + .../plot_estimator_representation.py | 3 + .../plot_johnson_lindenstrauss_bound.py | 3 + .../miscellaneous/plot_metadata_routing.py | 3 + .../plot_multioutput_face_completion.py | 3 + ...ot_partial_dependence_visualization_api.py | 3 + .../miscellaneous/plot_pipeline_display.py | 3 + .../plot_roc_curve_visualization_api.py | 3 + examples/mixture/plot_gmm.py | 3 + examples/mixture/plot_gmm_init.py | 3 + examples/mixture/plot_gmm_pdf.py | 3 + examples/mixture/plot_gmm_selection.py | 3 + examples/mixture/plot_gmm_sin.py | 3 + .../model_selection/plot_confusion_matrix.py | 3 + .../plot_cost_sensitive_learning.py | 3 + examples/model_selection/plot_cv_indices.py | 3 + examples/model_selection/plot_cv_predict.py | 3 + examples/model_selection/plot_det.py | 3 + .../plot_grid_search_digits.py | 3 + .../plot_grid_search_refit_callable.py | 3 + .../model_selection/plot_grid_search_stats.py | 3 + .../model_selection/plot_learning_curve.py | 3 + 
.../model_selection/plot_likelihood_ratios.py | 3 + .../plot_nested_cross_validation_iris.py | 3 + .../model_selection/plot_precision_recall.py | 3 + .../model_selection/plot_randomized_search.py | 3 + examples/model_selection/plot_roc.py | 3 + examples/model_selection/plot_roc_crossval.py | 3 + .../plot_successive_halving_heatmap.py | 3 + .../plot_successive_halving_iterations.py | 3 + .../plot_tuned_decision_threshold.py | 3 + .../plot_underfitting_overfitting.py | 3 + .../model_selection/plot_validation_curve.py | 3 + .../multiclass/plot_multiclass_overview.py | 3 + examples/neighbors/plot_classification.py | 3 + .../neighbors/plot_digits_kde_sampling.py | 3 + examples/neighbors/plot_kde_1d.py | 3 + .../neighbors/plot_lof_novelty_detection.py | 3 + .../neighbors/plot_lof_outlier_detection.py | 3 + examples/neighbors/plot_nca_classification.py | 3 + examples/neighbors/plot_nca_dim_reduction.py | 3 + examples/neighbors/plot_nca_illustration.py | 3 + examples/neighbors/plot_nearest_centroid.py | 3 + .../plot_mlp_training_curves.py | 3 + .../neural_networks/plot_mnist_filters.py | 3 + .../plot_discretization_classification.py | 3 + examples/preprocessing/plot_target_encoder.py | 3 + .../plot_target_encoder_cross_val.py | 3 + .../plot_release_highlights_0_22_0.py | 3 + .../plot_semi_supervised_newsgroups.py | 3 + examples/svm/plot_custom_kernel.py | 3 + examples/svm/plot_iris_svc.py | 3 + .../svm/plot_linearsvc_support_vectors.py | 3 + examples/svm/plot_oneclass.py | 3 + examples/svm/plot_rbf_parameters.py | 3 + examples/svm/plot_separating_hyperplane.py | 3 + .../plot_separating_hyperplane_unbalanced.py | 3 + examples/svm/plot_svm_anova.py | 3 + examples/svm/plot_svm_kernels.py | 3 + examples/svm/plot_svm_margin.py | 3 + examples/svm/plot_svm_regression.py | 3 + examples/svm/plot_svm_tie_breaking.py | 3 + examples/svm/plot_weighted_samples.py | 3 + examples/tree/plot_cost_complexity_pruning.py | 3 + examples/tree/plot_iris_dtc.py | 3 + 
examples/tree/plot_tree_regression.py | 3 + .../tree/plot_tree_regression_multioutput.py | 3 + examples/tree/plot_unveil_tree_structure.py | 3 + setup.py | 2 + sklearn/__check_build/__init__.py | 3 + sklearn/__init__.py | 3 + sklearn/_build_utils/__init__.py | 3 + sklearn/_build_utils/openmp_helpers.py | 3 + sklearn/_build_utils/pre_build_helpers.py | 3 + sklearn/_build_utils/tempita.py | 2 + sklearn/_build_utils/version.py | 3 + sklearn/_config.py | 3 + sklearn/_distributor_init.py | 3 + sklearn/_loss/__init__.py | 3 + sklearn/_loss/link.py | 3 + sklearn/_loss/loss.py | 3 + sklearn/_min_dependencies.py | 3 + sklearn/cluster/__init__.py | 3 + sklearn/cluster/_agglomerative.py | 3 + sklearn/cluster/_bisect_k_means.py | 3 + sklearn/cluster/_hdbscan/__init__.py | 2 + sklearn/cluster/_hdbscan/hdbscan.py | 3 + sklearn/cluster/_mean_shift.py | 3 + sklearn/cluster/_optics.py | 3 + sklearn/compose/__init__.py | 3 + sklearn/conftest.py | 2 + sklearn/covariance/__init__.py | 3 + sklearn/cross_decomposition/__init__.py | 3 + sklearn/datasets/__init__.py | 3 + sklearn/datasets/_arff_parser.py | 3 + sklearn/datasets/_kddcup99.py | 3 + sklearn/datasets/_openml.py | 2 + sklearn/datasets/data/__init__.py | 2 + sklearn/datasets/descr/__init__.py | 2 + sklearn/datasets/images/__init__.py | 2 + sklearn/decomposition/__init__.py | 3 + sklearn/decomposition/_lda.py | 3 + sklearn/decomposition/_truncated_svd.py | 3 + sklearn/ensemble/__init__.py | 3 + .../_hist_gradient_boosting/__init__.py | 3 + .../_hist_gradient_boosting/binning.py | 3 + .../gradient_boosting.py | 3 + .../_hist_gradient_boosting/grower.py | 3 + .../_hist_gradient_boosting/predictor.py | 3 + .../ensemble/_hist_gradient_boosting/utils.py | 3 + sklearn/exceptions.py | 3 + sklearn/experimental/__init__.py | 3 + .../experimental/enable_halving_search_cv.py | 3 + .../enable_hist_gradient_boosting.py | 3 + .../experimental/enable_iterative_imputer.py | 3 + sklearn/feature_extraction/__init__.py | 3 + 
sklearn/feature_extraction/_stop_words.py | 2 + sklearn/feature_selection/__init__.py | 3 + sklearn/feature_selection/_mutual_info.py | 2 + sklearn/feature_selection/_sequential.py | 3 + .../feature_selection/_variance_threshold.py | 2 + sklearn/impute/__init__.py | 3 + sklearn/impute/_iterative.py | 2 + sklearn/inspection/__init__.py | 3 + sklearn/inspection/_pd_utils.py | 2 + sklearn/inspection/_permutation_importance.py | 3 + sklearn/inspection/_plot/__init__.py | 2 + sklearn/inspection/_plot/decision_boundary.py | 2 + .../inspection/_plot/partial_dependence.py | 2 + sklearn/linear_model/__init__.py | 3 + sklearn/linear_model/_linear_loss.py | 3 + sklearn/linear_model/_logistic.py | 3 + sklearn/manifold/__init__.py | 3 + sklearn/manifold/_mds.py | 3 + sklearn/metrics/__init__.py | 3 + .../_pairwise_distances_reduction/__init__.py | 2 + .../_dispatcher.py | 2 + sklearn/metrics/_plot/__init__.py | 2 + sklearn/metrics/_plot/confusion_matrix.py | 2 + sklearn/metrics/_plot/det_curve.py | 2 + .../metrics/_plot/precision_recall_curve.py | 2 + sklearn/metrics/_plot/regression.py | 2 + sklearn/metrics/_plot/roc_curve.py | 2 + sklearn/metrics/_scorer.py | 3 + sklearn/metrics/cluster/__init__.py | 3 + sklearn/metrics/cluster/_bicluster.py | 2 + sklearn/mixture/__init__.py | 3 + sklearn/model_selection/__init__.py | 3 + .../_classification_threshold.py | 2 + sklearn/model_selection/_plot.py | 2 + .../_search_successive_halving.py | 2 + sklearn/neighbors/__init__.py | 3 + sklearn/neighbors/_kde.py | 3 + sklearn/neighbors/_unsupervised.py | 3 + sklearn/neural_network/__init__.py | 3 + sklearn/preprocessing/__init__.py | 3 + .../preprocessing/_function_transformer.py | 2 + sklearn/preprocessing/_polynomial.py | 3 + sklearn/preprocessing/_target_encoder.py | 2 + sklearn/semi_supervised/__init__.py | 3 + sklearn/svm/_base.py | 2 + sklearn/svm/_classes.py | 2 + sklearn/tree/__init__.py | 3 + sklearn/utils/__init__.py | 3 + sklearn/utils/_arpack.py | 2 + 
sklearn/utils/_array_api.py | 3 + sklearn/utils/_available_if.py | 2 + sklearn/utils/_bunch.py | 2 + sklearn/utils/_chunking.py | 2 + sklearn/utils/_encode.py | 2 + sklearn/utils/_estimator_html_repr.py | 2 + sklearn/utils/_indexing.py | 2 + sklearn/utils/_joblib.py | 2 + sklearn/utils/_mask.py | 2 + sklearn/utils/_missing.py | 2 + sklearn/utils/_mocking.py | 2 + sklearn/utils/_optional_dependencies.py | 2 + sklearn/utils/_param_validation.py | 2 + sklearn/utils/_plotting.py | 2 + sklearn/utils/_pprint.py | 3 + sklearn/utils/_response.py | 3 + sklearn/utils/_set_output.py | 2 + sklearn/utils/_show_versions.py | 3 + sklearn/utils/_tags.py | 2 + sklearn/utils/_user_interface.py | 2 + sklearn/utils/deprecation.py | 2 + sklearn/utils/discovery.py | 3 + sklearn/utils/estimator_checks.py | 3 + sklearn/utils/optimize.py | 3 + sklearn/utils/parallel.py | 3 + sklearn/utils/stats.py | 2 + 286 files changed, 1232 insertions(+), 425 deletions(-) diff --git a/examples/applications/plot_cyclical_feature_engineering.py b/examples/applications/plot_cyclical_feature_engineering.py index a23e98d331dc0..f7c561da48f8b 100644 --- a/examples/applications/plot_cyclical_feature_engineering.py +++ b/examples/applications/plot_cyclical_feature_engineering.py @@ -13,6 +13,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Data exploration on the Bike Sharing Demand dataset # --------------------------------------------------- diff --git a/examples/applications/plot_digits_denoising.py b/examples/applications/plot_digits_denoising.py index 9d4b0cae79d06..73b5a8034f8d6 100644 --- a/examples/applications/plot_digits_denoising.py +++ b/examples/applications/plot_digits_denoising.py @@ -21,6 +21,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Authors: Guillaume Lemaitre # Licence: BSD 3 clause diff --git a/examples/applications/plot_face_recognition.py b/examples/applications/plot_face_recognition.py 
index 97a67fad52776..4ace1b81ef7a0 100644 --- a/examples/applications/plot_face_recognition.py +++ b/examples/applications/plot_face_recognition.py @@ -12,6 +12,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% from time import time diff --git a/examples/applications/plot_time_series_lagged_features.py b/examples/applications/plot_time_series_lagged_features.py index 9159825cbbd43..53b29fa940958 100644 --- a/examples/applications/plot_time_series_lagged_features.py +++ b/examples/applications/plot_time_series_lagged_features.py @@ -1,425 +1,428 @@ -""" -=========================================== -Lagged features for time series forecasting -=========================================== - -This example demonstrates how Polars-engineered lagged features can be used -for time series forecasting with -:class:`~sklearn.ensemble.HistGradientBoostingRegressor` on the Bike Sharing -Demand dataset. - -See the example on -:ref:`sphx_glr_auto_examples_applications_plot_cyclical_feature_engineering.py` -for some data exploration on this dataset and a demo on periodic feature -engineering. - -""" - -# %% -# Analyzing the Bike Sharing Demand dataset -# ----------------------------------------- -# -# We start by loading the data from the OpenML repository -# as a pandas dataframe. This will be replaced with Polars -# once `fetch_openml` adds a native support for it. -# We convert to Polars for feature engineering, as it automatically caches -# common subexpressions which are reused in multiple expressions -# (like `pl.col("count").shift(1)` below). See -# https://docs.pola.rs/user-guide/lazy/optimizations/ for more information. 
- -import numpy as np -import polars as pl - -from sklearn.datasets import fetch_openml - -pl.Config.set_fmt_str_lengths(20) - -bike_sharing = fetch_openml( - "Bike_Sharing_Demand", version=2, as_frame=True, parser="pandas" -) -df = bike_sharing.frame -df = pl.DataFrame({col: df[col].to_numpy() for col in df.columns}) - -# %% -# Next, we take a look at the statistical summary of the dataset -# so that we can better understand the data that we are working with. -import polars.selectors as cs - -summary = df.select(cs.numeric()).describe() -summary - -# %% -# Let us look at the count of the seasons `"fall"`, `"spring"`, `"summer"` -# and `"winter"` present in the dataset to confirm they are balanced. - -import matplotlib.pyplot as plt - -df["season"].value_counts() - - -# %% -# Generating Polars-engineered lagged features -# -------------------------------------------- -# Let's consider the problem of predicting the demand at the -# next hour given past demands. Since the demand is a continuous -# variable, one could intuitively use any regression model. However, we do -# not have the usual `(X_train, y_train)` dataset. Instead, we just have -# the `y_train` demand data sequentially organized by time. 
-lagged_df = df.select( - "count", - *[pl.col("count").shift(i).alias(f"lagged_count_{i}h") for i in [1, 2, 3]], - lagged_count_1d=pl.col("count").shift(24), - lagged_count_1d_1h=pl.col("count").shift(24 + 1), - lagged_count_7d=pl.col("count").shift(7 * 24), - lagged_count_7d_1h=pl.col("count").shift(7 * 24 + 1), - lagged_mean_24h=pl.col("count").shift(1).rolling_mean(24), - lagged_max_24h=pl.col("count").shift(1).rolling_max(24), - lagged_min_24h=pl.col("count").shift(1).rolling_min(24), - lagged_mean_7d=pl.col("count").shift(1).rolling_mean(7 * 24), - lagged_max_7d=pl.col("count").shift(1).rolling_max(7 * 24), - lagged_min_7d=pl.col("count").shift(1).rolling_min(7 * 24), -) -lagged_df.tail(10) - -# %% -# Watch out however, the first lines have undefined values because their own -# past is unknown. This depends on how much lag we used: -lagged_df.head(10) - -# %% -# We can now separate the lagged features in a matrix `X` and the target variable -# (the counts to predict) in an array of the same first dimension `y`. -lagged_df = lagged_df.drop_nulls() -X = lagged_df.drop("count") -y = lagged_df["count"] -print("X shape: {}\ny shape: {}".format(X.shape, y.shape)) - -# %% -# Naive evaluation of the next hour bike demand regression -# -------------------------------------------------------- -# Let's randomly split our tabularized dataset to train a gradient -# boosting regression tree (GBRT) model and evaluate it using Mean -# Absolute Percentage Error (MAPE). If our model is aimed at forecasting -# (i.e., predicting future data from past data), we should not use training -# data that are ulterior to the testing data. In time series machine learning -# the "i.i.d" (independent and identically distributed) assumption does not -# hold true as the data points are not independent and have a temporal -# relationship. 
-from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.model_selection import train_test_split - -X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.2, random_state=42 -) - -model = HistGradientBoostingRegressor().fit(X_train, y_train) - -# %% -# Taking a look at the performance of the model. -from sklearn.metrics import mean_absolute_percentage_error - -y_pred = model.predict(X_test) -mean_absolute_percentage_error(y_test, y_pred) - -# %% -# Proper next hour forecasting evaluation -# --------------------------------------- -# Let's use a proper evaluation splitting strategies that takes into account -# the temporal structure of the dataset to evaluate our model's ability to -# predict data points in the future (to avoid cheating by reading values from -# the lagged features in the training set). -from sklearn.model_selection import TimeSeriesSplit - -ts_cv = TimeSeriesSplit( - n_splits=3, # to keep the notebook fast enough on common laptops - gap=48, # 2 days data gap between train and test - max_train_size=10000, # keep train sets of comparable sizes - test_size=3000, # for 2 or 3 digits of precision in scores -) -all_splits = list(ts_cv.split(X, y)) - -# %% -# Training the model and evaluating its performance based on MAPE. -train_idx, test_idx = all_splits[0] -X_train, X_test = X[train_idx, :], X[test_idx, :] -y_train, y_test = y[train_idx], y[test_idx] - -model = HistGradientBoostingRegressor().fit(X_train, y_train) -y_pred = model.predict(X_test) -mean_absolute_percentage_error(y_test, y_pred) - -# %% -# The generalization error measured via a shuffled trained test split -# is too optimistic. The generalization via a time-based split is likely to -# be more representative of the true performance of the regression model. 
-# Let's assess this variability of our error evaluation with proper -# cross-validation: -from sklearn.model_selection import cross_val_score - -cv_mape_scores = -cross_val_score( - model, X, y, cv=ts_cv, scoring="neg_mean_absolute_percentage_error" -) -cv_mape_scores - -# %% -# The variability across splits is quite large! In a real life setting -# it would be advised to use more splits to better assess the variability. -# Let's report the mean CV scores and their standard deviation from now on. -print(f"CV MAPE: {cv_mape_scores.mean():.3f} ± {cv_mape_scores.std():.3f}") - -# %% -# We can compute several combinations of evaluation metrics and loss functions, -# which are reported a bit below. -from collections import defaultdict - -from sklearn.metrics import ( - make_scorer, - mean_absolute_error, - mean_pinball_loss, - root_mean_squared_error, -) -from sklearn.model_selection import cross_validate - - -def consolidate_scores(cv_results, scores, metric): - if metric == "MAPE": - scores[metric].append(f"{value.mean():.2f} ± {value.std():.2f}") - else: - scores[metric].append(f"{value.mean():.1f} ± {value.std():.1f}") - - return scores - - -scoring = { - "MAPE": make_scorer(mean_absolute_percentage_error), - "RMSE": make_scorer(root_mean_squared_error), - "MAE": make_scorer(mean_absolute_error), - "pinball_loss_05": make_scorer(mean_pinball_loss, alpha=0.05), - "pinball_loss_50": make_scorer(mean_pinball_loss, alpha=0.50), - "pinball_loss_95": make_scorer(mean_pinball_loss, alpha=0.95), -} -loss_functions = ["squared_error", "poisson", "absolute_error"] -scores = defaultdict(list) -for loss_func in loss_functions: - model = HistGradientBoostingRegressor(loss=loss_func) - cv_results = cross_validate( - model, - X, - y, - cv=ts_cv, - scoring=scoring, - n_jobs=2, - ) - time = cv_results["fit_time"] - scores["loss"].append(loss_func) - scores["fit_time"].append(f"{time.mean():.2f} ± {time.std():.2f} s") - - for key, value in cv_results.items(): - if 
key.startswith("test_"): - metric = key.split("test_")[1] - scores = consolidate_scores(cv_results, scores, metric) - - -# %% -# Modeling predictive uncertainty via quantile regression -# ------------------------------------------------------- -# Instead of modeling the expected value of the distribution of -# :math:`Y|X` like the least squares and Poisson losses do, one could try to -# estimate quantiles of the conditional distribution. -# -# :math:`Y|X=x_i` is expected to be a random variable for a given data point -# :math:`x_i` because we expect that the number of rentals cannot be 100% -# accurately predicted from the features. It can be influenced by other -# variables not properly captured by the existing lagged features. For -# instance whether or not it will rain in the next hour cannot be fully -# anticipated from the past hours bike rental data. This is what we -# call aleatoric uncertainty. -# -# Quantile regression makes it possible to give a finer description of that -# distribution without making strong assumptions on its shape. -quantile_list = [0.05, 0.5, 0.95] - -for quantile in quantile_list: - model = HistGradientBoostingRegressor(loss="quantile", quantile=quantile) - cv_results = cross_validate( - model, - X, - y, - cv=ts_cv, - scoring=scoring, - n_jobs=2, - ) - time = cv_results["fit_time"] - scores["fit_time"].append(f"{time.mean():.2f} ± {time.std():.2f} s") - - scores["loss"].append(f"quantile {int(quantile*100)}") - for key, value in cv_results.items(): - if key.startswith("test_"): - metric = key.split("test_")[1] - scores = consolidate_scores(cv_results, scores, metric) - -scores_df = pl.DataFrame(scores) -scores_df - - -# %% -# Let us take a look at the losses that minimise each metric. 
-def min_arg(col): - col_split = pl.col(col).str.split(" ") - return pl.arg_sort_by( - col_split.list.get(0).cast(pl.Float64), - col_split.list.get(2).cast(pl.Float64), - ).first() - - -scores_df.select( - pl.col("loss").get(min_arg(col_name)).alias(col_name) - for col_name in scores_df.columns - if col_name != "loss" -) - -# %% -# Even if the score distributions overlap due to the variance in the dataset, -# it is true that the average RMSE is lower when `loss="squared_error"`, whereas -# the average MAPE is lower when `loss="absolute_error"` as expected. That is -# also the case for the Mean Pinball Loss with the quantiles 5 and 95. The score -# corresponding to the 50 quantile loss is overlapping with the score obtained -# by minimizing other loss functions, which is also the case for the MAE. -# -# A qualitative look at the predictions -# ------------------------------------- -# We can now visualize the performance of the model with regards -# to the 5th percentile, median and the 95th percentile: -all_splits = list(ts_cv.split(X, y)) -train_idx, test_idx = all_splits[0] - -X_train, X_test = X[train_idx, :], X[test_idx, :] -y_train, y_test = y[train_idx], y[test_idx] - -max_iter = 50 -gbrt_mean_poisson = HistGradientBoostingRegressor(loss="poisson", max_iter=max_iter) -gbrt_mean_poisson.fit(X_train, y_train) -mean_predictions = gbrt_mean_poisson.predict(X_test) - -gbrt_median = HistGradientBoostingRegressor( - loss="quantile", quantile=0.5, max_iter=max_iter -) -gbrt_median.fit(X_train, y_train) -median_predictions = gbrt_median.predict(X_test) - -gbrt_percentile_5 = HistGradientBoostingRegressor( - loss="quantile", quantile=0.05, max_iter=max_iter -) -gbrt_percentile_5.fit(X_train, y_train) -percentile_5_predictions = gbrt_percentile_5.predict(X_test) - -gbrt_percentile_95 = HistGradientBoostingRegressor( - loss="quantile", quantile=0.95, max_iter=max_iter -) -gbrt_percentile_95.fit(X_train, y_train) -percentile_95_predictions = 
gbrt_percentile_95.predict(X_test) - -# %% -# We can now take a look at the predictions made by the regression models: -last_hours = slice(-96, None) -fig, ax = plt.subplots(figsize=(15, 7)) -plt.title("Predictions by regression models") -ax.plot( - y_test[last_hours], - "x-", - alpha=0.2, - label="Actual demand", - color="black", -) -ax.plot( - median_predictions[last_hours], - "^-", - label="GBRT median", -) -ax.plot( - mean_predictions[last_hours], - "x-", - label="GBRT mean (Poisson)", -) -ax.fill_between( - np.arange(96), - percentile_5_predictions[last_hours], - percentile_95_predictions[last_hours], - alpha=0.3, - label="GBRT 90% interval", -) -_ = ax.legend() - -# %% -# Here it's interesting to notice that the blue area between the 5% and 95% -# percentile estimators has a width that varies with the time of the day: -# -# - At night, the blue band is much narrower: the pair of models is quite -# certain that there will be a small number of bike rentals. And furthermore -# these seem correct in the sense that the actual demand stays in that blue -# band. -# - During the day, the blue band is much wider: the uncertainty grows, probably -# because of the variability of the weather that can have a very large impact, -# especially on week-ends. -# - We can also see that during week-days, the commute pattern is still visible in -# the 5% and 95% estimations. -# - Finally, it is expected that 10% of the time, the actual demand does not lie -# between the 5% and 95% percentile estimates. On this test span, the actual -# demand seems to be higher, especially during the rush hours. It might reveal that -# our 95% percentile estimator underestimates the demand peaks. This could be be -# quantitatively confirmed by computing empirical coverage numbers as done in -# the :ref:`calibration of confidence intervals `. 
-# -# Looking at the performance of non-linear regression models vs -# the best models: -from sklearn.metrics import PredictionErrorDisplay - -fig, axes = plt.subplots(ncols=3, figsize=(15, 6), sharey=True) -fig.suptitle("Non-linear regression models") -predictions = [ - median_predictions, - percentile_5_predictions, - percentile_95_predictions, -] -labels = [ - "Median", - "5th percentile", - "95th percentile", -] -for ax, pred, label in zip(axes, predictions, labels): - PredictionErrorDisplay.from_predictions( - y_true=y_test, - y_pred=pred, - kind="residual_vs_predicted", - scatter_kwargs={"alpha": 0.3}, - ax=ax, - ) - ax.set(xlabel="Predicted demand", ylabel="True demand") - ax.legend(["Best model", label]) - -plt.show() - -# %% -# Conclusion -# ---------- -# Through this example we explored time series forecasting using lagged -# features. We compared a naive regression (using the standardized -# :class:`~sklearn.model_selection.train_test_split`) with a proper time -# series evaluation strategy using -# :class:`~sklearn.model_selection.TimeSeriesSplit`. We observed that the -# model trained using :class:`~sklearn.model_selection.train_test_split`, -# having a default value of `shuffle` set to `True` produced an overly -# optimistic Mean Average Percentage Error (MAPE). The results -# produced from the time-based split better represent the performance -# of our time-series regression model. We also analyzed the predictive uncertainty -# of our model via Quantile Regression. Predictions based on the 5th and -# 95th percentile using `loss="quantile"` provide us with a quantitative estimate -# of the uncertainty of the forecasts made by our time series regression model. -# Uncertainty estimation can also be performed -# using `MAPIE `_, -# that provides an implementation based on recent work on conformal prediction -# methods and estimates both aleatoric and epistemic uncertainty at the same time. 
-# Furthermore, functionalities provided -# by `sktime `_ -# can be used to extend scikit-learn estimators by making use of recursive time -# series forecasting, that enables dynamic predictions of future values. +""" +=========================================== +Lagged features for time series forecasting +=========================================== + +This example demonstrates how Polars-engineered lagged features can be used +for time series forecasting with +:class:`~sklearn.ensemble.HistGradientBoostingRegressor` on the Bike Sharing +Demand dataset. + +See the example on +:ref:`sphx_glr_auto_examples_applications_plot_cyclical_feature_engineering.py` +for some data exploration on this dataset and a demo on periodic feature +engineering. + +""" + +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + +# %% +# Analyzing the Bike Sharing Demand dataset +# ----------------------------------------- +# +# We start by loading the data from the OpenML repository +# as a pandas dataframe. This will be replaced with Polars +# once `fetch_openml` adds a native support for it. +# We convert to Polars for feature engineering, as it automatically caches +# common subexpressions which are reused in multiple expressions +# (like `pl.col("count").shift(1)` below). See +# https://docs.pola.rs/user-guide/lazy/optimizations/ for more information. + +import numpy as np +import polars as pl + +from sklearn.datasets import fetch_openml + +pl.Config.set_fmt_str_lengths(20) + +bike_sharing = fetch_openml( + "Bike_Sharing_Demand", version=2, as_frame=True, parser="pandas" +) +df = bike_sharing.frame +df = pl.DataFrame({col: df[col].to_numpy() for col in df.columns}) + +# %% +# Next, we take a look at the statistical summary of the dataset +# so that we can better understand the data that we are working with. 
+import polars.selectors as cs + +summary = df.select(cs.numeric()).describe() +summary + +# %% +# Let us look at the count of the seasons `"fall"`, `"spring"`, `"summer"` +# and `"winter"` present in the dataset to confirm they are balanced. + +import matplotlib.pyplot as plt + +df["season"].value_counts() + + +# %% +# Generating Polars-engineered lagged features +# -------------------------------------------- +# Let's consider the problem of predicting the demand at the +# next hour given past demands. Since the demand is a continuous +# variable, one could intuitively use any regression model. However, we do +# not have the usual `(X_train, y_train)` dataset. Instead, we just have +# the `y_train` demand data sequentially organized by time. +lagged_df = df.select( + "count", + *[pl.col("count").shift(i).alias(f"lagged_count_{i}h") for i in [1, 2, 3]], + lagged_count_1d=pl.col("count").shift(24), + lagged_count_1d_1h=pl.col("count").shift(24 + 1), + lagged_count_7d=pl.col("count").shift(7 * 24), + lagged_count_7d_1h=pl.col("count").shift(7 * 24 + 1), + lagged_mean_24h=pl.col("count").shift(1).rolling_mean(24), + lagged_max_24h=pl.col("count").shift(1).rolling_max(24), + lagged_min_24h=pl.col("count").shift(1).rolling_min(24), + lagged_mean_7d=pl.col("count").shift(1).rolling_mean(7 * 24), + lagged_max_7d=pl.col("count").shift(1).rolling_max(7 * 24), + lagged_min_7d=pl.col("count").shift(1).rolling_min(7 * 24), +) +lagged_df.tail(10) + +# %% +# Watch out however, the first lines have undefined values because their own +# past is unknown. This depends on how much lag we used: +lagged_df.head(10) + +# %% +# We can now separate the lagged features in a matrix `X` and the target variable +# (the counts to predict) in an array of the same first dimension `y`. 
+lagged_df = lagged_df.drop_nulls() +X = lagged_df.drop("count") +y = lagged_df["count"] +print("X shape: {}\ny shape: {}".format(X.shape, y.shape)) + +# %% +# Naive evaluation of the next hour bike demand regression +# -------------------------------------------------------- +# Let's randomly split our tabularized dataset to train a gradient +# boosting regression tree (GBRT) model and evaluate it using Mean +# Absolute Percentage Error (MAPE). If our model is aimed at forecasting +# (i.e., predicting future data from past data), we should not use training +# data that come after the testing data. In time series machine learning +# the "i.i.d" (independent and identically distributed) assumption does not +# hold true as the data points are not independent and have a temporal +# relationship. +from sklearn.ensemble import HistGradientBoostingRegressor +from sklearn.model_selection import train_test_split + +X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=42 +) + +model = HistGradientBoostingRegressor().fit(X_train, y_train) + +# %% +# Taking a look at the performance of the model. +from sklearn.metrics import mean_absolute_percentage_error + +y_pred = model.predict(X_test) +mean_absolute_percentage_error(y_test, y_pred) + +# %% +# Proper next hour forecasting evaluation +# --------------------------------------- +# Let's use a proper evaluation splitting strategy that takes into account +# the temporal structure of the dataset to evaluate our model's ability to +# predict data points in the future (to avoid cheating by reading values from +# the lagged features in the training set).
+from sklearn.model_selection import TimeSeriesSplit + +ts_cv = TimeSeriesSplit( + n_splits=3, # to keep the notebook fast enough on common laptops + gap=48, # 2 days data gap between train and test + max_train_size=10000, # keep train sets of comparable sizes + test_size=3000, # for 2 or 3 digits of precision in scores +) +all_splits = list(ts_cv.split(X, y)) + +# %% +# Training the model and evaluating its performance based on MAPE. +train_idx, test_idx = all_splits[0] +X_train, X_test = X[train_idx, :], X[test_idx, :] +y_train, y_test = y[train_idx], y[test_idx] + +model = HistGradientBoostingRegressor().fit(X_train, y_train) +y_pred = model.predict(X_test) +mean_absolute_percentage_error(y_test, y_pred) + +# %% +# The generalization error measured via a shuffled train-test split +# is too optimistic. The generalization via a time-based split is likely to +# be more representative of the true performance of the regression model. +# Let's assess this variability of our error evaluation with proper +# cross-validation: +from sklearn.model_selection import cross_val_score + +cv_mape_scores = -cross_val_score( + model, X, y, cv=ts_cv, scoring="neg_mean_absolute_percentage_error" +) +cv_mape_scores + +# %% +# The variability across splits is quite large! In a real life setting +# it would be advised to use more splits to better assess the variability. +# Let's report the mean CV scores and their standard deviation from now on. +print(f"CV MAPE: {cv_mape_scores.mean():.3f} ± {cv_mape_scores.std():.3f}") + +# %% +# We can compute several combinations of evaluation metrics and loss functions, +# which are reported a bit below.
+from collections import defaultdict + +from sklearn.metrics import ( + make_scorer, + mean_absolute_error, + mean_pinball_loss, + root_mean_squared_error, +) +from sklearn.model_selection import cross_validate + + +def consolidate_scores(cv_results, scores, metric): + """Append the "mean ± std" of the `test_<metric>` CV scores to `scores`.""" + value = cv_results[f"test_{metric}"] # explicit lookup, not the caller's global + digits = 2 if metric == "MAPE" else 1 # MAPE is a ratio: keep 2 decimals + scores[metric].append(f"{value.mean():.{digits}f} ± {value.std():.{digits}f}") + + return scores + + +scoring = { + "MAPE": make_scorer(mean_absolute_percentage_error), + "RMSE": make_scorer(root_mean_squared_error), + "MAE": make_scorer(mean_absolute_error), + "pinball_loss_05": make_scorer(mean_pinball_loss, alpha=0.05), + "pinball_loss_50": make_scorer(mean_pinball_loss, alpha=0.50), + "pinball_loss_95": make_scorer(mean_pinball_loss, alpha=0.95), +} +loss_functions = ["squared_error", "poisson", "absolute_error"] +scores = defaultdict(list) +for loss_func in loss_functions: + model = HistGradientBoostingRegressor(loss=loss_func) + cv_results = cross_validate( + model, + X, + y, + cv=ts_cv, + scoring=scoring, + n_jobs=2, + ) + time = cv_results["fit_time"] + scores["loss"].append(loss_func) + scores["fit_time"].append(f"{time.mean():.2f} ± {time.std():.2f} s") + + for key, value in cv_results.items(): + if key.startswith("test_"): + metric = key.split("test_")[1] + scores = consolidate_scores(cv_results, scores, metric) + + +# %% +# Modeling predictive uncertainty via quantile regression +# ------------------------------------------------------- +# Instead of modeling the expected value of the distribution of +# :math:`Y|X` like the least squares and Poisson losses do, one could try to +# estimate quantiles of the conditional distribution. +# +# :math:`Y|X=x_i` is expected to be a random variable for a given data point +# :math:`x_i` because we expect that the number of rentals cannot be 100% +# accurately predicted from the features.
It can be influenced by other +# variables not properly captured by the existing lagged features. For +# instance whether or not it will rain in the next hour cannot be fully +# anticipated from the past hours bike rental data. This is what we +# call aleatoric uncertainty. +# +# Quantile regression makes it possible to give a finer description of that +# distribution without making strong assumptions on its shape. +quantile_list = [0.05, 0.5, 0.95] + +for quantile in quantile_list: + model = HistGradientBoostingRegressor(loss="quantile", quantile=quantile) + cv_results = cross_validate( + model, + X, + y, + cv=ts_cv, + scoring=scoring, + n_jobs=2, + ) + time = cv_results["fit_time"] + scores["fit_time"].append(f"{time.mean():.2f} ± {time.std():.2f} s") + + scores["loss"].append(f"quantile {int(quantile*100)}") + for key, value in cv_results.items(): + if key.startswith("test_"): + metric = key.split("test_")[1] + scores = consolidate_scores(cv_results, scores, metric) + +scores_df = pl.DataFrame(scores) +scores_df + + +# %% +# Let us take a look at the losses that minimise each metric. +def min_arg(col): + col_split = pl.col(col).str.split(" ") + return pl.arg_sort_by( + col_split.list.get(0).cast(pl.Float64), + col_split.list.get(2).cast(pl.Float64), + ).first() + + +scores_df.select( + pl.col("loss").get(min_arg(col_name)).alias(col_name) + for col_name in scores_df.columns + if col_name != "loss" +) + +# %% +# Even if the score distributions overlap due to the variance in the dataset, +# it is true that the average RMSE is lower when `loss="squared_error"`, whereas +# the average MAPE is lower when `loss="absolute_error"` as expected. That is +# also the case for the Mean Pinball Loss with the quantiles 5 and 95. The score +# corresponding to the 50 quantile loss is overlapping with the score obtained +# by minimizing other loss functions, which is also the case for the MAE. 
+# +# A qualitative look at the predictions +# ------------------------------------- +# We can now visualize the performance of the model with regards +# to the 5th percentile, median and the 95th percentile: +all_splits = list(ts_cv.split(X, y)) +train_idx, test_idx = all_splits[0] + +X_train, X_test = X[train_idx, :], X[test_idx, :] +y_train, y_test = y[train_idx], y[test_idx] + +max_iter = 50 +gbrt_mean_poisson = HistGradientBoostingRegressor(loss="poisson", max_iter=max_iter) +gbrt_mean_poisson.fit(X_train, y_train) +mean_predictions = gbrt_mean_poisson.predict(X_test) + +gbrt_median = HistGradientBoostingRegressor( + loss="quantile", quantile=0.5, max_iter=max_iter +) +gbrt_median.fit(X_train, y_train) +median_predictions = gbrt_median.predict(X_test) + +gbrt_percentile_5 = HistGradientBoostingRegressor( + loss="quantile", quantile=0.05, max_iter=max_iter +) +gbrt_percentile_5.fit(X_train, y_train) +percentile_5_predictions = gbrt_percentile_5.predict(X_test) + +gbrt_percentile_95 = HistGradientBoostingRegressor( + loss="quantile", quantile=0.95, max_iter=max_iter +) +gbrt_percentile_95.fit(X_train, y_train) +percentile_95_predictions = gbrt_percentile_95.predict(X_test) + +# %% +# We can now take a look at the predictions made by the regression models: +last_hours = slice(-96, None) +fig, ax = plt.subplots(figsize=(15, 7)) +plt.title("Predictions by regression models") +ax.plot( + y_test[last_hours], + "x-", + alpha=0.2, + label="Actual demand", + color="black", +) +ax.plot( + median_predictions[last_hours], + "^-", + label="GBRT median", +) +ax.plot( + mean_predictions[last_hours], + "x-", + label="GBRT mean (Poisson)", +) +ax.fill_between( + np.arange(96), + percentile_5_predictions[last_hours], + percentile_95_predictions[last_hours], + alpha=0.3, + label="GBRT 90% interval", +) +_ = ax.legend() + +# %% +# Here it's interesting to notice that the blue area between the 5% and 95% +# percentile estimators has a width that varies with the time of the day: +# 
+# - At night, the blue band is much narrower: the pair of models is quite +# certain that there will be a small number of bike rentals. And furthermore +# these seem correct in the sense that the actual demand stays in that blue +# band. +# - During the day, the blue band is much wider: the uncertainty grows, probably +# because of the variability of the weather that can have a very large impact, +# especially on week-ends. +# - We can also see that during week-days, the commute pattern is still visible in +# the 5% and 95% estimations. +# - Finally, it is expected that 10% of the time, the actual demand does not lie +# between the 5% and 95% percentile estimates. On this test span, the actual +# demand seems to be higher, especially during the rush hours. It might reveal that +# our 95% percentile estimator underestimates the demand peaks. This could be +# quantitatively confirmed by computing empirical coverage numbers as done in +# the :ref:`calibration of confidence intervals <calibration-section>`. +# +# Looking at the performance of non-linear regression models vs +# the best models: +from sklearn.metrics import PredictionErrorDisplay + +fig, axes = plt.subplots(ncols=3, figsize=(15, 6), sharey=True) +fig.suptitle("Non-linear regression models") +predictions = [ + median_predictions, + percentile_5_predictions, + percentile_95_predictions, +] +labels = [ + "Median", + "5th percentile", + "95th percentile", +] +for ax, pred, label in zip(axes, predictions, labels): + PredictionErrorDisplay.from_predictions( + y_true=y_test, + y_pred=pred, + kind="residual_vs_predicted", + scatter_kwargs={"alpha": 0.3}, + ax=ax, + ) + ax.set(xlabel="Predicted demand", ylabel="True demand") + ax.legend(["Best model", label]) + +plt.show() + +# %% +# Conclusion +# ---------- +# Through this example we explored time series forecasting using lagged +# features.
We compared a naive regression (using the standardized +# :class:`~sklearn.model_selection.train_test_split`) with a proper time +# series evaluation strategy using +# :class:`~sklearn.model_selection.TimeSeriesSplit`. We observed that the +# model trained using :class:`~sklearn.model_selection.train_test_split`, +# having a default value of `shuffle` set to `True` produced an overly +# optimistic Mean Absolute Percentage Error (MAPE). The results +# produced from the time-based split better represent the performance +# of our time-series regression model. We also analyzed the predictive uncertainty +# of our model via Quantile Regression. Predictions based on the 5th and +# 95th percentile using `loss="quantile"` provide us with a quantitative estimate +# of the uncertainty of the forecasts made by our time series regression model. +# Uncertainty estimation can also be performed +# using `MAPIE <https://mapie.readthedocs.io/en/latest/index.html>`_, +# that provides an implementation based on recent work on conformal prediction +# methods and estimates both aleatoric and epistemic uncertainty at the same time. +# Furthermore, functionalities provided +# by `sktime <https://www.sktime.net/en/latest/users.html>`_ +# can be used to extend scikit-learn estimators by making use of recursive time +# series forecasting, that enables dynamic predictions of future values.
diff --git a/examples/bicluster/plot_bicluster_newsgroups.py b/examples/bicluster/plot_bicluster_newsgroups.py index 0fef820bb9f2a..d110f3e6de9c5 100644 --- a/examples/bicluster/plot_bicluster_newsgroups.py +++ b/examples/bicluster/plot_bicluster_newsgroups.py @@ -23,6 +23,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import operator from collections import defaultdict from time import time diff --git a/examples/classification/plot_classifier_comparison.py b/examples/classification/plot_classifier_comparison.py index 4fb8a142e2606..3108618091b4f 100644 --- a/examples/classification/plot_classifier_comparison.py +++ b/examples/classification/plot_classifier_comparison.py @@ -19,6 +19,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Code source: Gaël Varoquaux # Andreas Müller # Modified for documentation by Jaques Grobler diff --git a/examples/classification/plot_lda.py b/examples/classification/plot_lda.py index 88135079529c8..cf052a9379b22 100644 --- a/examples/classification/plot_lda.py +++ b/examples/classification/plot_lda.py @@ -8,6 +8,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/classification/plot_lda_qda.py b/examples/classification/plot_lda_qda.py index 0691f52390a06..599659fdac2dc 100644 --- a/examples/classification/plot_lda_qda.py +++ b/examples/classification/plot_lda_qda.py @@ -11,6 +11,9 @@ deviation with QDA. 
""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Data generation # --------------- diff --git a/examples/cluster/plot_affinity_propagation.py b/examples/cluster/plot_affinity_propagation.py index e286104636d67..2066212abea5d 100644 --- a/examples/cluster/plot_affinity_propagation.py +++ b/examples/cluster/plot_affinity_propagation.py @@ -9,6 +9,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import numpy as np from sklearn import metrics diff --git a/examples/cluster/plot_bisect_kmeans.py b/examples/cluster/plot_bisect_kmeans.py index a562ebbc96ba5..7fc738bf08218 100644 --- a/examples/cluster/plot_bisect_kmeans.py +++ b/examples/cluster/plot_bisect_kmeans.py @@ -14,6 +14,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt from sklearn.cluster import BisectingKMeans, KMeans diff --git a/examples/cluster/plot_cluster_comparison.py b/examples/cluster/plot_cluster_comparison.py index bc6f158c02ed0..539c07cfd442e 100644 --- a/examples/cluster/plot_cluster_comparison.py +++ b/examples/cluster/plot_cluster_comparison.py @@ -24,6 +24,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import time import warnings from itertools import cycle, islice diff --git a/examples/cluster/plot_cluster_iris.py b/examples/cluster/plot_cluster_iris.py index e3a6d6a3d8664..e469eeb3a86ff 100644 --- a/examples/cluster/plot_cluster_iris.py +++ b/examples/cluster/plot_cluster_iris.py @@ -18,6 +18,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Code source: Gaël Varoquaux # Modified for documentation by Jaques Grobler # SPDX-License-Identifier: BSD-3-Clause diff --git a/examples/cluster/plot_dbscan.py b/examples/cluster/plot_dbscan.py index 0b0bd64ecf62b..af56701db846f 100644 --- a/examples/cluster/plot_dbscan.py +++ b/examples/cluster/plot_dbscan.py 
@@ -12,6 +12,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Data generation # --------------- diff --git a/examples/cluster/plot_dict_face_patches.py b/examples/cluster/plot_dict_face_patches.py index 99b241bfdeea9..27eeb07ec7867 100644 --- a/examples/cluster/plot_dict_face_patches.py +++ b/examples/cluster/plot_dict_face_patches.py @@ -21,6 +21,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Load the data # ------------- diff --git a/examples/cluster/plot_digits_agglomeration.py b/examples/cluster/plot_digits_agglomeration.py index 3773a300cb51a..d4a427905d91f 100644 --- a/examples/cluster/plot_digits_agglomeration.py +++ b/examples/cluster/plot_digits_agglomeration.py @@ -8,6 +8,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Code source: Gaël Varoquaux # Modified for documentation by Jaques Grobler # SPDX-License-Identifier: BSD-3-Clause diff --git a/examples/cluster/plot_hdbscan.py b/examples/cluster/plot_hdbscan.py index 630ab1f150fcb..07cea76ae072a 100644 --- a/examples/cluster/plot_hdbscan.py +++ b/examples/cluster/plot_hdbscan.py @@ -12,6 +12,9 @@ We first define a couple utility functions for convenience. 
""" + +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause # %% import matplotlib.pyplot as plt import numpy as np diff --git a/examples/cluster/plot_inductive_clustering.py b/examples/cluster/plot_inductive_clustering.py index b6464459160e3..b92a814cd4ef3 100644 --- a/examples/cluster/plot_inductive_clustering.py +++ b/examples/cluster/plot_inductive_clustering.py @@ -20,6 +20,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Authors: Chirag Nagpal # Christos Aridas diff --git a/examples/cluster/plot_kmeans_digits.py b/examples/cluster/plot_kmeans_digits.py index d61ec91d13d52..3e7c70b9d08a9 100644 --- a/examples/cluster/plot_kmeans_digits.py +++ b/examples/cluster/plot_kmeans_digits.py @@ -25,6 +25,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Load the dataset # ---------------- diff --git a/examples/cluster/plot_kmeans_plusplus.py b/examples/cluster/plot_kmeans_plusplus.py index 69ea738635ddf..355426a2a4872 100644 --- a/examples/cluster/plot_kmeans_plusplus.py +++ b/examples/cluster/plot_kmeans_plusplus.py @@ -10,6 +10,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt from sklearn.cluster import kmeans_plusplus diff --git a/examples/cluster/plot_kmeans_silhouette_analysis.py b/examples/cluster/plot_kmeans_silhouette_analysis.py index a999e83fcac5d..4b5c8d2c6d66d 100644 --- a/examples/cluster/plot_kmeans_silhouette_analysis.py +++ b/examples/cluster/plot_kmeans_silhouette_analysis.py @@ -31,6 +31,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.cm as cm import matplotlib.pyplot as plt import numpy as np diff --git a/examples/cluster/plot_linkage_comparison.py b/examples/cluster/plot_linkage_comparison.py index 793fee059d797..c08dedfbab1bc 100644 --- a/examples/cluster/plot_linkage_comparison.py +++ 
b/examples/cluster/plot_linkage_comparison.py @@ -23,6 +23,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import time import warnings from itertools import cycle, islice diff --git a/examples/cluster/plot_mean_shift.py b/examples/cluster/plot_mean_shift.py index aacbc7f216405..456a1c4ac2020 100644 --- a/examples/cluster/plot_mean_shift.py +++ b/examples/cluster/plot_mean_shift.py @@ -11,6 +11,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import numpy as np from sklearn.cluster import MeanShift, estimate_bandwidth diff --git a/examples/cluster/plot_mini_batch_kmeans.py b/examples/cluster/plot_mini_batch_kmeans.py index 3a6e8aa63786b..d189ed0e02a5c 100644 --- a/examples/cluster/plot_mini_batch_kmeans.py +++ b/examples/cluster/plot_mini_batch_kmeans.py @@ -14,6 +14,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Generate the data # ----------------- diff --git a/examples/compose/plot_compare_reduction.py b/examples/compose/plot_compare_reduction.py index 529366c6244f2..2aeb9fae4af5e 100644 --- a/examples/compose/plot_compare_reduction.py +++ b/examples/compose/plot_compare_reduction.py @@ -20,6 +20,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Authors: Robert McGibbon # Joel Nothman # Guillaume Lemaitre diff --git a/examples/compose/plot_digits_pipe.py b/examples/compose/plot_digits_pipe.py index 511878fcd4d99..c23e9b3b96a08 100644 --- a/examples/compose/plot_digits_pipe.py +++ b/examples/compose/plot_digits_pipe.py @@ -10,6 +10,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Code source: Gaël Varoquaux # Modified for documentation by Jaques Grobler # SPDX-License-Identifier: BSD-3-Clause diff --git a/examples/covariance/plot_covariance_estimation.py b/examples/covariance/plot_covariance_estimation.py index 04baa0fd98bc0..1fdede5364eec 
100644 --- a/examples/covariance/plot_covariance_estimation.py +++ b/examples/covariance/plot_covariance_estimation.py @@ -15,6 +15,9 @@ trade-off. """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Generate sample data # -------------------- diff --git a/examples/covariance/plot_lw_vs_oas.py b/examples/covariance/plot_lw_vs_oas.py index 107f6bd1c29cc..c1c41bc811a85 100644 --- a/examples/covariance/plot_lw_vs_oas.py +++ b/examples/covariance/plot_lw_vs_oas.py @@ -21,6 +21,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np from scipy.linalg import cholesky, toeplitz diff --git a/examples/covariance/plot_mahalanobis_distances.py b/examples/covariance/plot_mahalanobis_distances.py index 537d3fa0d9d8a..a1507c3ef162e 100644 --- a/examples/covariance/plot_mahalanobis_distances.py +++ b/examples/covariance/plot_mahalanobis_distances.py @@ -62,6 +62,9 @@ """ # noqa: E501 +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Generate data # -------------- diff --git a/examples/covariance/plot_robust_vs_empirical_covariance.py b/examples/covariance/plot_robust_vs_empirical_covariance.py index c61a97ddd979b..54871c495e82c 100644 --- a/examples/covariance/plot_robust_vs_empirical_covariance.py +++ b/examples/covariance/plot_robust_vs_empirical_covariance.py @@ -53,6 +53,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.font_manager import matplotlib.pyplot as plt import numpy as np diff --git a/examples/cross_decomposition/plot_compare_cross_decomposition.py b/examples/cross_decomposition/plot_compare_cross_decomposition.py index 762c42dfdf31c..1fce2f70bc42a 100644 --- a/examples/cross_decomposition/plot_compare_cross_decomposition.py +++ b/examples/cross_decomposition/plot_compare_cross_decomposition.py @@ -21,6 +21,9 @@ """ +# Authors: The 
scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Dataset based latent variables model # ------------------------------------ diff --git a/examples/cross_decomposition/plot_pcr_vs_pls.py b/examples/cross_decomposition/plot_pcr_vs_pls.py index 895c75dc1a728..e0606a5c3dd42 100644 --- a/examples/cross_decomposition/plot_pcr_vs_pls.py +++ b/examples/cross_decomposition/plot_pcr_vs_pls.py @@ -33,6 +33,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # The data # -------- diff --git a/examples/datasets/plot_digits_last_image.py b/examples/datasets/plot_digits_last_image.py index ce6c48e58715e..836db4b37758c 100644 --- a/examples/datasets/plot_digits_last_image.py +++ b/examples/datasets/plot_digits_last_image.py @@ -14,6 +14,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Code source: Gaël Varoquaux # Modified for documentation by Jaques Grobler # SPDX-License-Identifier: BSD-3-Clause diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py index d36ebca1ebe83..ea9c89bbb6082 100644 --- a/examples/datasets/plot_iris_dataset.py +++ b/examples/datasets/plot_iris_dataset.py @@ -15,6 +15,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Code source: Gaël Varoquaux # Modified for documentation by Jaques Grobler # SPDX-License-Identifier: BSD-3-Clause diff --git a/examples/datasets/plot_random_dataset.py b/examples/datasets/plot_random_dataset.py index e5cbdb080b59f..50ae2def9eaae 100644 --- a/examples/datasets/plot_random_dataset.py +++ b/examples/datasets/plot_random_dataset.py @@ -14,6 +14,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt from sklearn.datasets import make_blobs, make_classification, make_gaussian_quantiles diff --git a/examples/datasets/plot_random_multilabel_dataset.py 
b/examples/datasets/plot_random_multilabel_dataset.py index e6e2d6ad9edcf..36a8bc23e4d11 100644 --- a/examples/datasets/plot_random_multilabel_dataset.py +++ b/examples/datasets/plot_random_multilabel_dataset.py @@ -35,6 +35,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/decomposition/plot_faces_decomposition.py b/examples/decomposition/plot_faces_decomposition.py index 2ed22a52f7d34..7082c922e1086 100644 --- a/examples/decomposition/plot_faces_decomposition.py +++ b/examples/decomposition/plot_faces_decomposition.py @@ -13,6 +13,9 @@ - License: BSD 3 clause """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Dataset preparation # ------------------- diff --git a/examples/decomposition/plot_ica_blind_source_separation.py b/examples/decomposition/plot_ica_blind_source_separation.py index 584d6b9509589..e8d571d814a1b 100644 --- a/examples/decomposition/plot_ica_blind_source_separation.py +++ b/examples/decomposition/plot_ica_blind_source_separation.py @@ -14,6 +14,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Generate sample data # -------------------- diff --git a/examples/decomposition/plot_image_denoising.py b/examples/decomposition/plot_image_denoising.py index 646669d1469ff..5248fdff5a8ca 100644 --- a/examples/decomposition/plot_image_denoising.py +++ b/examples/decomposition/plot_image_denoising.py @@ -32,6 +32,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Generate distorted image # ------------------------ diff --git a/examples/decomposition/plot_pca_iris.py b/examples/decomposition/plot_pca_iris.py index f574e14bdfd17..66c7bc7994129 100644 --- a/examples/decomposition/plot_pca_iris.py +++ b/examples/decomposition/plot_pca_iris.py @@ -10,6 +10,9 @@ """ +# Authors: The scikit-learn developers +# 
SPDX-License-Identifier: BSD-3-Clause + # Code source: Gaël Varoquaux # SPDX-License-Identifier: BSD-3-Clause diff --git a/examples/decomposition/plot_pca_vs_lda.py b/examples/decomposition/plot_pca_vs_lda.py index e88a0aff7882f..4679a410af76a 100644 --- a/examples/decomposition/plot_pca_vs_lda.py +++ b/examples/decomposition/plot_pca_vs_lda.py @@ -18,6 +18,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt from sklearn import datasets diff --git a/examples/decomposition/plot_sparse_coding.py b/examples/decomposition/plot_sparse_coding.py index c45cd3c83b04f..778f718c2ac87 100644 --- a/examples/decomposition/plot_sparse_coding.py +++ b/examples/decomposition/plot_sparse_coding.py @@ -16,6 +16,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/ensemble/plot_adaboost_multiclass.py b/examples/ensemble/plot_adaboost_multiclass.py index 8ae7372b0874f..db0200997d9e0 100644 --- a/examples/ensemble/plot_adaboost_multiclass.py +++ b/examples/ensemble/plot_adaboost_multiclass.py @@ -25,6 +25,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Noel Dawe # SPDX-License-Identifier: BSD-3-Clause diff --git a/examples/ensemble/plot_forest_importances.py b/examples/ensemble/plot_forest_importances.py index 269451168dd7a..b77e30c327fb4 100644 --- a/examples/ensemble/plot_forest_importances.py +++ b/examples/ensemble/plot_forest_importances.py @@ -13,6 +13,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt # %% diff --git a/examples/ensemble/plot_forest_importances_faces.py b/examples/ensemble/plot_forest_importances_faces.py index 8b8e8751ec5a2..5b8db11b1692e 100644 --- a/examples/ensemble/plot_forest_importances_faces.py +++ b/examples/ensemble/plot_forest_importances_faces.py @@ 
-12,6 +12,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Loading the data and model fitting # ---------------------------------- diff --git a/examples/ensemble/plot_forest_iris.py b/examples/ensemble/plot_forest_iris.py index c2056ce1905d1..78a28e521ff90 100644 --- a/examples/ensemble/plot_forest_iris.py +++ b/examples/ensemble/plot_forest_iris.py @@ -42,6 +42,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np from matplotlib.colors import ListedColormap diff --git a/examples/ensemble/plot_gradient_boosting_categorical.py b/examples/ensemble/plot_gradient_boosting_categorical.py index 2e260a4be1802..e80c0fb6fdc6e 100644 --- a/examples/ensemble/plot_gradient_boosting_categorical.py +++ b/examples/ensemble/plot_gradient_boosting_categorical.py @@ -27,6 +27,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Load Ames Housing dataset # ------------------------- diff --git a/examples/ensemble/plot_gradient_boosting_early_stopping.py b/examples/ensemble/plot_gradient_boosting_early_stopping.py index 6c239e97d66ee..39e8b19a3125f 100644 --- a/examples/ensemble/plot_gradient_boosting_early_stopping.py +++ b/examples/ensemble/plot_gradient_boosting_early_stopping.py @@ -32,6 +32,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Data Preparation # ---------------- diff --git a/examples/ensemble/plot_gradient_boosting_quantile.py b/examples/ensemble/plot_gradient_boosting_quantile.py index 723a494b04db8..3e2c44568de3c 100644 --- a/examples/ensemble/plot_gradient_boosting_quantile.py +++ b/examples/ensemble/plot_gradient_boosting_quantile.py @@ -10,6 +10,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Generate some data for a synthetic regression problem by applying the # function f to 
uniformly sampled random inputs. diff --git a/examples/ensemble/plot_isolation_forest.py b/examples/ensemble/plot_isolation_forest.py index f5fad1d7b9ea9..2bd5bc9e99a0e 100644 --- a/examples/ensemble/plot_isolation_forest.py +++ b/examples/ensemble/plot_isolation_forest.py @@ -16,6 +16,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Data generation # --------------- diff --git a/examples/ensemble/plot_monotonic_constraints.py b/examples/ensemble/plot_monotonic_constraints.py index dcd5f05af626c..40fb61eae19e9 100644 --- a/examples/ensemble/plot_monotonic_constraints.py +++ b/examples/ensemble/plot_monotonic_constraints.py @@ -20,6 +20,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% import matplotlib.pyplot as plt import numpy as np diff --git a/examples/ensemble/plot_random_forest_embedding.py b/examples/ensemble/plot_random_forest_embedding.py index fe26e04ca7789..d3d595df232a9 100644 --- a/examples/ensemble/plot_random_forest_embedding.py +++ b/examples/ensemble/plot_random_forest_embedding.py @@ -26,6 +26,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py index 90441c6d28339..d40d831fb911f 100644 --- a/examples/ensemble/plot_voting_decision_regions.py +++ b/examples/ensemble/plot_voting_decision_regions.py @@ -23,6 +23,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from itertools import product import matplotlib.pyplot as plt diff --git a/examples/ensemble/plot_voting_probas.py b/examples/ensemble/plot_voting_probas.py index 424959e6d5072..848358ca1d208 100644 --- a/examples/ensemble/plot_voting_probas.py +++ b/examples/ensemble/plot_voting_probas.py @@ -23,6 +23,9 @@ """ +# Authors: The scikit-learn developers +# 
SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/ensemble/plot_voting_regressor.py b/examples/ensemble/plot_voting_regressor.py index d33becca505e3..6ccc4e81b700a 100644 --- a/examples/ensemble/plot_voting_regressor.py +++ b/examples/ensemble/plot_voting_regressor.py @@ -23,6 +23,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt from sklearn.datasets import load_diabetes diff --git a/examples/exercises/plot_cv_diabetes.py b/examples/exercises/plot_cv_diabetes.py index 9d0232de9e660..5e582b4b21571 100644 --- a/examples/exercises/plot_cv_diabetes.py +++ b/examples/exercises/plot_cv_diabetes.py @@ -10,6 +10,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Load dataset and apply GridSearchCV # ----------------------------------- diff --git a/examples/exercises/plot_digits_classification_exercise.py b/examples/exercises/plot_digits_classification_exercise.py index 25b0171c66421..d65006178ca4f 100644 --- a/examples/exercises/plot_digits_classification_exercise.py +++ b/examples/exercises/plot_digits_classification_exercise.py @@ -12,6 +12,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from sklearn import datasets, linear_model, neighbors X_digits, y_digits = datasets.load_digits(return_X_y=True) diff --git a/examples/exercises/plot_iris_exercise.py b/examples/exercises/plot_iris_exercise.py index 07687b920e1b8..8dcc4368ab620 100644 --- a/examples/exercises/plot_iris_exercise.py +++ b/examples/exercises/plot_iris_exercise.py @@ -10,6 +10,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/feature_selection/plot_f_test_vs_mi.py b/examples/feature_selection/plot_f_test_vs_mi.py index 5c015e7e4fd58..e3c75d39e0a27 100644 --- 
a/examples/feature_selection/plot_f_test_vs_mi.py +++ b/examples/feature_selection/plot_f_test_vs_mi.py @@ -23,6 +23,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/feature_selection/plot_feature_selection.py b/examples/feature_selection/plot_feature_selection.py index 2cf64cb6ea598..520747f417db1 100644 --- a/examples/feature_selection/plot_feature_selection.py +++ b/examples/feature_selection/plot_feature_selection.py @@ -16,6 +16,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Generate sample data # -------------------- diff --git a/examples/feature_selection/plot_feature_selection_pipeline.py b/examples/feature_selection/plot_feature_selection_pipeline.py index 1d7c44050ea78..b33bfeda0a37a 100644 --- a/examples/feature_selection/plot_feature_selection_pipeline.py +++ b/examples/feature_selection/plot_feature_selection_pipeline.py @@ -10,6 +10,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # We will start by generating a binary classification dataset. Subsequently, we # will divide the dataset into two subsets. 
diff --git a/examples/feature_selection/plot_rfe_digits.py b/examples/feature_selection/plot_rfe_digits.py index 198a3d6f3af90..360a9bd92837f 100644 --- a/examples/feature_selection/plot_rfe_digits.py +++ b/examples/feature_selection/plot_rfe_digits.py @@ -18,6 +18,9 @@ """ # noqa: E501 +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt from sklearn.datasets import load_digits diff --git a/examples/feature_selection/plot_rfe_with_cross_validation.py b/examples/feature_selection/plot_rfe_with_cross_validation.py index 6e4a8ae0ee8c5..4e3e45384e026 100644 --- a/examples/feature_selection/plot_rfe_with_cross_validation.py +++ b/examples/feature_selection/plot_rfe_with_cross_validation.py @@ -8,6 +8,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Data generation # --------------- diff --git a/examples/feature_selection/plot_select_from_model_diabetes.py b/examples/feature_selection/plot_select_from_model_diabetes.py index f008d8d6e8b68..9359e9a982742 100644 --- a/examples/feature_selection/plot_select_from_model_diabetes.py +++ b/examples/feature_selection/plot_select_from_model_diabetes.py @@ -19,6 +19,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Loading the data # ---------------- diff --git a/examples/gaussian_process/plot_gpc_iris.py b/examples/gaussian_process/plot_gpc_iris.py index 88c536d8824c8..a01d9ac081d7e 100644 --- a/examples/gaussian_process/plot_gpc_iris.py +++ b/examples/gaussian_process/plot_gpc_iris.py @@ -10,6 +10,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/gaussian_process/plot_gpr_on_structured_data.py b/examples/gaussian_process/plot_gpr_on_structured_data.py index e702f1fe0769a..f3a8de5d018ef 100644 --- a/examples/gaussian_process/plot_gpr_on_structured_data.py 
+++ b/examples/gaussian_process/plot_gpr_on_structured_data.py @@ -38,6 +38,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% import numpy as np diff --git a/examples/impute/plot_iterative_imputer_variants_comparison.py b/examples/impute/plot_iterative_imputer_variants_comparison.py index 445a08c05f02f..f06875a5f7fcd 100644 --- a/examples/impute/plot_iterative_imputer_variants_comparison.py +++ b/examples/impute/plot_iterative_imputer_variants_comparison.py @@ -44,6 +44,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np import pandas as pd diff --git a/examples/inspection/plot_causal_interpretation.py b/examples/inspection/plot_causal_interpretation.py index 68f10b5304842..cf6c72f29951d 100644 --- a/examples/inspection/plot_causal_interpretation.py +++ b/examples/inspection/plot_causal_interpretation.py @@ -15,6 +15,9 @@ identifying that causal effect. 
""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # The dataset: simulated hourly wages # ----------------------------------- diff --git a/examples/inspection/plot_linear_model_coefficient_interpretation.py b/examples/inspection/plot_linear_model_coefficient_interpretation.py index 0e11f01937ebc..f0ec361f4fa81 100644 --- a/examples/inspection/plot_linear_model_coefficient_interpretation.py +++ b/examples/inspection/plot_linear_model_coefficient_interpretation.py @@ -39,6 +39,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% import matplotlib.pyplot as plt import numpy as np diff --git a/examples/inspection/plot_partial_dependence.py b/examples/inspection/plot_partial_dependence.py index 4c3e0f409eeff..eace8afeb96a0 100644 --- a/examples/inspection/plot_partial_dependence.py +++ b/examples/inspection/plot_partial_dependence.py @@ -34,6 +34,9 @@ Graphical Statistics, 24(1): 44-65 <1309.6392>` """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Bike sharing dataset preprocessing # ---------------------------------- diff --git a/examples/inspection/plot_permutation_importance.py b/examples/inspection/plot_permutation_importance.py index 66b40fcd25bc8..73c5179a09b87 100644 --- a/examples/inspection/plot_permutation_importance.py +++ b/examples/inspection/plot_permutation_importance.py @@ -25,6 +25,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Data Loading and Feature Engineering # ------------------------------------ diff --git a/examples/inspection/plot_permutation_importance_multicollinear.py b/examples/inspection/plot_permutation_importance_multicollinear.py index a8fe52b1565d9..212c22cdf64e0 100644 --- a/examples/inspection/plot_permutation_importance_multicollinear.py +++ b/examples/inspection/plot_permutation_importance_multicollinear.py @@ -21,6 +21,9 @@ """ +# Authors: The 
scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Random Forest Feature Importance on Breast Cancer Data # ------------------------------------------------------ diff --git a/examples/linear_model/plot_ard.py b/examples/linear_model/plot_ard.py index e39baa111c4e2..9b1c355ef0ef5 100644 --- a/examples/linear_model/plot_ard.py +++ b/examples/linear_model/plot_ard.py @@ -19,6 +19,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Author: Arturo Amor # %% diff --git a/examples/linear_model/plot_bayesian_ridge_curvefit.py b/examples/linear_model/plot_bayesian_ridge_curvefit.py index 12f48b9ce347d..f1c86a196c2a3 100644 --- a/examples/linear_model/plot_bayesian_ridge_curvefit.py +++ b/examples/linear_model/plot_bayesian_ridge_curvefit.py @@ -26,6 +26,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Author: Yoshihiro Uchida # %% diff --git a/examples/linear_model/plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py b/examples/linear_model/plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py index b31d95348c083..e118847a8737c 100644 --- a/examples/linear_model/plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py +++ b/examples/linear_model/plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py @@ -17,6 +17,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Let's start by loading the dataset and creating some sample weights. 
import numpy as np diff --git a/examples/linear_model/plot_iris_logistic.py b/examples/linear_model/plot_iris_logistic.py index ca7cb00d5f878..288de6a886550 100644 --- a/examples/linear_model/plot_iris_logistic.py +++ b/examples/linear_model/plot_iris_logistic.py @@ -10,6 +10,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Code source: Gaël Varoquaux # Modified for documentation by Jaques Grobler # SPDX-License-Identifier: BSD-3-Clause diff --git a/examples/linear_model/plot_lasso_and_elasticnet.py b/examples/linear_model/plot_lasso_and_elasticnet.py index 78ab9624b64a4..f0eb2400c95ef 100644 --- a/examples/linear_model/plot_lasso_and_elasticnet.py +++ b/examples/linear_model/plot_lasso_and_elasticnet.py @@ -21,6 +21,9 @@ compared with the ground-truth. """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Author: Arturo Amor # %% diff --git a/examples/linear_model/plot_lasso_dense_vs_sparse_data.py b/examples/linear_model/plot_lasso_dense_vs_sparse_data.py index a797d5d708160..920994da1ffb5 100644 --- a/examples/linear_model/plot_lasso_dense_vs_sparse_data.py +++ b/examples/linear_model/plot_lasso_dense_vs_sparse_data.py @@ -8,6 +8,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from time import time from scipy import linalg, sparse diff --git a/examples/linear_model/plot_logistic.py b/examples/linear_model/plot_logistic.py index a22bc5cb39119..8ddd4deef2a8f 100644 --- a/examples/linear_model/plot_logistic.py +++ b/examples/linear_model/plot_logistic.py @@ -9,6 +9,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Code source: Gael Varoquaux # SPDX-License-Identifier: BSD-3-Clause diff --git a/examples/linear_model/plot_nnls.py b/examples/linear_model/plot_nnls.py index 05a8550ec166b..9ab19e19a1882 100644 --- a/examples/linear_model/plot_nnls.py +++ b/examples/linear_model/plot_nnls.py @@ -9,6 +9,9 
@@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/linear_model/plot_ols.py b/examples/linear_model/plot_ols.py index d94bfadf435a9..bb4a242cdbad2 100644 --- a/examples/linear_model/plot_ols.py +++ b/examples/linear_model/plot_ols.py @@ -14,6 +14,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Code source: Jaques Grobler # SPDX-License-Identifier: BSD-3-Clause diff --git a/examples/linear_model/plot_ols_3d.py b/examples/linear_model/plot_ols_3d.py index 328131a3b81c0..38fbcca546297 100644 --- a/examples/linear_model/plot_ols_3d.py +++ b/examples/linear_model/plot_ols_3d.py @@ -9,6 +9,9 @@ give us much regarding `y` when compared to just feature 1. """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Code source: Gaël Varoquaux # Modified for documentation by Jaques Grobler # SPDX-License-Identifier: BSD-3-Clause diff --git a/examples/linear_model/plot_ols_ridge_variance.py b/examples/linear_model/plot_ols_ridge_variance.py index 324770887afcf..86c28b4d3ae23 100644 --- a/examples/linear_model/plot_ols_ridge_variance.py +++ b/examples/linear_model/plot_ols_ridge_variance.py @@ -19,6 +19,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Code source: Gaël Varoquaux # Modified for documentation by Jaques Grobler # SPDX-License-Identifier: BSD-3-Clause diff --git a/examples/linear_model/plot_omp.py b/examples/linear_model/plot_omp.py index aa6044173b8ce..815b3c9425fdf 100644 --- a/examples/linear_model/plot_omp.py +++ b/examples/linear_model/plot_omp.py @@ -8,6 +8,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/linear_model/plot_ransac.py b/examples/linear_model/plot_ransac.py index 7b89150c4bd20..ecef43e79f9bf 100644 --- 
a/examples/linear_model/plot_ransac.py +++ b/examples/linear_model/plot_ransac.py @@ -15,6 +15,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import numpy as np from matplotlib import pyplot as plt diff --git a/examples/linear_model/plot_ridge_coeffs.py b/examples/linear_model/plot_ridge_coeffs.py index 4bfb1f4c29325..0cff8c79a55bb 100644 --- a/examples/linear_model/plot_ridge_coeffs.py +++ b/examples/linear_model/plot_ridge_coeffs.py @@ -51,6 +51,9 @@ capable of generalizing well to unseen data while avoiding overfitting. """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Author: Kornel Kielczewski -- # %% diff --git a/examples/linear_model/plot_robust_fit.py b/examples/linear_model/plot_robust_fit.py index 79213c9a8e83e..2b447e6175cdc 100644 --- a/examples/linear_model/plot_robust_fit.py +++ b/examples/linear_model/plot_robust_fit.py @@ -30,6 +30,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import numpy as np from matplotlib import pyplot as plt diff --git a/examples/linear_model/plot_sgd_iris.py b/examples/linear_model/plot_sgd_iris.py index 838f612d4659c..46dc2e7c31cd1 100644 --- a/examples/linear_model/plot_sgd_iris.py +++ b/examples/linear_model/plot_sgd_iris.py @@ -9,6 +9,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/linear_model/plot_sgd_loss_functions.py b/examples/linear_model/plot_sgd_loss_functions.py index 140562184b946..b0c61da6ddcc1 100644 --- a/examples/linear_model/plot_sgd_loss_functions.py +++ b/examples/linear_model/plot_sgd_loss_functions.py @@ -8,6 +8,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/linear_model/plot_sgd_penalties.py b/examples/linear_model/plot_sgd_penalties.py index 
ff71dba5f20a3..6f8830b52fe7a 100644 --- a/examples/linear_model/plot_sgd_penalties.py +++ b/examples/linear_model/plot_sgd_penalties.py @@ -11,6 +11,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/linear_model/plot_sgd_separating_hyperplane.py b/examples/linear_model/plot_sgd_separating_hyperplane.py index e84ab7c519ae9..90f7502900291 100644 --- a/examples/linear_model/plot_sgd_separating_hyperplane.py +++ b/examples/linear_model/plot_sgd_separating_hyperplane.py @@ -9,6 +9,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/linear_model/plot_sgd_weighted_samples.py b/examples/linear_model/plot_sgd_weighted_samples.py index 4d605e99b4e49..e9e6587004e70 100644 --- a/examples/linear_model/plot_sgd_weighted_samples.py +++ b/examples/linear_model/plot_sgd_weighted_samples.py @@ -8,6 +8,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py b/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py index 60e9cd8078802..aabc8058dc407 100644 --- a/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py +++ b/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py @@ -19,6 +19,9 @@ """ # noqa: E501 +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% import matplotlib import matplotlib.lines as mlines diff --git a/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py b/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py index 404250a855e0a..fc6b1c57d7ad7 100644 --- a/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py +++ b/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py @@ -20,6 +20,9 @@ """ +# Authors: The 
scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Author: Arthur Mensch import timeit diff --git a/examples/manifold/plot_compare_methods.py b/examples/manifold/plot_compare_methods.py index a3d3947d5b85f..9c123aadda8ea 100644 --- a/examples/manifold/plot_compare_methods.py +++ b/examples/manifold/plot_compare_methods.py @@ -20,6 +20,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Author: Jake Vanderplas -- # %% diff --git a/examples/manifold/plot_swissroll.py b/examples/manifold/plot_swissroll.py index 65df88588efef..803dc391ba4c2 100644 --- a/examples/manifold/plot_swissroll.py +++ b/examples/manifold/plot_swissroll.py @@ -9,6 +9,9 @@ in the data. """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Swiss Roll # --------------------------------------------------- diff --git a/examples/miscellaneous/plot_display_object_visualization.py b/examples/miscellaneous/plot_display_object_visualization.py index 075413379a92c..cf39a1407d28e 100644 --- a/examples/miscellaneous/plot_display_object_visualization.py +++ b/examples/miscellaneous/plot_display_object_visualization.py @@ -15,6 +15,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Load Data and train model # ------------------------- diff --git a/examples/miscellaneous/plot_estimator_representation.py b/examples/miscellaneous/plot_estimator_representation.py index 1c9e3745db0de..683f0c5785f20 100644 --- a/examples/miscellaneous/plot_estimator_representation.py +++ b/examples/miscellaneous/plot_estimator_representation.py @@ -7,6 +7,9 @@ displayed. 
""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from sklearn.compose import make_column_transformer from sklearn.impute import SimpleImputer from sklearn.linear_model import LogisticRegression diff --git a/examples/miscellaneous/plot_johnson_lindenstrauss_bound.py b/examples/miscellaneous/plot_johnson_lindenstrauss_bound.py index 85161a6ee51bb..5528eada1ed4a 100644 --- a/examples/miscellaneous/plot_johnson_lindenstrauss_bound.py +++ b/examples/miscellaneous/plot_johnson_lindenstrauss_bound.py @@ -13,6 +13,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import sys from time import time diff --git a/examples/miscellaneous/plot_metadata_routing.py b/examples/miscellaneous/plot_metadata_routing.py index e96b54436cf30..634ca304d125d 100644 --- a/examples/miscellaneous/plot_metadata_routing.py +++ b/examples/miscellaneous/plot_metadata_routing.py @@ -27,6 +27,9 @@ First a few imports and some random data for the rest of the script. 
""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% import warnings diff --git a/examples/miscellaneous/plot_multioutput_face_completion.py b/examples/miscellaneous/plot_multioutput_face_completion.py index 62070bc05e488..a924da0d2b4a5 100644 --- a/examples/miscellaneous/plot_multioutput_face_completion.py +++ b/examples/miscellaneous/plot_multioutput_face_completion.py @@ -12,6 +12,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/miscellaneous/plot_partial_dependence_visualization_api.py b/examples/miscellaneous/plot_partial_dependence_visualization_api.py index 38a984fa5b0cd..8c98b40816496 100644 --- a/examples/miscellaneous/plot_partial_dependence_visualization_api.py +++ b/examples/miscellaneous/plot_partial_dependence_visualization_api.py @@ -13,6 +13,9 @@ """ # noqa: E501 +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import pandas as pd diff --git a/examples/miscellaneous/plot_pipeline_display.py b/examples/miscellaneous/plot_pipeline_display.py index 9642bb56b903f..bf9ff2e549f8b 100755 --- a/examples/miscellaneous/plot_pipeline_display.py +++ b/examples/miscellaneous/plot_pipeline_display.py @@ -11,6 +11,9 @@ steps in the pipeline. 
""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Displaying a Pipeline with a Preprocessing Step and Classifier ################################################################################ diff --git a/examples/miscellaneous/plot_roc_curve_visualization_api.py b/examples/miscellaneous/plot_roc_curve_visualization_api.py index 7fc8df9724337..d377d321e061e 100644 --- a/examples/miscellaneous/plot_roc_curve_visualization_api.py +++ b/examples/miscellaneous/plot_roc_curve_visualization_api.py @@ -9,6 +9,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Load Data and Train a SVC # ------------------------- diff --git a/examples/mixture/plot_gmm.py b/examples/mixture/plot_gmm.py index 82e48a8d13eb0..9a27b1c42f81a 100644 --- a/examples/mixture/plot_gmm.py +++ b/examples/mixture/plot_gmm.py @@ -24,6 +24,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import itertools import matplotlib as mpl diff --git a/examples/mixture/plot_gmm_init.py b/examples/mixture/plot_gmm_init.py index 410a843cf78db..dd3d1c8a22692 100644 --- a/examples/mixture/plot_gmm_init.py +++ b/examples/mixture/plot_gmm_init.py @@ -33,6 +33,9 @@ time to initialize and low number of GaussianMixture iterations to converge. 
""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Author: Gordon Walsh # Data generation code from Jake Vanderplas diff --git a/examples/mixture/plot_gmm_pdf.py b/examples/mixture/plot_gmm_pdf.py index 062bdfd4d6d67..be70578402f55 100644 --- a/examples/mixture/plot_gmm_pdf.py +++ b/examples/mixture/plot_gmm_pdf.py @@ -9,6 +9,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np from matplotlib.colors import LogNorm diff --git a/examples/mixture/plot_gmm_selection.py b/examples/mixture/plot_gmm_selection.py index cd84c03ab7d13..ef256aa4f8e0f 100644 --- a/examples/mixture/plot_gmm_selection.py +++ b/examples/mixture/plot_gmm_selection.py @@ -14,6 +14,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Data generation # --------------- diff --git a/examples/mixture/plot_gmm_sin.py b/examples/mixture/plot_gmm_sin.py index 34af17b8920bc..fe9c12bbe5adc 100644 --- a/examples/mixture/plot_gmm_sin.py +++ b/examples/mixture/plot_gmm_sin.py @@ -39,6 +39,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import itertools import matplotlib as mpl diff --git a/examples/model_selection/plot_confusion_matrix.py b/examples/model_selection/plot_confusion_matrix.py index 278083a994e58..9a0312d34f005 100644 --- a/examples/model_selection/plot_confusion_matrix.py +++ b/examples/model_selection/plot_confusion_matrix.py @@ -24,6 +24,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/model_selection/plot_cost_sensitive_learning.py b/examples/model_selection/plot_cost_sensitive_learning.py index 55707e78e137a..3021d5aaab53d 100644 --- a/examples/model_selection/plot_cost_sensitive_learning.py +++ b/examples/model_selection/plot_cost_sensitive_learning.py @@ -35,6 
+35,9 @@ `_ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Cost-sensitive learning with constant gains and costs # ----------------------------------------------------- diff --git a/examples/model_selection/plot_cv_indices.py b/examples/model_selection/plot_cv_indices.py index d456546891069..b922fc75d7473 100644 --- a/examples/model_selection/plot_cv_indices.py +++ b/examples/model_selection/plot_cv_indices.py @@ -12,6 +12,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np from matplotlib.patches import Patch diff --git a/examples/model_selection/plot_cv_predict.py b/examples/model_selection/plot_cv_predict.py index bae1cffbd24e7..fa77749020d2b 100644 --- a/examples/model_selection/plot_cv_predict.py +++ b/examples/model_selection/plot_cv_predict.py @@ -9,6 +9,9 @@ errors. """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # We will load the diabetes dataset and create an instance of a linear # regression model. diff --git a/examples/model_selection/plot_det.py b/examples/model_selection/plot_det.py index 3e56b8bd35d31..bf72fc8ade61f 100644 --- a/examples/model_selection/plot_det.py +++ b/examples/model_selection/plot_det.py @@ -33,6 +33,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Generate synthetic data # ----------------------- diff --git a/examples/model_selection/plot_grid_search_digits.py b/examples/model_selection/plot_grid_search_digits.py index ec4360692aaf3..f9d7adc2a404b 100644 --- a/examples/model_selection/plot_grid_search_digits.py +++ b/examples/model_selection/plot_grid_search_digits.py @@ -15,6 +15,9 @@ sections on :ref:`cross_validation` and :ref:`grid_search`. 
""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # The dataset # ----------- diff --git a/examples/model_selection/plot_grid_search_refit_callable.py b/examples/model_selection/plot_grid_search_refit_callable.py index a851ee5f9bb19..9a8bf3c70d9cc 100644 --- a/examples/model_selection/plot_grid_search_refit_callable.py +++ b/examples/model_selection/plot_grid_search_refit_callable.py @@ -18,6 +18,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Author: Wenhao Zhang import matplotlib.pyplot as plt diff --git a/examples/model_selection/plot_grid_search_stats.py b/examples/model_selection/plot_grid_search_stats.py index 9335b12055515..a4f1c8e1417ba 100644 --- a/examples/model_selection/plot_grid_search_stats.py +++ b/examples/model_selection/plot_grid_search_stats.py @@ -8,6 +8,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # We will start by simulating moon shaped data (where the ideal separation # between classes is non-linear), adding to it a moderate degree of noise. diff --git a/examples/model_selection/plot_learning_curve.py b/examples/model_selection/plot_learning_curve.py index 450392679095f..d8060c67cbe15 100644 --- a/examples/model_selection/plot_learning_curve.py +++ b/examples/model_selection/plot_learning_curve.py @@ -13,6 +13,9 @@ accuracy. """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Learning Curve # ============== diff --git a/examples/model_selection/plot_likelihood_ratios.py b/examples/model_selection/plot_likelihood_ratios.py index 9a3f29def9e98..2fc3ad3d040f5 100644 --- a/examples/model_selection/plot_likelihood_ratios.py +++ b/examples/model_selection/plot_likelihood_ratios.py @@ -25,6 +25,9 @@ class proportion than the target application. 
""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Authors: Arturo Amor # Olivier Grisel # %% diff --git a/examples/model_selection/plot_nested_cross_validation_iris.py b/examples/model_selection/plot_nested_cross_validation_iris.py index 38defdadf4165..15082123761af 100644 --- a/examples/model_selection/plot_nested_cross_validation_iris.py +++ b/examples/model_selection/plot_nested_cross_validation_iris.py @@ -44,6 +44,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import numpy as np from matplotlib import pyplot as plt diff --git a/examples/model_selection/plot_precision_recall.py b/examples/model_selection/plot_precision_recall.py index 19a93c7324cbb..bb7c79ff27da2 100644 --- a/examples/model_selection/plot_precision_recall.py +++ b/examples/model_selection/plot_precision_recall.py @@ -92,6 +92,9 @@ :func:`sklearn.metrics.f1_score` """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # In binary classification settings # --------------------------------- diff --git a/examples/model_selection/plot_randomized_search.py b/examples/model_selection/plot_randomized_search.py index 140b359ff1934..7acd3a5550acf 100644 --- a/examples/model_selection/plot_randomized_search.py +++ b/examples/model_selection/plot_randomized_search.py @@ -20,6 +20,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from time import time import numpy as np diff --git a/examples/model_selection/plot_roc.py b/examples/model_selection/plot_roc.py index 5a94afcdf1edf..1b2a9760342a3 100644 --- a/examples/model_selection/plot_roc.py +++ b/examples/model_selection/plot_roc.py @@ -33,6 +33,9 @@ curves and their respective AUC. 
""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Load and prepare data # ===================== diff --git a/examples/model_selection/plot_roc_crossval.py b/examples/model_selection/plot_roc_crossval.py index 962b39754f8bd..fb6432a71ed79 100644 --- a/examples/model_selection/plot_roc_crossval.py +++ b/examples/model_selection/plot_roc_crossval.py @@ -27,6 +27,9 @@ generalize the metrics for multiclass classifiers. """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Load and prepare data # ===================== diff --git a/examples/model_selection/plot_successive_halving_heatmap.py b/examples/model_selection/plot_successive_halving_heatmap.py index 9b079e4b1351f..4d9b676443e5e 100644 --- a/examples/model_selection/plot_successive_halving_heatmap.py +++ b/examples/model_selection/plot_successive_halving_heatmap.py @@ -8,6 +8,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from time import time import matplotlib.pyplot as plt diff --git a/examples/model_selection/plot_successive_halving_iterations.py b/examples/model_selection/plot_successive_halving_iterations.py index 31805d308e269..31c1a0b9d5b34 100644 --- a/examples/model_selection/plot_successive_halving_iterations.py +++ b/examples/model_selection/plot_successive_halving_iterations.py @@ -10,6 +10,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np import pandas as pd diff --git a/examples/model_selection/plot_tuned_decision_threshold.py b/examples/model_selection/plot_tuned_decision_threshold.py index 7e997ee255e4d..59986a3910d00 100644 --- a/examples/model_selection/plot_tuned_decision_threshold.py +++ b/examples/model_selection/plot_tuned_decision_threshold.py @@ -14,6 +14,9 @@ threshold, depending on a metric of interest. 
""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # The diabetes dataset # -------------------- diff --git a/examples/model_selection/plot_underfitting_overfitting.py b/examples/model_selection/plot_underfitting_overfitting.py index 412946fc9ca8b..a6151cd6b3c20 100644 --- a/examples/model_selection/plot_underfitting_overfitting.py +++ b/examples/model_selection/plot_underfitting_overfitting.py @@ -21,6 +21,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/model_selection/plot_validation_curve.py b/examples/model_selection/plot_validation_curve.py index 947d8ac2b2fdb..44a382fed0c17 100644 --- a/examples/model_selection/plot_validation_curve.py +++ b/examples/model_selection/plot_validation_curve.py @@ -13,6 +13,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/multiclass/plot_multiclass_overview.py b/examples/multiclass/plot_multiclass_overview.py index 9ef5405512b67..1a0fddc40571d 100644 --- a/examples/multiclass/plot_multiclass_overview.py +++ b/examples/multiclass/plot_multiclass_overview.py @@ -20,6 +20,9 @@ will review them. """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # The Yeast UCI dataset # --------------------- diff --git a/examples/neighbors/plot_classification.py b/examples/neighbors/plot_classification.py index 43c45558054cf..1754869943ac7 100644 --- a/examples/neighbors/plot_classification.py +++ b/examples/neighbors/plot_classification.py @@ -8,6 +8,9 @@ decision boundary obtained with regards to the parameter `weights`. 
""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Load the data # ------------- diff --git a/examples/neighbors/plot_digits_kde_sampling.py b/examples/neighbors/plot_digits_kde_sampling.py index 045058eab09cc..d4860f117e4e9 100644 --- a/examples/neighbors/plot_digits_kde_sampling.py +++ b/examples/neighbors/plot_digits_kde_sampling.py @@ -11,6 +11,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/neighbors/plot_kde_1d.py b/examples/neighbors/plot_kde_1d.py index fc5b1914f23de..83734ec70bcfc 100644 --- a/examples/neighbors/plot_kde_1d.py +++ b/examples/neighbors/plot_kde_1d.py @@ -28,6 +28,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Author: Jake Vanderplas # import matplotlib.pyplot as plt diff --git a/examples/neighbors/plot_lof_novelty_detection.py b/examples/neighbors/plot_lof_novelty_detection.py index 789efa66c7b5c..9743ec4e3df2f 100644 --- a/examples/neighbors/plot_lof_novelty_detection.py +++ b/examples/neighbors/plot_lof_novelty_detection.py @@ -25,6 +25,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib import matplotlib.lines as mlines import matplotlib.pyplot as plt diff --git a/examples/neighbors/plot_lof_outlier_detection.py b/examples/neighbors/plot_lof_outlier_detection.py index edb79294ce594..9b5e92579625b 100644 --- a/examples/neighbors/plot_lof_outlier_detection.py +++ b/examples/neighbors/plot_lof_outlier_detection.py @@ -22,6 +22,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Generate data with outliers # --------------------------- diff --git a/examples/neighbors/plot_nca_classification.py b/examples/neighbors/plot_nca_classification.py index 0703caf90056e..496038cec7f88 100644 --- a/examples/neighbors/plot_nca_classification.py 
+++ b/examples/neighbors/plot_nca_classification.py @@ -15,6 +15,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # SPDX-License-Identifier: BSD-3-Clause import matplotlib.pyplot as plt diff --git a/examples/neighbors/plot_nca_dim_reduction.py b/examples/neighbors/plot_nca_dim_reduction.py index 415618f6bc4c7..1e6305549a46c 100644 --- a/examples/neighbors/plot_nca_dim_reduction.py +++ b/examples/neighbors/plot_nca_dim_reduction.py @@ -28,6 +28,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # SPDX-License-Identifier: BSD-3-Clause import matplotlib.pyplot as plt diff --git a/examples/neighbors/plot_nca_illustration.py b/examples/neighbors/plot_nca_illustration.py index 7085817f4e9fa..8d847f28d6e4d 100644 --- a/examples/neighbors/plot_nca_illustration.py +++ b/examples/neighbors/plot_nca_illustration.py @@ -10,6 +10,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # SPDX-License-Identifier: BSD-3-Clause import matplotlib.pyplot as plt diff --git a/examples/neighbors/plot_nearest_centroid.py b/examples/neighbors/plot_nearest_centroid.py index c8f710d0a0377..1718e213f9252 100644 --- a/examples/neighbors/plot_nearest_centroid.py +++ b/examples/neighbors/plot_nearest_centroid.py @@ -8,6 +8,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np from matplotlib.colors import ListedColormap diff --git a/examples/neural_networks/plot_mlp_training_curves.py b/examples/neural_networks/plot_mlp_training_curves.py index 8ee285877caa8..7b63d0de1adfe 100644 --- a/examples/neural_networks/plot_mlp_training_curves.py +++ b/examples/neural_networks/plot_mlp_training_curves.py @@ -14,6 +14,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import warnings import matplotlib.pyplot as plt diff --git 
a/examples/neural_networks/plot_mnist_filters.py b/examples/neural_networks/plot_mnist_filters.py index f37452a757d20..889e78e2e5e5b 100644 --- a/examples/neural_networks/plot_mnist_filters.py +++ b/examples/neural_networks/plot_mnist_filters.py @@ -24,6 +24,9 @@ to build this documentation on a regular basis. """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import warnings import matplotlib.pyplot as plt diff --git a/examples/preprocessing/plot_discretization_classification.py b/examples/preprocessing/plot_discretization_classification.py index 04a461a61799f..310089e296883 100644 --- a/examples/preprocessing/plot_discretization_classification.py +++ b/examples/preprocessing/plot_discretization_classification.py @@ -28,6 +28,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Code source: Tom Dupré la Tour # Adapted from plot_classifier_comparison by Gaël Varoquaux and Andreas Müller # diff --git a/examples/preprocessing/plot_target_encoder.py b/examples/preprocessing/plot_target_encoder.py index 98b73a9529679..04f3222d4e512 100644 --- a/examples/preprocessing/plot_target_encoder.py +++ b/examples/preprocessing/plot_target_encoder.py @@ -16,6 +16,9 @@ :ref:`User Guide `. for details. """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Loading Data from OpenML # ======================== diff --git a/examples/preprocessing/plot_target_encoder_cross_val.py b/examples/preprocessing/plot_target_encoder_cross_val.py index 7244a1bf61cd6..3d51664710096 100644 --- a/examples/preprocessing/plot_target_encoder_cross_val.py +++ b/examples/preprocessing/plot_target_encoder_cross_val.py @@ -16,6 +16,9 @@ fitting procedure to prevent overfitting. 
""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Create Synthetic Dataset # ======================== diff --git a/examples/release_highlights/plot_release_highlights_0_22_0.py b/examples/release_highlights/plot_release_highlights_0_22_0.py index 2e4c9185365a9..26763110402ec 100644 --- a/examples/release_highlights/plot_release_highlights_0_22_0.py +++ b/examples/release_highlights/plot_release_highlights_0_22_0.py @@ -20,6 +20,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # New plotting API # ---------------- diff --git a/examples/semi_supervised/plot_semi_supervised_newsgroups.py b/examples/semi_supervised/plot_semi_supervised_newsgroups.py index 19bcb13c5a99b..1ad7bf85953e7 100644 --- a/examples/semi_supervised/plot_semi_supervised_newsgroups.py +++ b/examples/semi_supervised/plot_semi_supervised_newsgroups.py @@ -11,6 +11,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import numpy as np from sklearn.datasets import fetch_20newsgroups diff --git a/examples/svm/plot_custom_kernel.py b/examples/svm/plot_custom_kernel.py index cacd67ed056ac..d3816849f73b8 100644 --- a/examples/svm/plot_custom_kernel.py +++ b/examples/svm/plot_custom_kernel.py @@ -8,6 +8,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/svm/plot_iris_svc.py b/examples/svm/plot_iris_svc.py index d13a9fe49c803..77259f9d1ea2c 100644 --- a/examples/svm/plot_iris_svc.py +++ b/examples/svm/plot_iris_svc.py @@ -34,6 +34,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt from sklearn import datasets, svm diff --git a/examples/svm/plot_linearsvc_support_vectors.py b/examples/svm/plot_linearsvc_support_vectors.py index 7f82b6c8bb0fe..021e1c6b55962 100644 --- 
a/examples/svm/plot_linearsvc_support_vectors.py +++ b/examples/svm/plot_linearsvc_support_vectors.py @@ -9,6 +9,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/svm/plot_oneclass.py b/examples/svm/plot_oneclass.py index 4f44f42fe338e..0db71966db6a9 100644 --- a/examples/svm/plot_oneclass.py +++ b/examples/svm/plot_oneclass.py @@ -11,6 +11,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% import numpy as np diff --git a/examples/svm/plot_rbf_parameters.py b/examples/svm/plot_rbf_parameters.py index ba0154b477b46..356707e2d72b2 100644 --- a/examples/svm/plot_rbf_parameters.py +++ b/examples/svm/plot_rbf_parameters.py @@ -75,6 +75,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Utility class to move the midpoint of a colormap to be around # the values of interest. diff --git a/examples/svm/plot_separating_hyperplane.py b/examples/svm/plot_separating_hyperplane.py index 23f464169f516..842da314feb1a 100644 --- a/examples/svm/plot_separating_hyperplane.py +++ b/examples/svm/plot_separating_hyperplane.py @@ -9,6 +9,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt from sklearn import svm diff --git a/examples/svm/plot_separating_hyperplane_unbalanced.py b/examples/svm/plot_separating_hyperplane_unbalanced.py index f9c615cc43d4f..d0814e1af065f 100644 --- a/examples/svm/plot_separating_hyperplane_unbalanced.py +++ b/examples/svm/plot_separating_hyperplane_unbalanced.py @@ -25,6 +25,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.lines as mlines import matplotlib.pyplot as plt diff --git a/examples/svm/plot_svm_anova.py b/examples/svm/plot_svm_anova.py index 3d5a934bf4884..1c2a78e79fdb9 100644 --- a/examples/svm/plot_svm_anova.py 
+++ b/examples/svm/plot_svm_anova.py @@ -10,6 +10,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # Load some data to play with # --------------------------- diff --git a/examples/svm/plot_svm_kernels.py b/examples/svm/plot_svm_kernels.py index 9d859864084f0..480293a03b9f0 100644 --- a/examples/svm/plot_svm_kernels.py +++ b/examples/svm/plot_svm_kernels.py @@ -36,6 +36,9 @@ kernel (`"rbf"`) and the sigmoid kernel (`"sigmoid"`). """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Code source: Gaël Varoquaux # SPDX-License-Identifier: BSD-3-Clause diff --git a/examples/svm/plot_svm_margin.py b/examples/svm/plot_svm_margin.py index de293f61b848d..671b7f4902bc8 100644 --- a/examples/svm/plot_svm_margin.py +++ b/examples/svm/plot_svm_margin.py @@ -13,6 +13,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Code source: Gaël Varoquaux # Modified for documentation by Jaques Grobler # SPDX-License-Identifier: BSD-3-Clause diff --git a/examples/svm/plot_svm_regression.py b/examples/svm/plot_svm_regression.py index ab34528a37af6..5da00ef1f88b7 100644 --- a/examples/svm/plot_svm_regression.py +++ b/examples/svm/plot_svm_regression.py @@ -7,6 +7,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/svm/plot_svm_tie_breaking.py b/examples/svm/plot_svm_tie_breaking.py index 0a43d4125aec5..a9276efd44572 100644 --- a/examples/svm/plot_svm_tie_breaking.py +++ b/examples/svm/plot_svm_tie_breaking.py @@ -14,6 +14,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Code source: Andreas Mueller, Adrin Jalali # SPDX-License-Identifier: BSD-3-Clause diff --git a/examples/svm/plot_weighted_samples.py b/examples/svm/plot_weighted_samples.py index c17742e091390..0fb5bedd6e59b 100644 --- 
a/examples/svm/plot_weighted_samples.py +++ b/examples/svm/plot_weighted_samples.py @@ -14,6 +14,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/tree/plot_cost_complexity_pruning.py b/examples/tree/plot_cost_complexity_pruning.py index b232389ea9ded..bdd1a2b0c358f 100644 --- a/examples/tree/plot_cost_complexity_pruning.py +++ b/examples/tree/plot_cost_complexity_pruning.py @@ -17,6 +17,9 @@ See also :ref:`minimal_cost_complexity_pruning` for details on pruning. """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt from sklearn.datasets import load_breast_cancer diff --git a/examples/tree/plot_iris_dtc.py b/examples/tree/plot_iris_dtc.py index 61c3bec3460ed..9d4298919d515 100644 --- a/examples/tree/plot_iris_dtc.py +++ b/examples/tree/plot_iris_dtc.py @@ -15,6 +15,9 @@ We also show the tree structure of a model built on all of the features. """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # %% # First load the copy of the Iris dataset shipped with scikit-learn: from sklearn.datasets import load_iris diff --git a/examples/tree/plot_tree_regression.py b/examples/tree/plot_tree_regression.py index 5a3da0b7b6d06..c499e95f428c4 100644 --- a/examples/tree/plot_tree_regression.py +++ b/examples/tree/plot_tree_regression.py @@ -14,6 +14,9 @@ details of the training data and learn from the noise, i.e. they overfit. 
""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Import the necessary modules and libraries import matplotlib.pyplot as plt import numpy as np diff --git a/examples/tree/plot_tree_regression_multioutput.py b/examples/tree/plot_tree_regression_multioutput.py index b6d2800d2732d..0fed498c0087e 100644 --- a/examples/tree/plot_tree_regression_multioutput.py +++ b/examples/tree/plot_tree_regression_multioutput.py @@ -15,6 +15,9 @@ details of the training data and learn from the noise, i.e. they overfit. """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/tree/plot_unveil_tree_structure.py b/examples/tree/plot_unveil_tree_structure.py index 19b7c643ec0f7..e87c9278450ad 100644 --- a/examples/tree/plot_unveil_tree_structure.py +++ b/examples/tree/plot_unveil_tree_structure.py @@ -16,6 +16,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import numpy as np from matplotlib import pyplot as plt diff --git a/setup.py b/setup.py index 7e67210736305..97e7a67458180 100755 --- a/setup.py +++ b/setup.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause #! /usr/bin/env python # # Authors: The scikit-learn developers diff --git a/sklearn/__check_build/__init__.py b/sklearn/__check_build/__init__.py index ad1a3a818b14d..0f6508ece1c4d 100644 --- a/sklearn/__check_build/__init__.py +++ b/sklearn/__check_build/__init__.py @@ -2,6 +2,9 @@ compile scikit-learn properly. 
""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import os INPLACE_MSG = """ diff --git a/sklearn/__init__.py b/sklearn/__init__.py index a61a2afde8855..03b375a47335e 100644 --- a/sklearn/__init__.py +++ b/sklearn/__init__.py @@ -1,5 +1,8 @@ """Configure global settings and get information about the working environment.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Machine learning module for Python # ================================== # diff --git a/sklearn/_build_utils/__init__.py b/sklearn/_build_utils/__init__.py index ceb72441000c3..4e08b5dc873d7 100644 --- a/sklearn/_build_utils/__init__.py +++ b/sklearn/_build_utils/__init__.py @@ -2,6 +2,9 @@ Utilities useful during the build. """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # author: Andy Mueller, Gael Varoquaux # license: BSD diff --git a/sklearn/_build_utils/openmp_helpers.py b/sklearn/_build_utils/openmp_helpers.py index 66e6089e33fef..6380258da391d 100644 --- a/sklearn/_build_utils/openmp_helpers.py +++ b/sklearn/_build_utils/openmp_helpers.py @@ -1,5 +1,8 @@ """Helpers for OpenMP support during the build.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # This code is adapted for a large part from the astropy openmp helpers, which # can be found at: https://github.com/astropy/extension-helpers/blob/master/extension_helpers/_openmp_helpers.py # noqa diff --git a/sklearn/_build_utils/pre_build_helpers.py b/sklearn/_build_utils/pre_build_helpers.py index 73adb26f5416b..ae05c8bf10957 100644 --- a/sklearn/_build_utils/pre_build_helpers.py +++ b/sklearn/_build_utils/pre_build_helpers.py @@ -1,5 +1,8 @@ """Helpers to check build environment before actual build of scikit-learn""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import glob import os import subprocess diff --git a/sklearn/_build_utils/tempita.py 
b/sklearn/_build_utils/tempita.py index 8da4b9c0e7ace..fd59fe51c7ba6 100644 --- a/sklearn/_build_utils/tempita.py +++ b/sklearn/_build_utils/tempita.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import argparse import os diff --git a/sklearn/_build_utils/version.py b/sklearn/_build_utils/version.py index 49a3cfb82bebd..0616f8c058c2b 100644 --- a/sklearn/_build_utils/version.py +++ b/sklearn/_build_utils/version.py @@ -1,6 +1,9 @@ #!/usr/bin/env python """Extract version number from __init__.py""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import os sklearn_init = os.path.join(os.path.dirname(__file__), "../__init__.py") diff --git a/sklearn/_config.py b/sklearn/_config.py index fc9392de68df6..05549c88a9ddc 100644 --- a/sklearn/_config.py +++ b/sklearn/_config.py @@ -1,5 +1,8 @@ """Global configuration state and functions for management""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import os import threading from contextlib import contextmanager as contextmanager diff --git a/sklearn/_distributor_init.py b/sklearn/_distributor_init.py index f0901034e83e4..d66d5d36955c1 100644 --- a/sklearn/_distributor_init.py +++ b/sklearn/_distributor_init.py @@ -8,3 +8,6 @@ The scikit-learn standard source distribution will not put code in this file, so you can safely replace this file with your own version. """ + +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause diff --git a/sklearn/_loss/__init__.py b/sklearn/_loss/__init__.py index ee15e693c16f6..bc348bbca8a15 100644 --- a/sklearn/_loss/__init__.py +++ b/sklearn/_loss/__init__.py @@ -3,6 +3,9 @@ fitting classification and regression tasks. 
""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from .loss import ( AbsoluteError, HalfBinomialLoss, diff --git a/sklearn/_loss/link.py b/sklearn/_loss/link.py index a6560d58d91e6..1bb290aecc64a 100644 --- a/sklearn/_loss/link.py +++ b/sklearn/_loss/link.py @@ -2,6 +2,9 @@ Module contains classes for invertible (and differentiable) link functions. """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Author: Christian Lorentzen from abc import ABC, abstractmethod diff --git a/sklearn/_loss/loss.py b/sklearn/_loss/loss.py index 96863cc00fe01..b45ff3322699a 100644 --- a/sklearn/_loss/loss.py +++ b/sklearn/_loss/loss.py @@ -6,6 +6,9 @@ classification. """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Goals: # - Provide a common private module for loss functions/classes. # - To be used in: diff --git a/sklearn/_min_dependencies.py b/sklearn/_min_dependencies.py index 9c108791b45bc..f7df37bedda0c 100644 --- a/sklearn/_min_dependencies.py +++ b/sklearn/_min_dependencies.py @@ -1,5 +1,8 @@ """All minimum dependencies for scikit-learn.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import argparse from collections import defaultdict diff --git a/sklearn/cluster/__init__.py b/sklearn/cluster/__init__.py index 5d497d2976ad1..a0545d3b90d56 100644 --- a/sklearn/cluster/__init__.py +++ b/sklearn/cluster/__init__.py @@ -1,5 +1,8 @@ """Popular unsupervised clustering algorithms.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from ._affinity_propagation import AffinityPropagation, affinity_propagation from ._agglomerative import ( AgglomerativeClustering, diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py index 55215d2a0312c..68fa315f11634 100644 --- a/sklearn/cluster/_agglomerative.py +++ b/sklearn/cluster/_agglomerative.py @@ -8,6 +8,9 @@ License: BSD 3 clause 
""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import warnings from heapq import heapify, heappop, heappush, heappushpop from numbers import Integral, Real diff --git a/sklearn/cluster/_bisect_k_means.py b/sklearn/cluster/_bisect_k_means.py index 1d4a9e1d84c26..d615447d913eb 100644 --- a/sklearn/cluster/_bisect_k_means.py +++ b/sklearn/cluster/_bisect_k_means.py @@ -1,5 +1,8 @@ """Bisecting K-means clustering.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Author: Michal Krawczyk import warnings diff --git a/sklearn/cluster/_hdbscan/__init__.py b/sklearn/cluster/_hdbscan/__init__.py index e69de29bb2d1d..67dd18fb94b59 100644 --- a/sklearn/cluster/_hdbscan/__init__.py +++ b/sklearn/cluster/_hdbscan/__init__.py @@ -0,0 +1,2 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause diff --git a/sklearn/cluster/_hdbscan/hdbscan.py b/sklearn/cluster/_hdbscan/hdbscan.py index 9933318313cc8..f999343dd875d 100644 --- a/sklearn/cluster/_hdbscan/hdbscan.py +++ b/sklearn/cluster/_hdbscan/hdbscan.py @@ -3,6 +3,9 @@ of Applications with Noise """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Authors: Leland McInnes # Steve Astels # John Healy diff --git a/sklearn/cluster/_mean_shift.py b/sklearn/cluster/_mean_shift.py index a99a607f3cf0d..db5c22d9650bf 100644 --- a/sklearn/cluster/_mean_shift.py +++ b/sklearn/cluster/_mean_shift.py @@ -9,6 +9,9 @@ Seeding is performed using a binning technique for scalability. 
""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Authors: Conrad Lee # Alexandre Gramfort # Gael Varoquaux diff --git a/sklearn/cluster/_optics.py b/sklearn/cluster/_optics.py index b2a0c4d642a00..46f795e94ffb2 100755 --- a/sklearn/cluster/_optics.py +++ b/sklearn/cluster/_optics.py @@ -10,6 +10,9 @@ License: BSD 3 clause """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import warnings from numbers import Integral, Real diff --git a/sklearn/compose/__init__.py b/sklearn/compose/__init__.py index e6e8fc6fd2c1b..9f20bc9856074 100644 --- a/sklearn/compose/__init__.py +++ b/sklearn/compose/__init__.py @@ -5,6 +5,9 @@ :class:`~sklearn.pipeline.FeatureUnion`. """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from ._column_transformer import ( ColumnTransformer, make_column_selector, diff --git a/sklearn/conftest.py b/sklearn/conftest.py index 203c524561fdd..6df1d5ba473f6 100644 --- a/sklearn/conftest.py +++ b/sklearn/conftest.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import builtins import platform import sys diff --git a/sklearn/covariance/__init__.py b/sklearn/covariance/__init__.py index 19ee1d8a09759..989f3372b42e0 100644 --- a/sklearn/covariance/__init__.py +++ b/sklearn/covariance/__init__.py @@ -5,6 +5,9 @@ closely related to the theory of Gaussian graphical models. 
""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from ._elliptic_envelope import EllipticEnvelope from ._empirical_covariance import ( EmpiricalCovariance, diff --git a/sklearn/cross_decomposition/__init__.py b/sklearn/cross_decomposition/__init__.py index 15eb21467daee..cad873ed800c6 100644 --- a/sklearn/cross_decomposition/__init__.py +++ b/sklearn/cross_decomposition/__init__.py @@ -1,5 +1,8 @@ """Algorithms for cross decomposition.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from ._pls import CCA, PLSSVD, PLSCanonical, PLSRegression __all__ = ["PLSCanonical", "PLSRegression", "PLSSVD", "CCA"] diff --git a/sklearn/datasets/__init__.py b/sklearn/datasets/__init__.py index 58cddb099faff..0ba2cbe88efc4 100644 --- a/sklearn/datasets/__init__.py +++ b/sklearn/datasets/__init__.py @@ -1,5 +1,8 @@ """Utilities to load popular datasets and artificial data generators.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import textwrap from ._base import ( diff --git a/sklearn/datasets/_arff_parser.py b/sklearn/datasets/_arff_parser.py index 86dfeb37a6ef5..fb6e629a73c8d 100644 --- a/sklearn/datasets/_arff_parser.py +++ b/sklearn/datasets/_arff_parser.py @@ -1,5 +1,8 @@ """Implementation of ARFF parsers: via LIAC-ARFF and pandas.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import itertools import re from collections import OrderedDict diff --git a/sklearn/datasets/_kddcup99.py b/sklearn/datasets/_kddcup99.py index 597fb9c9dece3..ab4db0522ef20 100644 --- a/sklearn/datasets/_kddcup99.py +++ b/sklearn/datasets/_kddcup99.py @@ -8,6 +8,9 @@ """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import errno import logging import os diff --git a/sklearn/datasets/_openml.py b/sklearn/datasets/_openml.py index a423928ffff40..e270676272b03 100644 --- a/sklearn/datasets/_openml.py +++ 
b/sklearn/datasets/_openml.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import gzip import hashlib import json diff --git a/sklearn/datasets/data/__init__.py b/sklearn/datasets/data/__init__.py index e69de29bb2d1d..67dd18fb94b59 100644 --- a/sklearn/datasets/data/__init__.py +++ b/sklearn/datasets/data/__init__.py @@ -0,0 +1,2 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause diff --git a/sklearn/datasets/descr/__init__.py b/sklearn/datasets/descr/__init__.py index e69de29bb2d1d..67dd18fb94b59 100644 --- a/sklearn/datasets/descr/__init__.py +++ b/sklearn/datasets/descr/__init__.py @@ -0,0 +1,2 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause diff --git a/sklearn/datasets/images/__init__.py b/sklearn/datasets/images/__init__.py index e69de29bb2d1d..67dd18fb94b59 100644 --- a/sklearn/datasets/images/__init__.py +++ b/sklearn/datasets/images/__init__.py @@ -0,0 +1,2 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause diff --git a/sklearn/decomposition/__init__.py b/sklearn/decomposition/__init__.py index 4730bda31ebbd..cd013fe9c7a93 100644 --- a/sklearn/decomposition/__init__.py +++ b/sklearn/decomposition/__init__.py @@ -4,6 +4,9 @@ regarded as dimensionality reduction techniques. """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from ..utils.extmath import randomized_svd from ._dict_learning import ( DictionaryLearning, diff --git a/sklearn/decomposition/_lda.py b/sklearn/decomposition/_lda.py index 4f91483a468a9..9b6f32f2e1926 100644 --- a/sklearn/decomposition/_lda.py +++ b/sklearn/decomposition/_lda.py @@ -8,6 +8,9 @@ Link: https://github.com/blei-lab/onlineldavb """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Author: Chyi-Kwei Yau # Author: Matthew D. 
Hoffman (original onlineldavb implementation) from numbers import Integral, Real diff --git a/sklearn/decomposition/_truncated_svd.py b/sklearn/decomposition/_truncated_svd.py index d978191f104f7..b50ed239c6b19 100644 --- a/sklearn/decomposition/_truncated_svd.py +++ b/sklearn/decomposition/_truncated_svd.py @@ -1,5 +1,8 @@ """Truncated SVD for sparse matrices, aka latent semantic analysis (LSA).""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Author: Lars Buitinck # Olivier Grisel # Michael Becker diff --git a/sklearn/ensemble/__init__.py b/sklearn/ensemble/__init__.py index e49d744ed6391..2a8cf413be9da 100644 --- a/sklearn/ensemble/__init__.py +++ b/sklearn/ensemble/__init__.py @@ -1,5 +1,8 @@ """Ensemble-based methods for classification, regression and anomaly detection.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from ._bagging import BaggingClassifier, BaggingRegressor from ._base import BaseEnsemble from ._forest import ( diff --git a/sklearn/ensemble/_hist_gradient_boosting/__init__.py b/sklearn/ensemble/_hist_gradient_boosting/__init__.py index 879fae1189f87..5939d83c84838 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/__init__.py +++ b/sklearn/ensemble/_hist_gradient_boosting/__init__.py @@ -3,3 +3,6 @@ The implementation is a port from pygbm which is itself strongly inspired from LightGBM. """ + +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause diff --git a/sklearn/ensemble/_hist_gradient_boosting/binning.py b/sklearn/ensemble/_hist_gradient_boosting/binning.py index d23f6e7b00a82..f5bfa7d531094 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/binning.py +++ b/sklearn/ensemble/_hist_gradient_boosting/binning.py @@ -6,6 +6,9 @@ approximately the same number of samples. 
""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Author: Nicolas Hug import numpy as np diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index 78f8456e969de..043321de569a8 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -1,5 +1,8 @@ """Fast Gradient Boosting decision trees for classification and regression.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Author: Nicolas Hug import itertools diff --git a/sklearn/ensemble/_hist_gradient_boosting/grower.py b/sklearn/ensemble/_hist_gradient_boosting/grower.py index 419e2f26c2653..98de6cd428e67 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/grower.py +++ b/sklearn/ensemble/_hist_gradient_boosting/grower.py @@ -5,6 +5,9 @@ the gradients and hessians of the training data. """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Author: Nicolas Hug import numbers diff --git a/sklearn/ensemble/_hist_gradient_boosting/predictor.py b/sklearn/ensemble/_hist_gradient_boosting/predictor.py index 799c25aadcec3..7ab9542ef6e0f 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/predictor.py +++ b/sklearn/ensemble/_hist_gradient_boosting/predictor.py @@ -2,6 +2,9 @@ This module contains the TreePredictor class which is used for prediction. 
""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Author: Nicolas Hug import numpy as np diff --git a/sklearn/ensemble/_hist_gradient_boosting/utils.py b/sklearn/ensemble/_hist_gradient_boosting/utils.py index 1ff17217164c8..429fbed611c22 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/utils.py +++ b/sklearn/ensemble/_hist_gradient_boosting/utils.py @@ -1,5 +1,8 @@ """This module contains utility routines.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from ...base import is_classifier from .binning import _BinMapper diff --git a/sklearn/exceptions.py b/sklearn/exceptions.py index 7f6b26163cc87..caba4e174817a 100644 --- a/sklearn/exceptions.py +++ b/sklearn/exceptions.py @@ -1,5 +1,8 @@ """Custom warnings and errors used across scikit-learn.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + __all__ = [ "NotFittedError", "ConvergenceWarning", diff --git a/sklearn/experimental/__init__.py b/sklearn/experimental/__init__.py index 205a11a3d524b..593d247e5bc40 100644 --- a/sklearn/experimental/__init__.py +++ b/sklearn/experimental/__init__.py @@ -5,3 +5,6 @@ The features and estimators that are experimental aren't subject to deprecation cycles. Use them at your own risks! """ + +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause diff --git a/sklearn/experimental/enable_halving_search_cv.py b/sklearn/experimental/enable_halving_search_cv.py index dd399ef35b6f7..85f93b26459d0 100644 --- a/sklearn/experimental/enable_halving_search_cv.py +++ b/sklearn/experimental/enable_halving_search_cv.py @@ -19,6 +19,9 @@ flake8 to ignore the import, which appears as unused. """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from .. 
import model_selection from ..model_selection._search_successive_halving import ( HalvingGridSearchCV, diff --git a/sklearn/experimental/enable_hist_gradient_boosting.py b/sklearn/experimental/enable_hist_gradient_boosting.py index 6fa4512ce39c6..9269b2d0b6d6c 100644 --- a/sklearn/experimental/enable_hist_gradient_boosting.py +++ b/sklearn/experimental/enable_hist_gradient_boosting.py @@ -7,6 +7,9 @@ normally from `sklearn.ensemble`. """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Don't remove this file, we don't want to break users code just because the # feature isn't experimental anymore. diff --git a/sklearn/experimental/enable_iterative_imputer.py b/sklearn/experimental/enable_iterative_imputer.py index 0b906961ca184..544e0d60eea28 100644 --- a/sklearn/experimental/enable_iterative_imputer.py +++ b/sklearn/experimental/enable_iterative_imputer.py @@ -12,6 +12,9 @@ >>> from sklearn.impute import IterativeImputer """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from .. import impute from ..impute._iterative import IterativeImputer diff --git a/sklearn/feature_extraction/__init__.py b/sklearn/feature_extraction/__init__.py index dced9d46b4cc7..621c8683f441e 100644 --- a/sklearn/feature_extraction/__init__.py +++ b/sklearn/feature_extraction/__init__.py @@ -1,5 +1,8 @@ """Feature extraction from raw data.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from . 
import text from ._dict_vectorizer import DictVectorizer from ._hash import FeatureHasher diff --git a/sklearn/feature_extraction/_stop_words.py b/sklearn/feature_extraction/_stop_words.py index 37ae02a0f36c5..ac5c9f495ae84 100644 --- a/sklearn/feature_extraction/_stop_words.py +++ b/sklearn/feature_extraction/_stop_words.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause # This list of English stop words is taken from the "Glasgow Information # Retrieval Group". The original list can be found at # http://ir.dcs.gla.ac.uk/resources/linguistic_utils/stop_words diff --git a/sklearn/feature_selection/__init__.py b/sklearn/feature_selection/__init__.py index d431a5b016807..fbb8f54350630 100644 --- a/sklearn/feature_selection/__init__.py +++ b/sklearn/feature_selection/__init__.py @@ -4,6 +4,9 @@ algorithm. """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from ._base import SelectorMixin from ._from_model import SelectFromModel from ._mutual_info import mutual_info_classif, mutual_info_regression diff --git a/sklearn/feature_selection/_mutual_info.py b/sklearn/feature_selection/_mutual_info.py index f3808068f46a5..42afdfb10dc3a 100644 --- a/sklearn/feature_selection/_mutual_info.py +++ b/sklearn/feature_selection/_mutual_info.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause # Author: Nikolay Mayorov # License: 3-clause BSD diff --git a/sklearn/feature_selection/_sequential.py b/sklearn/feature_selection/_sequential.py index 9c393724f9cea..471f9a373a3da 100644 --- a/sklearn/feature_selection/_sequential.py +++ b/sklearn/feature_selection/_sequential.py @@ -2,6 +2,9 @@ Sequential feature selection """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from numbers import Integral, Real import numpy as np diff --git a/sklearn/feature_selection/_variance_threshold.py 
b/sklearn/feature_selection/_variance_threshold.py index f97c75db1e34b..a9b360037590f 100644 --- a/sklearn/feature_selection/_variance_threshold.py +++ b/sklearn/feature_selection/_variance_threshold.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause # Author: Lars Buitinck # License: 3-clause BSD from numbers import Real diff --git a/sklearn/impute/__init__.py b/sklearn/impute/__init__.py index a40c6b432d6d5..2f9ed9017c6cb 100644 --- a/sklearn/impute/__init__.py +++ b/sklearn/impute/__init__.py @@ -1,5 +1,8 @@ """Transformers for missing value imputation.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import typing from ._base import MissingIndicator, SimpleImputer diff --git a/sklearn/impute/_iterative.py b/sklearn/impute/_iterative.py index 41f903061c34d..22cede929ebb3 100644 --- a/sklearn/impute/_iterative.py +++ b/sklearn/impute/_iterative.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import warnings from collections import namedtuple from numbers import Integral, Real diff --git a/sklearn/inspection/__init__.py b/sklearn/inspection/__init__.py index 312a19d2c9bce..8bb2b5dc575e9 100644 --- a/sklearn/inspection/__init__.py +++ b/sklearn/inspection/__init__.py @@ -1,5 +1,8 @@ """Tools for model inspection.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from ._partial_dependence import partial_dependence from ._permutation_importance import permutation_importance from ._plot.decision_boundary import DecisionBoundaryDisplay diff --git a/sklearn/inspection/_pd_utils.py b/sklearn/inspection/_pd_utils.py index 76f4d626fd53c..4d890212e2838 100644 --- a/sklearn/inspection/_pd_utils.py +++ b/sklearn/inspection/_pd_utils.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause def _check_feature_names(X, feature_names=None): """Check feature names. 
diff --git a/sklearn/inspection/_permutation_importance.py b/sklearn/inspection/_permutation_importance.py index 659db143153cc..8ecd7237b077d 100644 --- a/sklearn/inspection/_permutation_importance.py +++ b/sklearn/inspection/_permutation_importance.py @@ -1,5 +1,8 @@ """Permutation importance for estimators.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import numbers import numpy as np diff --git a/sklearn/inspection/_plot/__init__.py b/sklearn/inspection/_plot/__init__.py index e69de29bb2d1d..67dd18fb94b59 100644 --- a/sklearn/inspection/_plot/__init__.py +++ b/sklearn/inspection/_plot/__init__.py @@ -0,0 +1,2 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause diff --git a/sklearn/inspection/_plot/decision_boundary.py b/sklearn/inspection/_plot/decision_boundary.py index 92e1a2527400e..b87316f670cc9 100644 --- a/sklearn/inspection/_plot/decision_boundary.py +++ b/sklearn/inspection/_plot/decision_boundary.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import numpy as np from ...base import is_regressor diff --git a/sklearn/inspection/_plot/partial_dependence.py b/sklearn/inspection/_plot/partial_dependence.py index 3d516d727192e..8a5118df6862b 100644 --- a/sklearn/inspection/_plot/partial_dependence.py +++ b/sklearn/inspection/_plot/partial_dependence.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import numbers from itertools import chain from math import ceil diff --git a/sklearn/linear_model/__init__.py b/sklearn/linear_model/__init__.py index a2b0ffba0e728..182cd94b0260d 100644 --- a/sklearn/linear_model/__init__.py +++ b/sklearn/linear_model/__init__.py @@ -1,5 +1,8 @@ """A variety of linear models.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # See http://scikit-learn.sourceforge.net/modules/sgd.html and # 
http://scikit-learn.sourceforge.net/modules/linear_model.html for # complete documentation. diff --git a/sklearn/linear_model/_linear_loss.py b/sklearn/linear_model/_linear_loss.py index e8c1466b30623..cfac0a2739115 100644 --- a/sklearn/linear_model/_linear_loss.py +++ b/sklearn/linear_model/_linear_loss.py @@ -2,6 +2,9 @@ Loss functions for linear models with raw_prediction = X @ coef """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import numpy as np from scipy import sparse diff --git a/sklearn/linear_model/_logistic.py b/sklearn/linear_model/_logistic.py index 23daa86f2146d..6362a7fa45395 100644 --- a/sklearn/linear_model/_logistic.py +++ b/sklearn/linear_model/_logistic.py @@ -2,6 +2,9 @@ Logistic Regression """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Author: Gael Varoquaux # Fabian Pedregosa # Alexandre Gramfort diff --git a/sklearn/manifold/__init__.py b/sklearn/manifold/__init__.py index b3f53e84b04fb..2266b6e08af88 100644 --- a/sklearn/manifold/__init__.py +++ b/sklearn/manifold/__init__.py @@ -1,5 +1,8 @@ """Data embedding techniques.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from ._isomap import Isomap from ._locally_linear import LocallyLinearEmbedding, locally_linear_embedding from ._mds import MDS, smacof diff --git a/sklearn/manifold/_mds.py b/sklearn/manifold/_mds.py index f57ce6c4c8d2c..db8ae2ff40444 100644 --- a/sklearn/manifold/_mds.py +++ b/sklearn/manifold/_mds.py @@ -2,6 +2,9 @@ Multi-dimensional Scaling (MDS). 
""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # author: Nelle Varoquaux # SPDX-License-Identifier: BSD-3-Clause diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py index 81a67d9793bb0..787df39a21979 100644 --- a/sklearn/metrics/__init__.py +++ b/sklearn/metrics/__init__.py @@ -1,5 +1,8 @@ """Score functions, performance metrics, pairwise metrics and distance computations.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from . import cluster from ._classification import ( accuracy_score, diff --git a/sklearn/metrics/_pairwise_distances_reduction/__init__.py b/sklearn/metrics/_pairwise_distances_reduction/__init__.py index 73d291995c31b..6c8deb34dc448 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/__init__.py +++ b/sklearn/metrics/_pairwise_distances_reduction/__init__.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause # # Pairwise Distances Reductions # ============================= diff --git a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py index 956de3577bcee..83bb8cd2e0d5c 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py +++ b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause from abc import abstractmethod from typing import List diff --git a/sklearn/metrics/_plot/__init__.py b/sklearn/metrics/_plot/__init__.py index e69de29bb2d1d..67dd18fb94b59 100644 --- a/sklearn/metrics/_plot/__init__.py +++ b/sklearn/metrics/_plot/__init__.py @@ -0,0 +1,2 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause diff --git a/sklearn/metrics/_plot/confusion_matrix.py b/sklearn/metrics/_plot/confusion_matrix.py index 01783367649f5..0d5f5d84719ad 100644 --- 
a/sklearn/metrics/_plot/confusion_matrix.py +++ b/sklearn/metrics/_plot/confusion_matrix.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause from itertools import product import numpy as np diff --git a/sklearn/metrics/_plot/det_curve.py b/sklearn/metrics/_plot/det_curve.py index e7336b10f5bb6..712a87237c35a 100644 --- a/sklearn/metrics/_plot/det_curve.py +++ b/sklearn/metrics/_plot/det_curve.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import scipy as sp from ...utils._plotting import _BinaryClassifierCurveDisplayMixin diff --git a/sklearn/metrics/_plot/precision_recall_curve.py b/sklearn/metrics/_plot/precision_recall_curve.py index 852dbf3981b2c..869c8bc95e675 100644 --- a/sklearn/metrics/_plot/precision_recall_curve.py +++ b/sklearn/metrics/_plot/precision_recall_curve.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause from collections import Counter from ...utils._plotting import _BinaryClassifierCurveDisplayMixin diff --git a/sklearn/metrics/_plot/regression.py b/sklearn/metrics/_plot/regression.py index 1a3dfa0127931..b079dc02524b3 100644 --- a/sklearn/metrics/_plot/regression.py +++ b/sklearn/metrics/_plot/regression.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import numbers import numpy as np diff --git a/sklearn/metrics/_plot/roc_curve.py b/sklearn/metrics/_plot/roc_curve.py index 292fb6e2e2f69..cb8718705e831 100644 --- a/sklearn/metrics/_plot/roc_curve.py +++ b/sklearn/metrics/_plot/roc_curve.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause from ...utils._plotting import _BinaryClassifierCurveDisplayMixin from .._ranking import auc, roc_curve diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index c1a916aa0b5f3..6a67e7392cb1a 100644 --- a/sklearn/metrics/_scorer.py +++ 
b/sklearn/metrics/_scorer.py @@ -13,6 +13,9 @@ ground truth labeling (or ``None`` in the case of unsupervised models). """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Authors: Andreas Mueller # Lars Buitinck # Arnaud Joly diff --git a/sklearn/metrics/cluster/__init__.py b/sklearn/metrics/cluster/__init__.py index 776b2f2dbd063..47c7ae161edf2 100644 --- a/sklearn/metrics/cluster/__init__.py +++ b/sklearn/metrics/cluster/__init__.py @@ -5,6 +5,9 @@ model itself. """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from ._bicluster import consensus_score from ._supervised import ( adjusted_mutual_info_score, diff --git a/sklearn/metrics/cluster/_bicluster.py b/sklearn/metrics/cluster/_bicluster.py index 713d0bee8fa2e..c58f0e6047ad1 100644 --- a/sklearn/metrics/cluster/_bicluster.py +++ b/sklearn/metrics/cluster/_bicluster.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import numpy as np from scipy.optimize import linear_sum_assignment diff --git a/sklearn/mixture/__init__.py b/sklearn/mixture/__init__.py index 9eb449226a9cb..6832f110e4cc6 100644 --- a/sklearn/mixture/__init__.py +++ b/sklearn/mixture/__init__.py @@ -1,5 +1,8 @@ """Mixture modeling algorithms.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from ._bayesian_mixture import BayesianGaussianMixture from ._gaussian_mixture import GaussianMixture diff --git a/sklearn/model_selection/__init__.py b/sklearn/model_selection/__init__.py index 8a50d032dc164..55b548ce45814 100644 --- a/sklearn/model_selection/__init__.py +++ b/sklearn/model_selection/__init__.py @@ -1,5 +1,8 @@ """Tools for model selection, such as cross validation and hyper-parameter tuning.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import typing from ._classification_threshold import ( diff --git 
a/sklearn/model_selection/_classification_threshold.py b/sklearn/model_selection/_classification_threshold.py index 1d221d3388434..d29ad5d28c322 100644 --- a/sklearn/model_selection/_classification_threshold.py +++ b/sklearn/model_selection/_classification_threshold.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause from collections.abc import MutableMapping from numbers import Integral, Real diff --git a/sklearn/model_selection/_plot.py b/sklearn/model_selection/_plot.py index 08518cf2482d4..0565675e87ee7 100644 --- a/sklearn/model_selection/_plot.py +++ b/sklearn/model_selection/_plot.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import numpy as np from ..utils._optional_dependencies import check_matplotlib_support diff --git a/sklearn/model_selection/_search_successive_halving.py b/sklearn/model_selection/_search_successive_halving.py index b1cf5ee50965c..373dbfac22be5 100644 --- a/sklearn/model_selection/_search_successive_halving.py +++ b/sklearn/model_selection/_search_successive_halving.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause from abc import abstractmethod from copy import deepcopy from math import ceil, floor, log diff --git a/sklearn/neighbors/__init__.py b/sklearn/neighbors/__init__.py index e2c1a50632564..02c4a28b9a6c4 100644 --- a/sklearn/neighbors/__init__.py +++ b/sklearn/neighbors/__init__.py @@ -1,5 +1,8 @@ """The k-nearest neighbors algorithms.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from ._ball_tree import BallTree from ._base import VALID_METRICS, VALID_METRICS_SPARSE, sort_graph_by_row_values from ._classification import KNeighborsClassifier, RadiusNeighborsClassifier diff --git a/sklearn/neighbors/_kde.py b/sklearn/neighbors/_kde.py index a9e5fe011150a..0e4b45cc77e36 100644 --- a/sklearn/neighbors/_kde.py +++ b/sklearn/neighbors/_kde.py @@ 
-3,6 +3,9 @@ ------------------------- """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Author: Jake Vanderplas import itertools from numbers import Integral, Real diff --git a/sklearn/neighbors/_unsupervised.py b/sklearn/neighbors/_unsupervised.py index 4185bbe15826b..8888fe18483c6 100644 --- a/sklearn/neighbors/_unsupervised.py +++ b/sklearn/neighbors/_unsupervised.py @@ -1,5 +1,8 @@ """Unsupervised nearest neighbors learner""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from ..base import _fit_context from ._base import KNeighborsMixin, NeighborsBase, RadiusNeighborsMixin diff --git a/sklearn/neural_network/__init__.py b/sklearn/neural_network/__init__.py index 7bf5f1241de69..fa580a8b40642 100644 --- a/sklearn/neural_network/__init__.py +++ b/sklearn/neural_network/__init__.py @@ -1,5 +1,8 @@ """Models based on neural networks.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # SPDX-License-Identifier: BSD-3-Clause from ._multilayer_perceptron import MLPClassifier, MLPRegressor diff --git a/sklearn/preprocessing/__init__.py b/sklearn/preprocessing/__init__.py index 9e49379c9122a..d5ea1fe15f036 100644 --- a/sklearn/preprocessing/__init__.py +++ b/sklearn/preprocessing/__init__.py @@ -1,5 +1,8 @@ """Methods for scaling, centering, normalization, binarization, and more.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from ._data import ( Binarizer, KernelCenterer, diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py index c49684d0ebfbc..3f5d7ba4ad205 100644 --- a/sklearn/preprocessing/_function_transformer.py +++ b/sklearn/preprocessing/_function_transformer.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import warnings import numpy as np diff --git a/sklearn/preprocessing/_polynomial.py 
b/sklearn/preprocessing/_polynomial.py index f4c9fb032cfb0..2175e93e38694 100644 --- a/sklearn/preprocessing/_polynomial.py +++ b/sklearn/preprocessing/_polynomial.py @@ -2,6 +2,9 @@ This file contains preprocessing tools based on polynomials. """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import collections from itertools import chain, combinations from itertools import combinations_with_replacement as combinations_w_r diff --git a/sklearn/preprocessing/_target_encoder.py b/sklearn/preprocessing/_target_encoder.py index b3b7c3d5e7bd9..779618026def1 100644 --- a/sklearn/preprocessing/_target_encoder.py +++ b/sklearn/preprocessing/_target_encoder.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause from numbers import Integral, Real import numpy as np diff --git a/sklearn/semi_supervised/__init__.py b/sklearn/semi_supervised/__init__.py index 055c3f431b2c1..fba2488a753df 100644 --- a/sklearn/semi_supervised/__init__.py +++ b/sklearn/semi_supervised/__init__.py @@ -4,6 +4,9 @@ data for classification tasks. 
""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from ._label_propagation import LabelPropagation, LabelSpreading from ._self_training import SelfTrainingClassifier diff --git a/sklearn/svm/_base.py b/sklearn/svm/_base.py index 47d4027c50754..22adea71cd27c 100644 --- a/sklearn/svm/_base.py +++ b/sklearn/svm/_base.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import warnings from abc import ABCMeta, abstractmethod from numbers import Integral, Real diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py index 5b547fcb98cd6..8ecdca6f47b56 100644 --- a/sklearn/svm/_classes.py +++ b/sklearn/svm/_classes.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause from numbers import Integral, Real import numpy as np diff --git a/sklearn/tree/__init__.py b/sklearn/tree/__init__.py index 23ab17aa0bbbe..c961a811fe05c 100644 --- a/sklearn/tree/__init__.py +++ b/sklearn/tree/__init__.py @@ -1,5 +1,8 @@ """Decision tree based models for classification and regression.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from ._classes import ( BaseDecisionTree, DecisionTreeClassifier, diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 011347cb2d443..cb06d90572c7e 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -1,5 +1,8 @@ """Various utilities to help with development.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import platform import warnings from collections.abc import Sequence diff --git a/sklearn/utils/_arpack.py b/sklearn/utils/_arpack.py index 3465ac98c2e81..3087a0f4dbf0a 100644 --- a/sklearn/utils/_arpack.py +++ b/sklearn/utils/_arpack.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause from .validation import check_random_state diff --git 
a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py index c222e26fcc82c..49da1a3b3d5db 100644 --- a/sklearn/utils/_array_api.py +++ b/sklearn/utils/_array_api.py @@ -1,5 +1,8 @@ """Tools to support array_api.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import itertools import math from functools import wraps diff --git a/sklearn/utils/_available_if.py b/sklearn/utils/_available_if.py index 2d9598df9de7e..9230a35ec9ab0 100644 --- a/sklearn/utils/_available_if.py +++ b/sklearn/utils/_available_if.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause from functools import update_wrapper, wraps from types import MethodType diff --git a/sklearn/utils/_bunch.py b/sklearn/utils/_bunch.py index d90aeb7d93c74..4d474e88eb80e 100644 --- a/sklearn/utils/_bunch.py +++ b/sklearn/utils/_bunch.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import warnings diff --git a/sklearn/utils/_chunking.py b/sklearn/utils/_chunking.py index 7bf53d0626c85..e2462fa7f7eda 100644 --- a/sklearn/utils/_chunking.py +++ b/sklearn/utils/_chunking.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import warnings from itertools import islice from numbers import Integral diff --git a/sklearn/utils/_encode.py b/sklearn/utils/_encode.py index 3fd4d45f522e6..897980e43ce11 100644 --- a/sklearn/utils/_encode.py +++ b/sklearn/utils/_encode.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause from collections import Counter from contextlib import suppress from typing import NamedTuple diff --git a/sklearn/utils/_estimator_html_repr.py b/sklearn/utils/_estimator_html_repr.py index 5e465234f516b..1b1eab6054e30 100644 --- a/sklearn/utils/_estimator_html_repr.py +++ b/sklearn/utils/_estimator_html_repr.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# 
SPDX-License-Identifier: BSD-3-Clause import html import itertools from contextlib import closing diff --git a/sklearn/utils/_indexing.py b/sklearn/utils/_indexing.py index ca2327f2bb109..8923d568cbf33 100644 --- a/sklearn/utils/_indexing.py +++ b/sklearn/utils/_indexing.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import numbers import sys import warnings diff --git a/sklearn/utils/_joblib.py b/sklearn/utils/_joblib.py index 7638a30e7b5fa..837dd5102e918 100644 --- a/sklearn/utils/_joblib.py +++ b/sklearn/utils/_joblib.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause # TODO(1.7): remove this file import warnings as _warnings diff --git a/sklearn/utils/_mask.py b/sklearn/utils/_mask.py index 0a66dc5a20a81..95fefaca6e67c 100644 --- a/sklearn/utils/_mask.py +++ b/sklearn/utils/_mask.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause from contextlib import suppress import numpy as np diff --git a/sklearn/utils/_missing.py b/sklearn/utils/_missing.py index b48381cfcf3bb..f2e024df887be 100644 --- a/sklearn/utils/_missing.py +++ b/sklearn/utils/_missing.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import math import numbers from contextlib import suppress diff --git a/sklearn/utils/_mocking.py b/sklearn/utils/_mocking.py index 0afed8c08cfaa..f4606d2a98c04 100644 --- a/sklearn/utils/_mocking.py +++ b/sklearn/utils/_mocking.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import numpy as np from ..base import BaseEstimator, ClassifierMixin diff --git a/sklearn/utils/_optional_dependencies.py b/sklearn/utils/_optional_dependencies.py index 14ffeb1d5b6ee..b2e970502c123 100644 --- a/sklearn/utils/_optional_dependencies.py +++ b/sklearn/utils/_optional_dependencies.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn 
developers +# SPDX-License-Identifier: BSD-3-Clause def check_matplotlib_support(caller_name): """Raise ImportError with detailed error message if mpl is not installed. diff --git a/sklearn/utils/_param_validation.py b/sklearn/utils/_param_validation.py index 56b7d0ee1fe4c..e1de052bd01d3 100644 --- a/sklearn/utils/_param_validation.py +++ b/sklearn/utils/_param_validation.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import functools import math import operator diff --git a/sklearn/utils/_plotting.py b/sklearn/utils/_plotting.py index 2db38baa9abfa..ab3e21bafa134 100644 --- a/sklearn/utils/_plotting.py +++ b/sklearn/utils/_plotting.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import numpy as np from . import check_consistent_length diff --git a/sklearn/utils/_pprint.py b/sklearn/utils/_pprint.py index 9b33cd617a5fc..00d8391b3af64 100644 --- a/sklearn/utils/_pprint.py +++ b/sklearn/utils/_pprint.py @@ -1,6 +1,9 @@ """This module contains the _EstimatorPrettyPrinter class used in BaseEstimator.__repr__ for pretty-printing estimators""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, # 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 Python Software Foundation; # All Rights Reserved diff --git a/sklearn/utils/_response.py b/sklearn/utils/_response.py index 0381c872a94b0..86c430dbd23f2 100644 --- a/sklearn/utils/_response.py +++ b/sklearn/utils/_response.py @@ -3,6 +3,9 @@ It allows to make uniform checks and validation. 
""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import numpy as np from ..base import is_classifier diff --git a/sklearn/utils/_set_output.py b/sklearn/utils/_set_output.py index 42757dbb00fae..510be7469b96f 100644 --- a/sklearn/utils/_set_output.py +++ b/sklearn/utils/_set_output.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import importlib from functools import wraps from typing import Protocol, runtime_checkable diff --git a/sklearn/utils/_show_versions.py b/sklearn/utils/_show_versions.py index a947e35838504..43d2c15dbd7a5 100644 --- a/sklearn/utils/_show_versions.py +++ b/sklearn/utils/_show_versions.py @@ -4,6 +4,9 @@ adapted from :func:`pandas.show_versions` """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # SPDX-License-Identifier: BSD-3-Clause import platform diff --git a/sklearn/utils/_tags.py b/sklearn/utils/_tags.py index c8f6ffb651a0d..af2ca444b5d33 100644 --- a/sklearn/utils/_tags.py +++ b/sklearn/utils/_tags.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import numpy as np _DEFAULT_TAGS = { diff --git a/sklearn/utils/_user_interface.py b/sklearn/utils/_user_interface.py index 09e6f2b7bf849..f48b934dc7c8b 100644 --- a/sklearn/utils/_user_interface.py +++ b/sklearn/utils/_user_interface.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import timeit from contextlib import contextmanager diff --git a/sklearn/utils/deprecation.py b/sklearn/utils/deprecation.py index a3225597701c7..e6ad56264e1c3 100644 --- a/sklearn/utils/deprecation.py +++ b/sklearn/utils/deprecation.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import functools import warnings diff --git a/sklearn/utils/discovery.py b/sklearn/utils/discovery.py index 7a6c73997ef8c..40d5b5f8cf714 100644 --- 
a/sklearn/utils/discovery.py +++ b/sklearn/utils/discovery.py @@ -1,5 +1,8 @@ """Utilities to discover scikit-learn objects.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import inspect import pkgutil from importlib import import_module diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 5ba1540094588..5264ec0a7e913 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -1,5 +1,8 @@ """Various utilities to check the compatibility of estimators with scikit-learn API.""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import pickle import re import warnings diff --git a/sklearn/utils/optimize.py b/sklearn/utils/optimize.py index ac91110651d94..ba2ddee605440 100644 --- a/sklearn/utils/optimize.py +++ b/sklearn/utils/optimize.py @@ -9,6 +9,9 @@ significant speedups. """ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + # This is a modified file from scipy.optimize # Original authors: Travis Oliphant, Eric Jones # Modifications by Gael Varoquaux, Mathieu Blondel and Tom Dupre la Tour diff --git a/sklearn/utils/parallel.py b/sklearn/utils/parallel.py index 7d7349ebe2816..da7ad69ffc3bf 100644 --- a/sklearn/utils/parallel.py +++ b/sklearn/utils/parallel.py @@ -2,6 +2,9 @@ usage. 
""" +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import functools import warnings from functools import update_wrapper diff --git a/sklearn/utils/stats.py b/sklearn/utils/stats.py index d0e22ea3694f4..b931cb6b6f90d 100644 --- a/sklearn/utils/stats.py +++ b/sklearn/utils/stats.py @@ -1,3 +1,5 @@ +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import numpy as np from .extmath import stable_cumsum From b2d1fc274a694014927bd1deac3a69d4d9687c6c Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 18 Jun 2024 21:12:12 +0200 Subject: [PATCH 03/17] minor fixes --- examples/applications/plot_time_series_lagged_features.py | 2 +- examples/applications/plot_topics_extraction_with_nmf_lda.py | 2 +- examples/model_selection/plot_likelihood_ratios.py | 2 +- examples/model_selection/plot_roc.py | 4 ++-- pyproject.toml | 2 ++ setup.py | 2 +- sklearn/linear_model/_glm/_newton_solver.py | 2 +- sklearn/mixture/tests/test_bayesian_mixture.py | 2 +- sklearn/model_selection/_validation.py | 4 ++-- 9 files changed, 12 insertions(+), 10 deletions(-) diff --git a/examples/applications/plot_time_series_lagged_features.py b/examples/applications/plot_time_series_lagged_features.py index 53b29fa940958..95d339aa0a1c1 100644 --- a/examples/applications/plot_time_series_lagged_features.py +++ b/examples/applications/plot_time_series_lagged_features.py @@ -255,7 +255,7 @@ def consolidate_scores(cv_results, scores, metric): time = cv_results["fit_time"] scores["fit_time"].append(f"{time.mean():.2f} ± {time.std():.2f} s") - scores["loss"].append(f"quantile {int(quantile*100)}") + scores["loss"].append(f"quantile {int(quantile * 100)}") for key, value in cv_results.items(): if key.startswith("test_"): metric = key.split("test_")[1] diff --git a/examples/applications/plot_topics_extraction_with_nmf_lda.py b/examples/applications/plot_topics_extraction_with_nmf_lda.py index faeef5ae15a11..a6f774d01e2de 100644 --- 
a/examples/applications/plot_topics_extraction_with_nmf_lda.py +++ b/examples/applications/plot_topics_extraction_with_nmf_lda.py @@ -50,7 +50,7 @@ def plot_top_words(model, feature_names, n_top_words, title): ax = axes[topic_idx] ax.barh(top_features, weights, height=0.7) - ax.set_title(f"Topic {topic_idx +1}", fontdict={"fontsize": 30}) + ax.set_title(f"Topic {topic_idx + 1}", fontdict={"fontsize": 30}) ax.tick_params(axis="both", which="major", labelsize=20) for i in "top right left".split(): ax.spines[i].set_visible(False) diff --git a/examples/model_selection/plot_likelihood_ratios.py b/examples/model_selection/plot_likelihood_ratios.py index 2fc3ad3d040f5..260bab2c90bf0 100644 --- a/examples/model_selection/plot_likelihood_ratios.py +++ b/examples/model_selection/plot_likelihood_ratios.py @@ -42,7 +42,7 @@ class proportion than the target application. from sklearn.datasets import make_classification X, y = make_classification(n_samples=10_000, weights=[0.9, 0.1], random_state=0) -print(f"Percentage of people carrying the disease: {100*y.mean():.2f}%") +print(f"Percentage of people carrying the disease: {100 * y.mean():.2f}%") # %% # A machine learning model is built to diagnose if a person with some given diff --git a/examples/model_selection/plot_roc.py b/examples/model_selection/plot_roc.py index 1b2a9760342a3..fbc3463a112e5 100644 --- a/examples/model_selection/plot_roc.py +++ b/examples/model_selection/plot_roc.py @@ -151,9 +151,9 @@ # # We can briefly demo the effect of :func:`numpy.ravel`: -print(f"y_score:\n{y_score[0:2,:]}") +print(f"y_score:\n{y_score[0:2, :]}") print() -print(f"y_score.ravel():\n{y_score[0:2,:].ravel()}") +print(f"y_score.ravel():\n{y_score[0:2, :].ravel()}") # %% # In a multi-class classification setup with highly imbalanced classes, diff --git a/pyproject.toml b/pyproject.toml index 4f2a33a12b38f..c96e3cfb51ccc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -151,6 +151,8 @@ ignore=[ "E731", # do not use variables named 'l', 
'O', or 'I' "E741", + # ignore assigned but unused variables + "F841", ] [tool.ruff.lint.flake8-copyright] diff --git a/setup.py b/setup.py index 97e7a67458180..4b7c298998a82 100755 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -#! /usr/bin/env python +# ! /usr/bin/env python # # Authors: The scikit-learn developers # License: 3-clause BSD diff --git a/sklearn/linear_model/_glm/_newton_solver.py b/sklearn/linear_model/_glm/_newton_solver.py index b2be604d931c5..870f90c9b6255 100644 --- a/sklearn/linear_model/_glm/_newton_solver.py +++ b/sklearn/linear_model/_glm/_newton_solver.py @@ -253,7 +253,7 @@ def line_search(self, X, y, sample_weight): check = loss_improvement <= t * armijo_term if is_verbose: print( - f" line search iteration={i+1}, step size={t}\n" + f" line search iteration={i + 1}, step size={t}\n" f" check loss improvement <= armijo term: {loss_improvement} " f"<= {t * armijo_term} {check}" ) diff --git a/sklearn/mixture/tests/test_bayesian_mixture.py b/sklearn/mixture/tests/test_bayesian_mixture.py index 925cd2fdb615b..b9123c3a37c26 100644 --- a/sklearn/mixture/tests/test_bayesian_mixture.py +++ b/sklearn/mixture/tests/test_bayesian_mixture.py @@ -118,7 +118,7 @@ def test_bayesian_mixture_precisions_prior_initialisation(): ) msg = ( "The parameter 'degrees_of_freedom_prior' should be greater than" - f" {n_features -1}, but got {bad_degrees_of_freedom_prior_:.3f}." + f" {n_features - 1}, but got {bad_degrees_of_freedom_prior_:.3f}." 
) with pytest.raises(ValueError, match=msg): bgmm.fit(X) diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index 92c93193010ff..4a3e352d6c705 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -844,9 +844,9 @@ def _fit_and_score( progress_msg = "" if verbose > 2: if split_progress is not None: - progress_msg = f" {split_progress[0]+1}/{split_progress[1]}" + progress_msg = f" {split_progress[0] + 1}/{split_progress[1]}" if candidate_progress and verbose > 9: - progress_msg += f"; {candidate_progress[0]+1}/{candidate_progress[1]}" + progress_msg += f"; {candidate_progress[0] + 1}/{candidate_progress[1]}" if verbose > 1: if parameters is None: From 6c1d14478e84650d639337bdf88d47aee5814078 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 18 Jun 2024 21:42:16 +0200 Subject: [PATCH 04/17] CLN fix the remaining ruff issues --- .../plot_tweedie_regression_insurance_claims.py | 2 +- pyproject.toml | 1 + sklearn/cluster/_optics.py | 2 +- sklearn/cluster/tests/test_dbscan.py | 2 +- sklearn/linear_model/tests/test_ridge.py | 4 ++-- sklearn/metrics/pairwise.py | 4 +++- sklearn/model_selection/_search.py | 4 ++-- sklearn/model_selection/_split.py | 2 +- sklearn/model_selection/tests/test_validation.py | 8 ++++---- sklearn/utils/estimator_checks.py | 2 +- sklearn/utils/tests/test_validation.py | 2 +- sklearn/utils/validation.py | 2 +- 12 files changed, 19 insertions(+), 16 deletions(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 31a91fb37c766..2c3a1e6f4cea6 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -79,7 +79,7 @@ def load_mtpl2(n_samples=None): df["ClaimAmount"] = df["ClaimAmount"].fillna(0) # unquote string fields - for column_name in df.columns[df.dtypes.values == 
object]: + for column_name in df.columns[df.dtypes.values is object]: df[column_name] = df[column_name].str.strip("'") return df.iloc[:n_samples] diff --git a/pyproject.toml b/pyproject.toml index c96e3cfb51ccc..2c551d4c72984 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -152,6 +152,7 @@ ignore=[ # do not use variables named 'l', 'O', or 'I' "E741", # ignore assigned but unused variables + # this comes up with preview=true "F841", ] diff --git a/sklearn/cluster/_optics.py b/sklearn/cluster/_optics.py index 46f795e94ffb2..347c33869aaf4 100755 --- a/sklearn/cluster/_optics.py +++ b/sklearn/cluster/_optics.py @@ -327,7 +327,7 @@ def fit(self, X, y=None): Returns a fitted instance of self. """ dtype = bool if self.metric in PAIRWISE_BOOLEAN_FUNCTIONS else float - if dtype == bool and X.dtype != bool: + if dtype is bool and X.dtype is not bool: msg = ( "Data will be converted to boolean for" f" metric {self.metric}, to avoid this warning," diff --git a/sklearn/cluster/tests/test_dbscan.py b/sklearn/cluster/tests/test_dbscan.py index d42cc2b17d518..556f89312d2fc 100644 --- a/sklearn/cluster/tests/test_dbscan.py +++ b/sklearn/cluster/tests/test_dbscan.py @@ -291,7 +291,7 @@ def test_input_validation(): def test_pickle(): obj = DBSCAN() s = pickle.dumps(obj) - assert type(pickle.loads(s)) == obj.__class__ + assert type(pickle.loads(s)) is obj.__class__ def test_boundaries(): diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 167ce0bac4cba..9be28cac141b1 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -1020,7 +1020,7 @@ def _test_ridge_cv(sparse_container): ridge_cv.predict(X) assert len(ridge_cv.coef_.shape) == 1 - assert type(ridge_cv.intercept_) == np.float64 + assert type(ridge_cv.intercept_) is np.float64 cv = KFold(5) ridge_cv.set_params(cv=cv) @@ -1028,7 +1028,7 @@ def _test_ridge_cv(sparse_container): ridge_cv.predict(X) assert len(ridge_cv.coef_.shape) 
== 1 - assert type(ridge_cv.intercept_) == np.float64 + assert type(ridge_cv.intercept_) is np.float64 @pytest.mark.parametrize( diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index ddf871d30816c..5234e0583cc1d 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -2361,7 +2361,9 @@ def pairwise_distances( dtype = bool if metric in PAIRWISE_BOOLEAN_FUNCTIONS else "infer_float" - if dtype == bool and (X.dtype != bool or (Y is not None and Y.dtype != bool)): + if dtype is bool and ( + X.dtype is not bool or (Y is not None and Y.dtype is not bool) + ): msg = "Data was converted to boolean for metric %s" % metric warnings.warn(msg, DataConversionWarning) diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 110db2c39a4a2..f4c13d6b5dd26 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -1097,14 +1097,14 @@ def _store(key_name, array, weights=None, splits=False, rank=False): except (TypeError, ValueError): arr_dtype = np.dtype(object) else: - if any(np.min_scalar_type(x) == object for x in param_list): + if any(np.min_scalar_type(x) is object for x in param_list): # `np.result_type` might get thrown off by `.dtype` properties # (which some estimators have). # If finding the result dtype this way would give object, # then we use object. # https://github.com/scikit-learn/scikit-learn/issues/29157 arr_dtype = np.dtype(object) - if len(param_list) == n_candidates and arr_dtype != object: + if len(param_list) == n_candidates and arr_dtype is not object: # Exclude `object` else the numpy constructor might infer a list of # tuples to be a 2d array. 
results[key] = MaskedArray(param_list, mask=False, dtype=arr_dtype) diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index 9508e9237c9a2..1382ad063f43e 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -2896,7 +2896,7 @@ def _build_repr(self): value = getattr(self, key, None) if value is None and hasattr(self, "cvargs"): value = self.cvargs.get(key, None) - if len(w) and w[0].category == FutureWarning: + if len(w) and w[0].category is FutureWarning: # if the parameter is deprecated, don't show it continue finally: diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index d94d3f054bba2..0996eca367164 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ b/sklearn/model_selection/tests/test_validation.py @@ -586,10 +586,10 @@ def custom_scorer(clf, X, y): ) # Make sure all the arrays are of np.ndarray type - assert type(cv_results["test_r2"]) == np.ndarray - assert type(cv_results["test_neg_mean_squared_error"]) == np.ndarray - assert type(cv_results["fit_time"]) == np.ndarray - assert type(cv_results["score_time"]) == np.ndarray + assert type(cv_results["test_r2"]) is np.ndarray + assert type(cv_results["test_neg_mean_squared_error"]) is np.ndarray + assert type(cv_results["fit_time"]) is np.ndarray + assert type(cv_results["score_time"]) is np.ndarray # Ensure all the times are within sane limits assert np.all(cv_results["fit_time"] >= 0) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 5264ec0a7e913..7fbbbfe7c15ec 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -1509,7 +1509,7 @@ def _apply_on_subsets(func, X): result_by_batch = [func(batch.reshape(1, n_features)) for batch in X] # func can output tuple (e.g. 
score_samples) - if type(result_full) == tuple: + if type(result_full) is tuple: result_full = result_full[0] result_by_batch = list(map(lambda x: x[0], result_by_batch)) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 92fff950e875e..3ab70a0680db6 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -1340,7 +1340,7 @@ def test_check_scalar_invalid( include_boundaries=include_boundaries, ) assert str(raised_error.value) == str(err_msg) - assert type(raised_error.value) == type(err_msg) + assert type(raised_error.value) is type(err_msg) _psd_cases_valid = { diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index d632abb77280d..ec93678efbbe2 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -871,7 +871,7 @@ def is_sparse(dtype): ) if all(isinstance(dtype_iter, np.dtype) for dtype_iter in dtypes_orig): dtype_orig = np.result_type(*dtypes_orig) - elif pandas_requires_conversion and any(d == object for d in dtypes_orig): + elif pandas_requires_conversion and any(d is object for d in dtypes_orig): # Force object if any of the dtypes is an object dtype_orig = object From ede3462e14435f5117af65f60f6f1d6a63a1283e Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Fri, 12 Jul 2024 13:24:19 +0200 Subject: [PATCH 05/17] fix merge conflict --- .../plot_time_series_lagged_features.py | 25 ------------------- 1 file changed, 25 deletions(-) diff --git a/examples/applications/plot_time_series_lagged_features.py b/examples/applications/plot_time_series_lagged_features.py index 7000176aca980..83c34b76e05b2 100644 --- a/examples/applications/plot_time_series_lagged_features.py +++ b/examples/applications/plot_time_series_lagged_features.py @@ -22,27 +22,6 @@ # Analyzing the Bike Sharing Demand dataset # ----------------------------------------- # -# We start by loading the data from the OpenML repository -# as a pandas dataframe. 
This will be replaced with Polars -# once `fetch_openml` adds a native support for it. -# We convert to Polars for feature engineering, as it automatically caches -# common subexpressions which are reused in multiple expressions -# (like `pl.col("count").shift(1)` below). See -# https://docs.pola.rs/user-guide/lazy/optimizations/ for more information. - -import numpy as np -import polars as pl - -from sklearn.datasets import fetch_openml - -pl.Config.set_fmt_str_lengths(20) - -bike_sharing = fetch_openml( - "Bike_Sharing_Demand", version=2, as_frame=True, parser="pandas" -) -df = bike_sharing.frame -df = pl.DataFrame({col: df[col].to_numpy() for col in df.columns}) -== == == = # We start by loading the data from the OpenML repository as a raw parquet file # to illustrate how to work with an arbitrary parquet file instead of hiding this # step in a convenience tool such as `sklearn.datasets.fetch_openml`. @@ -287,11 +266,7 @@ def consolidate_scores(cv_results, scores, metric): time = cv_results["fit_time"] scores["fit_time"].append(f"{time.mean():.2f} ± {time.std():.2f} s") -<< << << < HEAD scores["loss"].append(f"quantile {int(quantile * 100)}") -== == == = - scores["loss"].append(f"quantile {int(quantile * 100)}") ->> >> >> > upstream / main for key, value in cv_results.items(): if key.startswith("test_"): metric = key.split("test_")[1] From 6afe10099a2c2f3e23669e97c17dec1fe786c6a4 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Fri, 12 Jul 2024 14:17:39 +0200 Subject: [PATCH 06/17] make preview explicit selection --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 2c551d4c72984..707926d193f3a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -142,6 +142,8 @@ exclude=[ [tool.ruff.lint] # This enables us to use CPY001: copyright header check preview = true +# This enables us to use the explicit preview rules that we want only +explicit-preview-rules = true # all rules can be found here: 
https://beta.ruff.rs/docs/rules/ select = ["E", "F", "W", "I", "CPY001"] ignore=[ From 6a394a380bc2ca49934c7645a5e50f3df09ac1df Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Fri, 12 Jul 2024 14:20:21 +0200 Subject: [PATCH 07/17] revert some changes --- examples/applications/plot_time_series_lagged_features.py | 2 +- examples/applications/plot_topics_extraction_with_nmf_lda.py | 2 +- examples/model_selection/plot_likelihood_ratios.py | 2 +- examples/model_selection/plot_roc.py | 4 ++-- pyproject.toml | 3 --- sklearn/linear_model/_glm/_newton_solver.py | 2 +- sklearn/mixture/tests/test_bayesian_mixture.py | 2 +- sklearn/model_selection/_validation.py | 4 ++-- 8 files changed, 9 insertions(+), 12 deletions(-) diff --git a/examples/applications/plot_time_series_lagged_features.py b/examples/applications/plot_time_series_lagged_features.py index 83c34b76e05b2..539ff24a6c1da 100644 --- a/examples/applications/plot_time_series_lagged_features.py +++ b/examples/applications/plot_time_series_lagged_features.py @@ -266,7 +266,7 @@ def consolidate_scores(cv_results, scores, metric): time = cv_results["fit_time"] scores["fit_time"].append(f"{time.mean():.2f} ± {time.std():.2f} s") - scores["loss"].append(f"quantile {int(quantile * 100)}") + scores["loss"].append(f"quantile {int(quantile*100)}") for key, value in cv_results.items(): if key.startswith("test_"): metric = key.split("test_")[1] diff --git a/examples/applications/plot_topics_extraction_with_nmf_lda.py b/examples/applications/plot_topics_extraction_with_nmf_lda.py index a6f774d01e2de..faeef5ae15a11 100644 --- a/examples/applications/plot_topics_extraction_with_nmf_lda.py +++ b/examples/applications/plot_topics_extraction_with_nmf_lda.py @@ -50,7 +50,7 @@ def plot_top_words(model, feature_names, n_top_words, title): ax = axes[topic_idx] ax.barh(top_features, weights, height=0.7) - ax.set_title(f"Topic {topic_idx + 1}", fontdict={"fontsize": 30}) + ax.set_title(f"Topic {topic_idx +1}", fontdict={"fontsize": 30}) 
ax.tick_params(axis="both", which="major", labelsize=20) for i in "top right left".split(): ax.spines[i].set_visible(False) diff --git a/examples/model_selection/plot_likelihood_ratios.py b/examples/model_selection/plot_likelihood_ratios.py index 260bab2c90bf0..2fc3ad3d040f5 100644 --- a/examples/model_selection/plot_likelihood_ratios.py +++ b/examples/model_selection/plot_likelihood_ratios.py @@ -42,7 +42,7 @@ class proportion than the target application. from sklearn.datasets import make_classification X, y = make_classification(n_samples=10_000, weights=[0.9, 0.1], random_state=0) -print(f"Percentage of people carrying the disease: {100 * y.mean():.2f}%") +print(f"Percentage of people carrying the disease: {100*y.mean():.2f}%") # %% # A machine learning model is built to diagnose if a person with some given diff --git a/examples/model_selection/plot_roc.py b/examples/model_selection/plot_roc.py index fbc3463a112e5..1b2a9760342a3 100644 --- a/examples/model_selection/plot_roc.py +++ b/examples/model_selection/plot_roc.py @@ -151,9 +151,9 @@ # # We can briefly demo the effect of :func:`numpy.ravel`: -print(f"y_score:\n{y_score[0:2, :]}") +print(f"y_score:\n{y_score[0:2,:]}") print() -print(f"y_score.ravel():\n{y_score[0:2, :].ravel()}") +print(f"y_score.ravel():\n{y_score[0:2,:].ravel()}") # %% # In a multi-class classification setup with highly imbalanced classes, diff --git a/pyproject.toml b/pyproject.toml index 707926d193f3a..1af573efb7527 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -153,9 +153,6 @@ ignore=[ "E731", # do not use variables named 'l', 'O', or 'I' "E741", - # ignore assigned but unused variables - # this comes up with preview=true - "F841", ] [tool.ruff.lint.flake8-copyright] diff --git a/sklearn/linear_model/_glm/_newton_solver.py b/sklearn/linear_model/_glm/_newton_solver.py index 870f90c9b6255..b2be604d931c5 100644 --- a/sklearn/linear_model/_glm/_newton_solver.py +++ b/sklearn/linear_model/_glm/_newton_solver.py @@ -253,7 +253,7 @@ 
def line_search(self, X, y, sample_weight): check = loss_improvement <= t * armijo_term if is_verbose: print( - f" line search iteration={i + 1}, step size={t}\n" + f" line search iteration={i+1}, step size={t}\n" f" check loss improvement <= armijo term: {loss_improvement} " f"<= {t * armijo_term} {check}" ) diff --git a/sklearn/mixture/tests/test_bayesian_mixture.py b/sklearn/mixture/tests/test_bayesian_mixture.py index b9123c3a37c26..925cd2fdb615b 100644 --- a/sklearn/mixture/tests/test_bayesian_mixture.py +++ b/sklearn/mixture/tests/test_bayesian_mixture.py @@ -118,7 +118,7 @@ def test_bayesian_mixture_precisions_prior_initialisation(): ) msg = ( "The parameter 'degrees_of_freedom_prior' should be greater than" - f" {n_features - 1}, but got {bad_degrees_of_freedom_prior_:.3f}." + f" {n_features -1}, but got {bad_degrees_of_freedom_prior_:.3f}." ) with pytest.raises(ValueError, match=msg): bgmm.fit(X) diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index 7b6f73646e5ce..ddc9b542b0a5e 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -844,9 +844,9 @@ def _fit_and_score( progress_msg = "" if verbose > 2: if split_progress is not None: - progress_msg = f" {split_progress[0] + 1}/{split_progress[1]}" + progress_msg = f" {split_progress[0]+1}/{split_progress[1]}" if candidate_progress and verbose > 9: - progress_msg += f"; {candidate_progress[0] + 1}/{candidate_progress[1]}" + progress_msg += f"; {candidate_progress[0]+1}/{candidate_progress[1]}" if verbose > 1: if parameters is None: From 575e2e0d3990f5e842d7a6d626730f32e4f801c2 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Fri, 12 Jul 2024 14:22:37 +0200 Subject: [PATCH 08/17] revert some changes --- .../plot_tweedie_regression_insurance_claims.py | 2 +- sklearn/cluster/_optics.py | 2 +- sklearn/cluster/tests/test_dbscan.py | 2 +- sklearn/linear_model/tests/test_ridge.py | 4 ++-- sklearn/metrics/pairwise.py | 4 +--- 
sklearn/model_selection/_split.py | 2 +- sklearn/model_selection/tests/test_validation.py | 8 ++++---- sklearn/utils/estimator_checks.py | 2 +- sklearn/utils/tests/test_validation.py | 2 +- sklearn/utils/validation.py | 2 +- 10 files changed, 14 insertions(+), 16 deletions(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 2c3a1e6f4cea6..31a91fb37c766 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -79,7 +79,7 @@ def load_mtpl2(n_samples=None): df["ClaimAmount"] = df["ClaimAmount"].fillna(0) # unquote string fields - for column_name in df.columns[df.dtypes.values is object]: + for column_name in df.columns[df.dtypes.values == object]: df[column_name] = df[column_name].str.strip("'") return df.iloc[:n_samples] diff --git a/sklearn/cluster/_optics.py b/sklearn/cluster/_optics.py index 347c33869aaf4..46f795e94ffb2 100755 --- a/sklearn/cluster/_optics.py +++ b/sklearn/cluster/_optics.py @@ -327,7 +327,7 @@ def fit(self, X, y=None): Returns a fitted instance of self. 
""" dtype = bool if self.metric in PAIRWISE_BOOLEAN_FUNCTIONS else float - if dtype is bool and X.dtype is not bool: + if dtype == bool and X.dtype != bool: msg = ( "Data will be converted to boolean for" f" metric {self.metric}, to avoid this warning," diff --git a/sklearn/cluster/tests/test_dbscan.py b/sklearn/cluster/tests/test_dbscan.py index 556f89312d2fc..d42cc2b17d518 100644 --- a/sklearn/cluster/tests/test_dbscan.py +++ b/sklearn/cluster/tests/test_dbscan.py @@ -291,7 +291,7 @@ def test_input_validation(): def test_pickle(): obj = DBSCAN() s = pickle.dumps(obj) - assert type(pickle.loads(s)) is obj.__class__ + assert type(pickle.loads(s)) == obj.__class__ def test_boundaries(): diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 9be28cac141b1..167ce0bac4cba 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -1020,7 +1020,7 @@ def _test_ridge_cv(sparse_container): ridge_cv.predict(X) assert len(ridge_cv.coef_.shape) == 1 - assert type(ridge_cv.intercept_) is np.float64 + assert type(ridge_cv.intercept_) == np.float64 cv = KFold(5) ridge_cv.set_params(cv=cv) @@ -1028,7 +1028,7 @@ def _test_ridge_cv(sparse_container): ridge_cv.predict(X) assert len(ridge_cv.coef_.shape) == 1 - assert type(ridge_cv.intercept_) is np.float64 + assert type(ridge_cv.intercept_) == np.float64 @pytest.mark.parametrize( diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index aa000c850f047..9382d585a5fe7 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -2364,9 +2364,7 @@ def pairwise_distances( dtype = bool if metric in PAIRWISE_BOOLEAN_FUNCTIONS else "infer_float" - if dtype is bool and ( - X.dtype is not bool or (Y is not None and Y.dtype is not bool) - ): + if dtype == bool and (X.dtype != bool or (Y is not None and Y.dtype != bool)): msg = "Data was converted to boolean for metric %s" % metric warnings.warn(msg, 
DataConversionWarning) diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index af35f903e4832..bfd741eee5811 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -2935,7 +2935,7 @@ def _build_repr(self): value = getattr(self, key, None) if value is None and hasattr(self, "cvargs"): value = self.cvargs.get(key, None) - if len(w) and w[0].category is FutureWarning: + if len(w) and w[0].category == FutureWarning: # if the parameter is deprecated, don't show it continue finally: diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index 911a3bac2d672..33d4d366bf17a 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ b/sklearn/model_selection/tests/test_validation.py @@ -586,10 +586,10 @@ def custom_scorer(clf, X, y): ) # Make sure all the arrays are of np.ndarray type - assert type(cv_results["test_r2"]) is np.ndarray - assert type(cv_results["test_neg_mean_squared_error"]) is np.ndarray - assert type(cv_results["fit_time"]) is np.ndarray - assert type(cv_results["score_time"]) is np.ndarray + assert type(cv_results["test_r2"]) == np.ndarray + assert type(cv_results["test_neg_mean_squared_error"]) == np.ndarray + assert type(cv_results["fit_time"]) == np.ndarray + assert type(cv_results["score_time"]) == np.ndarray # Ensure all the times are within sane limits assert np.all(cv_results["fit_time"] >= 0) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 62d9973f004d4..ae7a8737ff2bb 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -1509,7 +1509,7 @@ def _apply_on_subsets(func, X): result_by_batch = [func(batch.reshape(1, n_features)) for batch in X] # func can output tuple (e.g. 
score_samples) - if type(result_full) is tuple: + if type(result_full) == tuple: result_full = result_full[0] result_by_batch = list(map(lambda x: x[0], result_by_batch)) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index c567cafbac624..5bde51ae514d9 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -1341,7 +1341,7 @@ def test_check_scalar_invalid( include_boundaries=include_boundaries, ) assert str(raised_error.value) == str(err_msg) - assert type(raised_error.value) is type(err_msg) + assert type(raised_error.value) == type(err_msg) _psd_cases_valid = { diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 612d93f1b21aa..af9fdb4a79cba 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -879,7 +879,7 @@ def is_sparse(dtype): ) if all(isinstance(dtype_iter, np.dtype) for dtype_iter in dtypes_orig): dtype_orig = np.result_type(*dtypes_orig) - elif pandas_requires_conversion and any(d is object for d in dtypes_orig): + elif pandas_requires_conversion and any(d == object for d in dtypes_orig): # Force object if any of the dtypes is an object dtype_orig = object From b6be6b9eb38334f1f965b7859228160e83ca51b9 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 16 Jul 2024 11:25:10 +0200 Subject: [PATCH 09/17] working --- .../plot_time_series_lagged_features.py | 1 - pyproject.toml | 13 +++++++++++++ sklearn/feature_extraction/__init__.py | 2 +- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/examples/applications/plot_time_series_lagged_features.py b/examples/applications/plot_time_series_lagged_features.py index 539ff24a6c1da..edb27ade48007 100644 --- a/examples/applications/plot_time_series_lagged_features.py +++ b/examples/applications/plot_time_series_lagged_features.py @@ -52,7 +52,6 @@ # https://docs.pola.rs/user-guide/lazy/optimizations/ for more information. 
df = pl.read_parquet(bike_sharing_data_file) ->> >> >> > upstream / main # %% # Next, we take a look at the statistical summary of the dataset diff --git a/pyproject.toml b/pyproject.toml index 1af573efb7527..5bd410388e35f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -153,6 +153,13 @@ ignore=[ "E731", # do not use variables named 'l', 'O', or 'I' "E741", + # E721 is in preview (july 2024) and gives many false positives. + # Use `is` and `is not` for type comparisons, or `isinstance()` for + # isinstance checks + "E721", + # F841 is in preview (july 2024), and we don't care much about it. + # Local variable ... is assigned to but never used + "F841", ] [tool.ruff.lint.flake8-copyright] @@ -168,8 +175,14 @@ notice-rgx = "\\#\\ Authors:\\ The\\ scikit\\-learn\\ developers\\\n\\#\\ SPDX\\ "benchmarks/*"=["CPY001"] "doc/*"=["CPY001"] "build_tools/*"=["CPY001"] +"sklearn/_build_utils/*"=["CPY001"] "maint_tools/*"=["CPY001"] ".github/*"=["CPY001"] +# __doc__ is too long (>4096 chars) and therefore false positive on copyright check +"examples/model_selection/plot_precision_recall.py"=["CPY001"] +"examples/svm/plot_rbf_parameters.py"=["CPY001"] +# __all__ has un-imported names +"sklearn/__init__.py"=["F822"] [tool.cython-lint] diff --git a/sklearn/feature_extraction/__init__.py b/sklearn/feature_extraction/__init__.py index 621c8683f441e..3ca86d86bee68 100644 --- a/sklearn/feature_extraction/__init__.py +++ b/sklearn/feature_extraction/__init__.py @@ -3,7 +3,7 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -from . import text +from . 
import image, text from ._dict_vectorizer import DictVectorizer from ._hash import FeatureHasher from .image import grid_to_graph, img_to_graph From 2691838f691878a978303e63acc0bd7300f6e8a9 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 16 Jul 2024 11:29:09 +0200 Subject: [PATCH 10/17] bump ruff version --- .pre-commit-config.yaml | 2 +- pyproject.toml | 2 +- sklearn/_min_dependencies.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index abe14acc7778c..e8730b679a5d6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,7 +7,7 @@ repos: - id: trailing-whitespace - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.2.1 + rev: v0.5.1 hooks: - id: ruff args: ["--fix", "--output-format=full"] diff --git a/pyproject.toml b/pyproject.toml index 5bd410388e35f..589ee326ef37e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,7 +81,7 @@ tests = [ "pandas>=1.1.5", "pytest>=7.1.2", "pytest-cov>=2.9.0", - "ruff>=0.2.1", + "ruff>=0.5.1", "black>=24.3.0", "mypy>=1.9", "pyamg>=4.0.0", diff --git a/sklearn/_min_dependencies.py b/sklearn/_min_dependencies.py index f7df37bedda0c..eefaac0f76411 100644 --- a/sklearn/_min_dependencies.py +++ b/sklearn/_min_dependencies.py @@ -32,7 +32,7 @@ "memory_profiler": ("0.57.0", "benchmark, docs"), "pytest": (PYTEST_MIN_VERSION, "tests"), "pytest-cov": ("2.9.0", "tests"), - "ruff": ("0.2.1", "tests"), + "ruff": ("0.5.1", "tests"), "black": ("24.3.0", "tests"), "mypy": ("1.9", "tests"), "pyamg": ("4.0.0", "tests"), From b8edc15b26f1fa7143d96420e96091be9f263f85 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Wed, 17 Jul 2024 16:24:25 +0200 Subject: [PATCH 11/17] remove leftover authors --- examples/linear_model/plot_ard.py | 2 -- examples/linear_model/plot_bayesian_ridge_curvefit.py | 2 -- examples/linear_model/plot_lasso_and_elasticnet.py | 2 -- examples/linear_model/plot_ridge_coeffs.py | 2 -- 
.../plot_sparse_logistic_regression_20newsgroups.py | 2 -- examples/manifold/plot_compare_methods.py | 2 -- examples/mixture/plot_gmm_init.py | 3 --- .../model_selection/plot_grid_search_refit_callable.py | 2 -- examples/neighbors/plot_kde_1d.py | 2 -- sklearn/_loss/link.py | 2 -- sklearn/cluster/_bisect_k_means.py | 2 -- sklearn/decomposition/_lda.py | 2 -- sklearn/decomposition/_truncated_svd.py | 5 ----- sklearn/ensemble/_hist_gradient_boosting/binning.py | 2 -- .../ensemble/_hist_gradient_boosting/gradient_boosting.py | 2 -- sklearn/ensemble/_hist_gradient_boosting/grower.py | 2 -- sklearn/ensemble/_hist_gradient_boosting/predictor.py | 2 -- sklearn/feature_selection/_mutual_info.py | 2 -- sklearn/feature_selection/_variance_threshold.py | 3 +-- sklearn/linear_model/_logistic.py | 8 -------- sklearn/manifold/_mds.py | 3 --- sklearn/neighbors/_kde.py | 1 - 22 files changed, 1 insertion(+), 54 deletions(-) diff --git a/examples/linear_model/plot_ard.py b/examples/linear_model/plot_ard.py index 9b1c355ef0ef5..46cc619718afb 100644 --- a/examples/linear_model/plot_ard.py +++ b/examples/linear_model/plot_ard.py @@ -22,8 +22,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Author: Arturo Amor - # %% # Models robustness to recover the ground truth weights # ===================================================== diff --git a/examples/linear_model/plot_bayesian_ridge_curvefit.py b/examples/linear_model/plot_bayesian_ridge_curvefit.py index f1c86a196c2a3..45679580dbf34 100644 --- a/examples/linear_model/plot_bayesian_ridge_curvefit.py +++ b/examples/linear_model/plot_bayesian_ridge_curvefit.py @@ -29,8 +29,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Author: Yoshihiro Uchida - # %% # Generate sinusoidal data with noise # ----------------------------------- diff --git a/examples/linear_model/plot_lasso_and_elasticnet.py b/examples/linear_model/plot_lasso_and_elasticnet.py index 
f0eb2400c95ef..2db597fac96ac 100644 --- a/examples/linear_model/plot_lasso_and_elasticnet.py +++ b/examples/linear_model/plot_lasso_and_elasticnet.py @@ -24,8 +24,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Author: Arturo Amor - # %% # Generate synthetic dataset # -------------------------- diff --git a/examples/linear_model/plot_ridge_coeffs.py b/examples/linear_model/plot_ridge_coeffs.py index 0cff8c79a55bb..1ad7962f8bfa3 100644 --- a/examples/linear_model/plot_ridge_coeffs.py +++ b/examples/linear_model/plot_ridge_coeffs.py @@ -54,8 +54,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Author: Kornel Kielczewski -- - # %% # Purpose of this example # ----------------------- diff --git a/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py b/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py index fc6b1c57d7ad7..fdf914f3a7ab2 100644 --- a/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py +++ b/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py @@ -23,8 +23,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Author: Arthur Mensch - import timeit import warnings diff --git a/examples/manifold/plot_compare_methods.py b/examples/manifold/plot_compare_methods.py index 9c123aadda8ea..30ce4e5d8d897 100644 --- a/examples/manifold/plot_compare_methods.py +++ b/examples/manifold/plot_compare_methods.py @@ -23,8 +23,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Author: Jake Vanderplas -- - # %% # Dataset preparation # ------------------- diff --git a/examples/mixture/plot_gmm_init.py b/examples/mixture/plot_gmm_init.py index dd3d1c8a22692..0178d4a07af11 100644 --- a/examples/mixture/plot_gmm_init.py +++ b/examples/mixture/plot_gmm_init.py @@ -36,9 +36,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Author: 
Gordon Walsh -# Data generation code from Jake Vanderplas - from timeit import default_timer as timer import matplotlib.pyplot as plt diff --git a/examples/model_selection/plot_grid_search_refit_callable.py b/examples/model_selection/plot_grid_search_refit_callable.py index 9a8bf3c70d9cc..2b13ee5ad584c 100644 --- a/examples/model_selection/plot_grid_search_refit_callable.py +++ b/examples/model_selection/plot_grid_search_refit_callable.py @@ -21,8 +21,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Author: Wenhao Zhang - import matplotlib.pyplot as plt import numpy as np diff --git a/examples/neighbors/plot_kde_1d.py b/examples/neighbors/plot_kde_1d.py index 83734ec70bcfc..ed5a454e476ad 100644 --- a/examples/neighbors/plot_kde_1d.py +++ b/examples/neighbors/plot_kde_1d.py @@ -31,8 +31,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Author: Jake Vanderplas -# import matplotlib.pyplot as plt import numpy as np from scipy.stats import norm diff --git a/sklearn/_loss/link.py b/sklearn/_loss/link.py index 1bb290aecc64a..53dff6c2e9285 100644 --- a/sklearn/_loss/link.py +++ b/sklearn/_loss/link.py @@ -5,8 +5,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Author: Christian Lorentzen - from abc import ABC, abstractmethod from dataclasses import dataclass diff --git a/sklearn/cluster/_bisect_k_means.py b/sklearn/cluster/_bisect_k_means.py index d615447d913eb..134d998c90a92 100644 --- a/sklearn/cluster/_bisect_k_means.py +++ b/sklearn/cluster/_bisect_k_means.py @@ -3,8 +3,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Author: Michal Krawczyk - import warnings import numpy as np diff --git a/sklearn/decomposition/_lda.py b/sklearn/decomposition/_lda.py index f987449f1d057..ed48eb8befa63 100644 --- a/sklearn/decomposition/_lda.py +++ b/sklearn/decomposition/_lda.py @@ -11,8 +11,6 @@ # Authors: The scikit-learn developers # 
SPDX-License-Identifier: BSD-3-Clause -# Author: Chyi-Kwei Yau -# Author: Matthew D. Hoffman (original onlineldavb implementation) from numbers import Integral, Real import numpy as np diff --git a/sklearn/decomposition/_truncated_svd.py b/sklearn/decomposition/_truncated_svd.py index b50ed239c6b19..a68e96fdd0c03 100644 --- a/sklearn/decomposition/_truncated_svd.py +++ b/sklearn/decomposition/_truncated_svd.py @@ -3,11 +3,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Author: Lars Buitinck -# Olivier Grisel -# Michael Becker -# License: 3-clause BSD. - from numbers import Integral, Real import numpy as np diff --git a/sklearn/ensemble/_hist_gradient_boosting/binning.py b/sklearn/ensemble/_hist_gradient_boosting/binning.py index 551b8ffe92b06..ed1bca8558f81 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/binning.py +++ b/sklearn/ensemble/_hist_gradient_boosting/binning.py @@ -9,8 +9,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Author: Nicolas Hug - import numpy as np from ...base import BaseEstimator, TransformerMixin diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index 0735cd70b134e..8db6f7e4d5ff4 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -3,8 +3,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Author: Nicolas Hug - import itertools import warnings from abc import ABC, abstractmethod diff --git a/sklearn/ensemble/_hist_gradient_boosting/grower.py b/sklearn/ensemble/_hist_gradient_boosting/grower.py index 98de6cd428e67..a71e564056f8f 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/grower.py +++ b/sklearn/ensemble/_hist_gradient_boosting/grower.py @@ -8,8 +8,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# 
Author: Nicolas Hug - import numbers from heapq import heappop, heappush from timeit import default_timer as time diff --git a/sklearn/ensemble/_hist_gradient_boosting/predictor.py b/sklearn/ensemble/_hist_gradient_boosting/predictor.py index 7ab9542ef6e0f..59bb6499c4501 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/predictor.py +++ b/sklearn/ensemble/_hist_gradient_boosting/predictor.py @@ -5,8 +5,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Author: Nicolas Hug - import numpy as np from ._predictor import ( diff --git a/sklearn/feature_selection/_mutual_info.py b/sklearn/feature_selection/_mutual_info.py index 42afdfb10dc3a..ede6fa9a21c34 100644 --- a/sklearn/feature_selection/_mutual_info.py +++ b/sklearn/feature_selection/_mutual_info.py @@ -1,7 +1,5 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Author: Nikolay Mayorov -# License: 3-clause BSD from numbers import Integral diff --git a/sklearn/feature_selection/_variance_threshold.py b/sklearn/feature_selection/_variance_threshold.py index a9b360037590f..7494b72c1acb8 100644 --- a/sklearn/feature_selection/_variance_threshold.py +++ b/sklearn/feature_selection/_variance_threshold.py @@ -1,7 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Author: Lars Buitinck -# License: 3-clause BSD + from numbers import Real import numpy as np diff --git a/sklearn/linear_model/_logistic.py b/sklearn/linear_model/_logistic.py index 6362a7fa45395..fd34daed115e2 100644 --- a/sklearn/linear_model/_logistic.py +++ b/sklearn/linear_model/_logistic.py @@ -5,14 +5,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Author: Gael Varoquaux -# Fabian Pedregosa -# Alexandre Gramfort -# Manoj Kumar -# Lars Buitinck -# Simon Wu -# Arthur Mensch -# SPDX-License-Identifier: BSD-3-Clause - import warnings from numbers import Integral, Real diff --git a/sklearn/neighbors/_kde.py 
b/sklearn/neighbors/_kde.py index 0e4b45cc77e36..73c50e848ae2b 100644 --- a/sklearn/neighbors/_kde.py +++ b/sklearn/neighbors/_kde.py @@ -6,7 +6,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Author: Jake Vanderplas import itertools from numbers import Integral, Real From 96f80c7ad25cc411aeb7b674bc23d340d08163f3 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Wed, 17 Jul 2024 16:39:45 +0200 Subject: [PATCH 12/17] Jeremie's comments --- pyproject.toml | 2 +- sklearn/utils/_show_versions.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 40c04d3840552..2e26327324b9d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -163,7 +163,7 @@ ignore=[ ] [tool.ruff.lint.flake8-copyright] -notice-rgx = "\\#\\ Authors:\\ The\\ scikit\\-learn\\ developers\\\n\\#\\ SPDX\\-License\\-Identifier:\\ BSD\\-3\\-Clause" +notice-rgx = "\\#\\ Authors:\\ The\\ scikit\\-learn\\ developers\\\n\\#\\ SPDX\\-License\\-Identifier:\\ BSD\\-3\\-Clause\\\n" [tool.ruff.lint.per-file-ignores] # It's fine not to put the import at the top of the file in the examples diff --git a/sklearn/utils/_show_versions.py b/sklearn/utils/_show_versions.py index 43d2c15dbd7a5..cbdece30db326 100644 --- a/sklearn/utils/_show_versions.py +++ b/sklearn/utils/_show_versions.py @@ -7,8 +7,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# SPDX-License-Identifier: BSD-3-Clause - import platform import sys From b7bb8284d6485e10029fcbe3ad23dd55b9eb93d1 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Fri, 19 Jul 2024 17:29:19 +0200 Subject: [PATCH 13/17] remove more leftover authors --- benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py | 2 -- examples/applications/plot_digits_denoising.py | 3 --- examples/cluster/plot_inductive_clustering.py | 3 --- examples/compose/plot_compare_reduction.py | 4 ---- examples/model_selection/plot_likelihood_ratios.py | 2 -- 
sklearn/cluster/_mean_shift.py | 5 ----- sklearn/cluster/tests/test_feature_agglomeration.py | 1 - sklearn/metrics/_scorer.py | 5 ----- sklearn/preprocessing/_csr_polynomial_expansion.pyx | 5 +++-- 9 files changed, 3 insertions(+), 27 deletions(-) diff --git a/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py b/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py index 26789c173688f..a468f7b3e1abf 100644 --- a/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py +++ b/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py @@ -36,8 +36,6 @@ of components (this takes more time). """ -# Authors: Sylvain MARIE, Schneider Electric - import time import matplotlib.pyplot as plt diff --git a/examples/applications/plot_digits_denoising.py b/examples/applications/plot_digits_denoising.py index 73b5a8034f8d6..8ca31da6a74d2 100644 --- a/examples/applications/plot_digits_denoising.py +++ b/examples/applications/plot_digits_denoising.py @@ -24,9 +24,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Authors: Guillaume Lemaitre -# Licence: BSD 3 clause - # %% # Load the dataset via OpenML # --------------------------- diff --git a/examples/cluster/plot_inductive_clustering.py b/examples/cluster/plot_inductive_clustering.py index b92a814cd4ef3..29846b15cdb60 100644 --- a/examples/cluster/plot_inductive_clustering.py +++ b/examples/cluster/plot_inductive_clustering.py @@ -23,9 +23,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Authors: Chirag Nagpal -# Christos Aridas - import matplotlib.pyplot as plt from sklearn.base import BaseEstimator, clone diff --git a/examples/compose/plot_compare_reduction.py b/examples/compose/plot_compare_reduction.py index 2aeb9fae4af5e..cfe22d646244c 100644 --- a/examples/compose/plot_compare_reduction.py +++ b/examples/compose/plot_compare_reduction.py @@ -23,10 +23,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause 
-# Authors: Robert McGibbon -# Joel Nothman -# Guillaume Lemaitre - # %% # Illustration of ``Pipeline`` and ``GridSearchCV`` ############################################################################### diff --git a/examples/model_selection/plot_likelihood_ratios.py b/examples/model_selection/plot_likelihood_ratios.py index 2fc3ad3d040f5..b5a68eb79810f 100644 --- a/examples/model_selection/plot_likelihood_ratios.py +++ b/examples/model_selection/plot_likelihood_ratios.py @@ -28,8 +28,6 @@ class proportion than the target application. # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Authors: Arturo Amor -# Olivier Grisel # %% # Pre-test vs. post-test analysis # =============================== diff --git a/sklearn/cluster/_mean_shift.py b/sklearn/cluster/_mean_shift.py index db5c22d9650bf..900494536381a 100644 --- a/sklearn/cluster/_mean_shift.py +++ b/sklearn/cluster/_mean_shift.py @@ -12,11 +12,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Authors: Conrad Lee -# Alexandre Gramfort -# Gael Varoquaux -# Martino Sorbaro - import warnings from collections import defaultdict from numbers import Integral, Real diff --git a/sklearn/cluster/tests/test_feature_agglomeration.py b/sklearn/cluster/tests/test_feature_agglomeration.py index 488dd638ad125..ef8596c0813f8 100644 --- a/sklearn/cluster/tests/test_feature_agglomeration.py +++ b/sklearn/cluster/tests/test_feature_agglomeration.py @@ -2,7 +2,6 @@ Tests for sklearn.cluster._feature_agglomeration """ -# Authors: Sergul Aydore 2017 import warnings import numpy as np diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 385ed1564bc58..b735da25d577c 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -16,11 +16,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Authors: Andreas Mueller -# Lars Buitinck -# Arnaud Joly -# License: Simplified BSD - import copy import 
warnings from collections import Counter diff --git a/sklearn/preprocessing/_csr_polynomial_expansion.pyx b/sklearn/preprocessing/_csr_polynomial_expansion.pyx index 017af83f035b2..38e5c3069d252 100644 --- a/sklearn/preprocessing/_csr_polynomial_expansion.pyx +++ b/sklearn/preprocessing/_csr_polynomial_expansion.pyx @@ -1,5 +1,6 @@ -# Authors: Andrew nystrom -# Meekail Zain +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from ..utils._typedefs cimport uint8_t, int64_t, intp_t ctypedef uint8_t FLAG_t From 1c0d9e65c03b035684b671532e14147bec029286 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Fri, 19 Jul 2024 17:33:58 +0200 Subject: [PATCH 14/17] remove Code Source instances --- examples/classification/plot_classifier_comparison.py | 5 ----- examples/cluster/plot_cluster_iris.py | 4 ---- examples/cluster/plot_digits_agglomeration.py | 4 ---- examples/compose/plot_digits_pipe.py | 4 ---- examples/datasets/plot_digits_last_image.py | 4 ---- examples/datasets/plot_iris_dataset.py | 4 ---- examples/decomposition/plot_pca_iris.py | 3 --- examples/linear_model/plot_iris_logistic.py | 4 ---- examples/linear_model/plot_logistic.py | 3 --- examples/linear_model/plot_ols.py | 3 --- examples/linear_model/plot_ols_3d.py | 4 ---- examples/linear_model/plot_ols_ridge_variance.py | 5 ----- examples/preprocessing/plot_discretization_classification.py | 5 ----- examples/svm/plot_svm_kernels.py | 3 --- examples/svm/plot_svm_margin.py | 4 ---- examples/svm/plot_svm_tie_breaking.py | 3 --- 16 files changed, 62 deletions(-) diff --git a/examples/classification/plot_classifier_comparison.py b/examples/classification/plot_classifier_comparison.py index 3108618091b4f..5747d00ba7950 100644 --- a/examples/classification/plot_classifier_comparison.py +++ b/examples/classification/plot_classifier_comparison.py @@ -22,11 +22,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Code source: Gaël Varoquaux -# Andreas Müller -# 
Modified for documentation by Jaques Grobler -# SPDX-License-Identifier: BSD-3-Clause - import matplotlib.pyplot as plt import numpy as np from matplotlib.colors import ListedColormap diff --git a/examples/cluster/plot_cluster_iris.py b/examples/cluster/plot_cluster_iris.py index e469eeb3a86ff..1a34a9b3534bc 100644 --- a/examples/cluster/plot_cluster_iris.py +++ b/examples/cluster/plot_cluster_iris.py @@ -21,10 +21,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Code source: Gaël Varoquaux -# Modified for documentation by Jaques Grobler -# SPDX-License-Identifier: BSD-3-Clause - import matplotlib.pyplot as plt # Though the following import is not directly being used, it is required diff --git a/examples/cluster/plot_digits_agglomeration.py b/examples/cluster/plot_digits_agglomeration.py index d4a427905d91f..8de14b0729f53 100644 --- a/examples/cluster/plot_digits_agglomeration.py +++ b/examples/cluster/plot_digits_agglomeration.py @@ -11,10 +11,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Code source: Gaël Varoquaux -# Modified for documentation by Jaques Grobler -# SPDX-License-Identifier: BSD-3-Clause - import matplotlib.pyplot as plt import numpy as np diff --git a/examples/compose/plot_digits_pipe.py b/examples/compose/plot_digits_pipe.py index c23e9b3b96a08..8a202bb5bd74c 100644 --- a/examples/compose/plot_digits_pipe.py +++ b/examples/compose/plot_digits_pipe.py @@ -13,10 +13,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Code source: Gaël Varoquaux -# Modified for documentation by Jaques Grobler -# SPDX-License-Identifier: BSD-3-Clause - import matplotlib.pyplot as plt import numpy as np import polars as pl diff --git a/examples/datasets/plot_digits_last_image.py b/examples/datasets/plot_digits_last_image.py index 836db4b37758c..eada3b12588da 100644 --- a/examples/datasets/plot_digits_last_image.py +++ 
b/examples/datasets/plot_digits_last_image.py @@ -17,10 +17,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Code source: Gaël Varoquaux -# Modified for documentation by Jaques Grobler -# SPDX-License-Identifier: BSD-3-Clause - import matplotlib.pyplot as plt from sklearn import datasets diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py index ea9c89bbb6082..d9560e51ef245 100644 --- a/examples/datasets/plot_iris_dataset.py +++ b/examples/datasets/plot_iris_dataset.py @@ -18,10 +18,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Code source: Gaël Varoquaux -# Modified for documentation by Jaques Grobler -# SPDX-License-Identifier: BSD-3-Clause - # %% # Loading the iris dataset # ------------------------ diff --git a/examples/decomposition/plot_pca_iris.py b/examples/decomposition/plot_pca_iris.py index 66c7bc7994129..1ceecc0058b67 100644 --- a/examples/decomposition/plot_pca_iris.py +++ b/examples/decomposition/plot_pca_iris.py @@ -13,9 +13,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Code source: Gaël Varoquaux -# SPDX-License-Identifier: BSD-3-Clause - import matplotlib.pyplot as plt # unused but required import for doing 3d projections with matplotlib < 3.2 diff --git a/examples/linear_model/plot_iris_logistic.py b/examples/linear_model/plot_iris_logistic.py index 288de6a886550..481312c94c789 100644 --- a/examples/linear_model/plot_iris_logistic.py +++ b/examples/linear_model/plot_iris_logistic.py @@ -13,10 +13,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Code source: Gaël Varoquaux -# Modified for documentation by Jaques Grobler -# SPDX-License-Identifier: BSD-3-Clause - import matplotlib.pyplot as plt from sklearn import datasets diff --git a/examples/linear_model/plot_logistic.py b/examples/linear_model/plot_logistic.py index 8ddd4deef2a8f..b54c1fbf1340d 100644 
--- a/examples/linear_model/plot_logistic.py +++ b/examples/linear_model/plot_logistic.py @@ -12,9 +12,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Code source: Gael Varoquaux -# SPDX-License-Identifier: BSD-3-Clause - import matplotlib.pyplot as plt import numpy as np from scipy.special import expit diff --git a/examples/linear_model/plot_ols.py b/examples/linear_model/plot_ols.py index bb4a242cdbad2..8aaa35ed8d899 100644 --- a/examples/linear_model/plot_ols.py +++ b/examples/linear_model/plot_ols.py @@ -17,9 +17,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Code source: Jaques Grobler -# SPDX-License-Identifier: BSD-3-Clause - import matplotlib.pyplot as plt import numpy as np diff --git a/examples/linear_model/plot_ols_3d.py b/examples/linear_model/plot_ols_3d.py index 38fbcca546297..cd848f659e8d8 100644 --- a/examples/linear_model/plot_ols_3d.py +++ b/examples/linear_model/plot_ols_3d.py @@ -12,10 +12,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Code source: Gaël Varoquaux -# Modified for documentation by Jaques Grobler -# SPDX-License-Identifier: BSD-3-Clause - # %% # First we load the diabetes dataset. 
diff --git a/examples/linear_model/plot_ols_ridge_variance.py b/examples/linear_model/plot_ols_ridge_variance.py index 86c28b4d3ae23..a65cc6eb7b7d1 100644 --- a/examples/linear_model/plot_ols_ridge_variance.py +++ b/examples/linear_model/plot_ols_ridge_variance.py @@ -22,11 +22,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Code source: Gaël Varoquaux -# Modified for documentation by Jaques Grobler -# SPDX-License-Identifier: BSD-3-Clause - - import matplotlib.pyplot as plt import numpy as np diff --git a/examples/preprocessing/plot_discretization_classification.py b/examples/preprocessing/plot_discretization_classification.py index 310089e296883..1eeb9f169bf3b 100644 --- a/examples/preprocessing/plot_discretization_classification.py +++ b/examples/preprocessing/plot_discretization_classification.py @@ -31,11 +31,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Code source: Tom Dupré la Tour -# Adapted from plot_classifier_comparison by Gaël Varoquaux and Andreas Müller -# -# SPDX-License-Identifier: BSD-3-Clause - import matplotlib.pyplot as plt import numpy as np from matplotlib.colors import ListedColormap diff --git a/examples/svm/plot_svm_kernels.py b/examples/svm/plot_svm_kernels.py index 480293a03b9f0..798e62bbb7b4e 100644 --- a/examples/svm/plot_svm_kernels.py +++ b/examples/svm/plot_svm_kernels.py @@ -39,9 +39,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Code source: Gaël Varoquaux -# SPDX-License-Identifier: BSD-3-Clause - # %% # Creating a dataset # ------------------ diff --git a/examples/svm/plot_svm_margin.py b/examples/svm/plot_svm_margin.py index 671b7f4902bc8..f38858bb714a9 100644 --- a/examples/svm/plot_svm_margin.py +++ b/examples/svm/plot_svm_margin.py @@ -16,10 +16,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Code source: Gaël Varoquaux -# Modified for documentation by Jaques Grobler 
-# SPDX-License-Identifier: BSD-3-Clause - import matplotlib.pyplot as plt import numpy as np diff --git a/examples/svm/plot_svm_tie_breaking.py b/examples/svm/plot_svm_tie_breaking.py index a9276efd44572..b5f4fb8dd18c3 100644 --- a/examples/svm/plot_svm_tie_breaking.py +++ b/examples/svm/plot_svm_tie_breaking.py @@ -17,9 +17,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Code source: Andreas Mueller, Adrin Jalali -# SPDX-License-Identifier: BSD-3-Clause - import matplotlib.pyplot as plt import numpy as np From b5db517f0c810254ab60c9a676824b1b44e1faf8 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Fri, 19 Jul 2024 17:37:31 +0200 Subject: [PATCH 15/17] remove duplicate license statements --- examples/ensemble/plot_adaboost_multiclass.py | 3 --- examples/neighbors/plot_nca_classification.py | 2 -- examples/neighbors/plot_nca_dim_reduction.py | 2 -- examples/neighbors/plot_nca_illustration.py | 2 -- sklearn/neural_network/__init__.py | 2 -- sklearn/utils/optimize.py | 2 -- 6 files changed, 13 deletions(-) diff --git a/examples/ensemble/plot_adaboost_multiclass.py b/examples/ensemble/plot_adaboost_multiclass.py index db0200997d9e0..a18ff4e09c7bb 100644 --- a/examples/ensemble/plot_adaboost_multiclass.py +++ b/examples/ensemble/plot_adaboost_multiclass.py @@ -28,9 +28,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# Noel Dawe -# SPDX-License-Identifier: BSD-3-Clause - # %% # Creating the dataset # -------------------- diff --git a/examples/neighbors/plot_nca_classification.py b/examples/neighbors/plot_nca_classification.py index 496038cec7f88..b8d69b82fec42 100644 --- a/examples/neighbors/plot_nca_classification.py +++ b/examples/neighbors/plot_nca_classification.py @@ -18,8 +18,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# SPDX-License-Identifier: BSD-3-Clause - import matplotlib.pyplot as plt from matplotlib.colors import ListedColormap diff --git 
a/examples/neighbors/plot_nca_dim_reduction.py b/examples/neighbors/plot_nca_dim_reduction.py index 1e6305549a46c..fcf2b0f602d20 100644 --- a/examples/neighbors/plot_nca_dim_reduction.py +++ b/examples/neighbors/plot_nca_dim_reduction.py @@ -31,8 +31,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# SPDX-License-Identifier: BSD-3-Clause - import matplotlib.pyplot as plt import numpy as np diff --git a/examples/neighbors/plot_nca_illustration.py b/examples/neighbors/plot_nca_illustration.py index 8d847f28d6e4d..e67bdb4b2d4d7 100644 --- a/examples/neighbors/plot_nca_illustration.py +++ b/examples/neighbors/plot_nca_illustration.py @@ -13,8 +13,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# SPDX-License-Identifier: BSD-3-Clause - import matplotlib.pyplot as plt import numpy as np from matplotlib import cm diff --git a/sklearn/neural_network/__init__.py b/sklearn/neural_network/__init__.py index fa580a8b40642..fa5980ce24f5c 100644 --- a/sklearn/neural_network/__init__.py +++ b/sklearn/neural_network/__init__.py @@ -3,8 +3,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# SPDX-License-Identifier: BSD-3-Clause - from ._multilayer_perceptron import MLPClassifier, MLPRegressor from ._rbm import BernoulliRBM diff --git a/sklearn/utils/optimize.py b/sklearn/utils/optimize.py index 519f87470343c..fe60136c12d09 100644 --- a/sklearn/utils/optimize.py +++ b/sklearn/utils/optimize.py @@ -14,8 +14,6 @@ # This is a modified file from scipy.optimize # Original authors: Travis Oliphant, Eric Jones -# Modifications by Gael Varoquaux, Mathieu Blondel and Tom Dupre la Tour -# SPDX-License-Identifier: BSD-3-Clause import warnings From 3c71f31837a07db63eb0096f8a182f4c6bf40202 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Mon, 22 Jul 2024 13:59:21 +0200 Subject: [PATCH 16/17] ensure the newline after --- benchmarks/bench_plot_parallel_pairwise.py | 1 + 
examples/calibration/plot_calibration_curve.py | 1 + examples/calibration/plot_compare_calibration.py | 1 + examples/cluster/plot_agglomerative_dendrogram.py | 1 + examples/cluster/plot_hdbscan.py | 1 + .../linear_model/plot_poisson_regression_non_normal_loss.py | 1 + .../linear_model/plot_tweedie_regression_insurance_claims.py | 1 + examples/neighbors/plot_caching_nearest_neighbors.py | 1 + sklearn/_build_utils/tempita.py | 1 + sklearn/cluster/tests/test_hierarchical.py | 1 + sklearn/cluster/tests/test_optics.py | 1 + sklearn/compose/_column_transformer.py | 1 + sklearn/conftest.py | 1 + sklearn/covariance/_graph_lasso.py | 1 + sklearn/datasets/_base.py | 1 + sklearn/datasets/_openml.py | 1 + sklearn/ensemble/tests/test_bagging.py | 1 + sklearn/feature_extraction/_stop_words.py | 1 + sklearn/impute/_iterative.py | 1 + sklearn/inspection/_pd_utils.py | 2 ++ sklearn/inspection/_plot/decision_boundary.py | 1 + sklearn/inspection/_plot/partial_dependence.py | 1 + sklearn/kernel_ridge.py | 1 + sklearn/linear_model/_glm/__init__.py | 1 + sklearn/linear_model/_glm/_newton_solver.py | 1 + sklearn/linear_model/_glm/glm.py | 1 + sklearn/linear_model/_glm/tests/test_glm.py | 1 + sklearn/linear_model/_passive_aggressive.py | 1 + sklearn/linear_model/_perceptron.py | 1 + sklearn/linear_model/_quantile.py | 1 + sklearn/linear_model/_sgd_fast.pxd | 1 + sklearn/linear_model/_stochastic_gradient.py | 1 + sklearn/linear_model/tests/test_theil_sen.py | 1 + sklearn/manifold/_isomap.py | 1 + sklearn/metrics/_pairwise_distances_reduction/__init__.py | 1 + sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py | 1 + sklearn/metrics/_plot/confusion_matrix.py | 1 + sklearn/metrics/_plot/det_curve.py | 1 + sklearn/metrics/_plot/precision_recall_curve.py | 1 + sklearn/metrics/_plot/regression.py | 1 + sklearn/metrics/_plot/roc_curve.py | 1 + sklearn/metrics/cluster/_bicluster.py | 1 + sklearn/mixture/tests/test_bayesian_mixture.py | 1 + 
sklearn/model_selection/_classification_threshold.py | 1 + sklearn/model_selection/_plot.py | 1 + sklearn/model_selection/_search_successive_halving.py | 1 + sklearn/naive_bayes.py | 1 + sklearn/neighbors/_base.py | 1 + sklearn/neighbors/_classification.py | 1 + sklearn/neighbors/_graph.py | 1 + sklearn/neighbors/_regression.py | 1 - sklearn/preprocessing/_function_transformer.py | 1 + sklearn/preprocessing/_target_encoder.py | 1 + sklearn/semi_supervised/_label_propagation.py | 1 + sklearn/svm/_base.py | 1 + sklearn/svm/_classes.py | 1 + sklearn/tree/_export.py | 1 + sklearn/utils/_arpack.py | 1 + sklearn/utils/_available_if.py | 1 + sklearn/utils/_bunch.py | 1 + sklearn/utils/_chunking.py | 1 + sklearn/utils/_encode.py | 1 + sklearn/utils/_estimator_html_repr.py | 1 + sklearn/utils/_fast_dict.pxd | 1 + sklearn/utils/_indexing.py | 1 + sklearn/utils/_joblib.py | 1 + sklearn/utils/_mask.py | 1 + sklearn/utils/_missing.py | 1 + sklearn/utils/_mocking.py | 1 + sklearn/utils/_optional_dependencies.py | 2 ++ sklearn/utils/_param_validation.py | 1 + sklearn/utils/_plotting.py | 1 + sklearn/utils/_random.pyx | 1 + sklearn/utils/_set_output.py | 1 + sklearn/utils/_tags.py | 1 + sklearn/utils/_testing.py | 1 + sklearn/utils/_user_interface.py | 1 + sklearn/utils/deprecation.py | 1 + sklearn/utils/metaestimators.py | 1 + sklearn/utils/random.py | 1 + sklearn/utils/sparsefuncs.py | 1 + sklearn/utils/stats.py | 1 + sklearn/utils/tests/test_extmath.py | 1 + 83 files changed, 84 insertions(+), 1 deletion(-) diff --git a/benchmarks/bench_plot_parallel_pairwise.py b/benchmarks/bench_plot_parallel_pairwise.py index a6eb6c04bffab..5b7cf81f8fce4 100644 --- a/benchmarks/bench_plot_parallel_pairwise.py +++ b/benchmarks/bench_plot_parallel_pairwise.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import time import matplotlib.pyplot as plt diff --git a/examples/calibration/plot_calibration_curve.py 
b/examples/calibration/plot_calibration_curve.py index 03bc28e25740a..1c5e297026ae7 100644 --- a/examples/calibration/plot_calibration_curve.py +++ b/examples/calibration/plot_calibration_curve.py @@ -14,6 +14,7 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + # %% # Dataset # ------- diff --git a/examples/calibration/plot_compare_calibration.py b/examples/calibration/plot_compare_calibration.py index 33898d8f151f3..aa60de1032765 100644 --- a/examples/calibration/plot_compare_calibration.py +++ b/examples/calibration/plot_compare_calibration.py @@ -19,6 +19,7 @@ # %% # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + # # Dataset # ------- diff --git a/examples/cluster/plot_agglomerative_dendrogram.py b/examples/cluster/plot_agglomerative_dendrogram.py index 2acb3675ff7c1..bea2a5e84653a 100644 --- a/examples/cluster/plot_agglomerative_dendrogram.py +++ b/examples/cluster/plot_agglomerative_dendrogram.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + """ ========================================= Plot Hierarchical Clustering Dendrogram diff --git a/examples/cluster/plot_hdbscan.py b/examples/cluster/plot_hdbscan.py index 07cea76ae072a..64d4936694bf3 100644 --- a/examples/cluster/plot_hdbscan.py +++ b/examples/cluster/plot_hdbscan.py @@ -15,6 +15,7 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + # %% import matplotlib.pyplot as plt import numpy as np diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 180ee3b70671c..741a92767e953 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + """ ====================================== Poisson 
regression and non-normal loss diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 31a91fb37c766..1e987bfaa6dc2 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + """ ====================================== Tweedie regression on insurance claims diff --git a/examples/neighbors/plot_caching_nearest_neighbors.py b/examples/neighbors/plot_caching_nearest_neighbors.py index f5c3ea200943f..ea6a884c3d486 100644 --- a/examples/neighbors/plot_caching_nearest_neighbors.py +++ b/examples/neighbors/plot_caching_nearest_neighbors.py @@ -20,6 +20,7 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + from tempfile import TemporaryDirectory import matplotlib.pyplot as plt diff --git a/sklearn/_build_utils/tempita.py b/sklearn/_build_utils/tempita.py index fd59fe51c7ba6..c92ea17d2a9b9 100644 --- a/sklearn/_build_utils/tempita.py +++ b/sklearn/_build_utils/tempita.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import argparse import os diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py index a36e5537a3636..65f8184e3f444 100644 --- a/sklearn/cluster/tests/test_hierarchical.py +++ b/sklearn/cluster/tests/test_hierarchical.py @@ -5,6 +5,7 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import itertools import shutil from functools import partial diff --git a/sklearn/cluster/tests/test_optics.py b/sklearn/cluster/tests/test_optics.py index 9c1c13a9e12e0..95324704f6371 100644 --- a/sklearn/cluster/tests/test_optics.py +++ b/sklearn/cluster/tests/test_optics.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # 
SPDX-License-Identifier: BSD-3-Clause + import warnings import numpy as np diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index a5aa7db17d4ae..5e08a68b63c16 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -6,6 +6,7 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import warnings from collections import Counter, UserList from itertools import chain diff --git a/sklearn/conftest.py b/sklearn/conftest.py index 3dbca84a33ea9..a22c2ec5d39f6 100644 --- a/sklearn/conftest.py +++ b/sklearn/conftest.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import builtins import platform import sys diff --git a/sklearn/covariance/_graph_lasso.py b/sklearn/covariance/_graph_lasso.py index 2007aa4085bd2..1de2aeee6c590 100644 --- a/sklearn/covariance/_graph_lasso.py +++ b/sklearn/covariance/_graph_lasso.py @@ -4,6 +4,7 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import operator import sys import time diff --git a/sklearn/datasets/_base.py b/sklearn/datasets/_base.py index 62055d296402b..aaf3d738f85f4 100644 --- a/sklearn/datasets/_base.py +++ b/sklearn/datasets/_base.py @@ -4,6 +4,7 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import csv import gzip import hashlib diff --git a/sklearn/datasets/_openml.py b/sklearn/datasets/_openml.py index e270676272b03..4790431506bce 100644 --- a/sklearn/datasets/_openml.py +++ b/sklearn/datasets/_openml.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import gzip import hashlib import json diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py index e0558917c59ba..4b1c4323d509a 100644 --- a/sklearn/ensemble/tests/test_bagging.py +++ b/sklearn/ensemble/tests/test_bagging.py @@ -4,6 +4,7 @@ # Authors: The 
scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + from itertools import cycle, product import joblib diff --git a/sklearn/feature_extraction/_stop_words.py b/sklearn/feature_extraction/_stop_words.py index ac5c9f495ae84..6bc8e6d2f37dc 100644 --- a/sklearn/feature_extraction/_stop_words.py +++ b/sklearn/feature_extraction/_stop_words.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + # This list of English stop words is taken from the "Glasgow Information # Retrieval Group". The original list can be found at # http://ir.dcs.gla.ac.uk/resources/linguistic_utils/stop_words diff --git a/sklearn/impute/_iterative.py b/sklearn/impute/_iterative.py index 22cede929ebb3..e2d06844611c9 100644 --- a/sklearn/impute/_iterative.py +++ b/sklearn/impute/_iterative.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import warnings from collections import namedtuple from numbers import Integral, Real diff --git a/sklearn/inspection/_pd_utils.py b/sklearn/inspection/_pd_utils.py index 4d890212e2838..a48ba4d9a4490 100644 --- a/sklearn/inspection/_pd_utils.py +++ b/sklearn/inspection/_pd_utils.py @@ -1,5 +1,7 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + + def _check_feature_names(X, feature_names=None): """Check feature names. 
diff --git a/sklearn/inspection/_plot/decision_boundary.py b/sklearn/inspection/_plot/decision_boundary.py index b87316f670cc9..3a9cc17df72a2 100644 --- a/sklearn/inspection/_plot/decision_boundary.py +++ b/sklearn/inspection/_plot/decision_boundary.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import numpy as np from ...base import is_regressor diff --git a/sklearn/inspection/_plot/partial_dependence.py b/sklearn/inspection/_plot/partial_dependence.py index 8a5118df6862b..ecb421ccdd68a 100644 --- a/sklearn/inspection/_plot/partial_dependence.py +++ b/sklearn/inspection/_plot/partial_dependence.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import numbers from itertools import chain from math import ceil diff --git a/sklearn/kernel_ridge.py b/sklearn/kernel_ridge.py index 31eade5255e75..443dc7cc9e483 100644 --- a/sklearn/kernel_ridge.py +++ b/sklearn/kernel_ridge.py @@ -2,6 +2,7 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + from numbers import Real import numpy as np diff --git a/sklearn/linear_model/_glm/__init__.py b/sklearn/linear_model/_glm/__init__.py index 199b938b023d0..d0a51e65d3211 100644 --- a/sklearn/linear_model/_glm/__init__.py +++ b/sklearn/linear_model/_glm/__init__.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + from .glm import ( GammaRegressor, PoissonRegressor, diff --git a/sklearn/linear_model/_glm/_newton_solver.py b/sklearn/linear_model/_glm/_newton_solver.py index b2be604d931c5..400ac79c7c55c 100644 --- a/sklearn/linear_model/_glm/_newton_solver.py +++ b/sklearn/linear_model/_glm/_newton_solver.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + """ Newton solver for Generalized Linear Models """ diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 
14caa4fd733c2..b8f8d9b11f99b 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + """ Generalized Linear Models with Exponential Dispersion Family """ diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index 7f6ec64c15ad4..bf30b071e290b 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import itertools import warnings from functools import partial diff --git a/sklearn/linear_model/_passive_aggressive.py b/sklearn/linear_model/_passive_aggressive.py index 8d4dc2d3a2c23..61eb06edae85f 100644 --- a/sklearn/linear_model/_passive_aggressive.py +++ b/sklearn/linear_model/_passive_aggressive.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + from numbers import Real from ..base import _fit_context diff --git a/sklearn/linear_model/_perceptron.py b/sklearn/linear_model/_perceptron.py index d15668bebfef1..e93200ba385fa 100644 --- a/sklearn/linear_model/_perceptron.py +++ b/sklearn/linear_model/_perceptron.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + from numbers import Real from ..utils._param_validation import Interval, StrOptions diff --git a/sklearn/linear_model/_quantile.py b/sklearn/linear_model/_quantile.py index 79d2e6b67ca5e..d70d69a7d45ff 100644 --- a/sklearn/linear_model/_quantile.py +++ b/sklearn/linear_model/_quantile.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import warnings from numbers import Real diff --git a/sklearn/linear_model/_sgd_fast.pxd b/sklearn/linear_model/_sgd_fast.pxd index c6991b8aa5bc2..bf21557b61e81 100644 --- a/sklearn/linear_model/_sgd_fast.pxd +++ 
b/sklearn/linear_model/_sgd_fast.pxd @@ -1,4 +1,5 @@ # SPDX-License-Identifier: BSD-3-Clause + """Helper to load LossFunction from sgd_fast.pyx to sag_fast.pyx""" cdef class LossFunction: diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py index 8c7bf6179de5b..ae1db0a49a442 100644 --- a/sklearn/linear_model/_stochastic_gradient.py +++ b/sklearn/linear_model/_stochastic_gradient.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + """Classification, regression and One-Class SVM using Stochastic Gradient Descent (SGD). """ diff --git a/sklearn/linear_model/tests/test_theil_sen.py b/sklearn/linear_model/tests/test_theil_sen.py index b59ad639f1dcc..216415f2ee927 100644 --- a/sklearn/linear_model/tests/test_theil_sen.py +++ b/sklearn/linear_model/tests/test_theil_sen.py @@ -4,6 +4,7 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import os import re import sys diff --git a/sklearn/manifold/_isomap.py b/sklearn/manifold/_isomap.py index dee67461d7fcb..f0102a1aaa979 100644 --- a/sklearn/manifold/_isomap.py +++ b/sklearn/manifold/_isomap.py @@ -2,6 +2,7 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import warnings from numbers import Integral, Real diff --git a/sklearn/metrics/_pairwise_distances_reduction/__init__.py b/sklearn/metrics/_pairwise_distances_reduction/__init__.py index 6c8deb34dc448..926d54ea74217 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/__init__.py +++ b/sklearn/metrics/_pairwise_distances_reduction/__init__.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + # # Pairwise Distances Reductions # ============================= diff --git a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py index 83bb8cd2e0d5c..d8307cbe84eaa 100644 --- 
a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py +++ b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + from abc import abstractmethod from typing import List diff --git a/sklearn/metrics/_plot/confusion_matrix.py b/sklearn/metrics/_plot/confusion_matrix.py index 0d5f5d84719ad..f1c9a8a3e1db5 100644 --- a/sklearn/metrics/_plot/confusion_matrix.py +++ b/sklearn/metrics/_plot/confusion_matrix.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + from itertools import product import numpy as np diff --git a/sklearn/metrics/_plot/det_curve.py b/sklearn/metrics/_plot/det_curve.py index 712a87237c35a..7a9b68fb2e7e9 100644 --- a/sklearn/metrics/_plot/det_curve.py +++ b/sklearn/metrics/_plot/det_curve.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import scipy as sp from ...utils._plotting import _BinaryClassifierCurveDisplayMixin diff --git a/sklearn/metrics/_plot/precision_recall_curve.py b/sklearn/metrics/_plot/precision_recall_curve.py index 869c8bc95e675..95698ee43c22b 100644 --- a/sklearn/metrics/_plot/precision_recall_curve.py +++ b/sklearn/metrics/_plot/precision_recall_curve.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + from collections import Counter from ...utils._plotting import _BinaryClassifierCurveDisplayMixin diff --git a/sklearn/metrics/_plot/regression.py b/sklearn/metrics/_plot/regression.py index b079dc02524b3..11450c8311799 100644 --- a/sklearn/metrics/_plot/regression.py +++ b/sklearn/metrics/_plot/regression.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import numbers import numpy as np diff --git a/sklearn/metrics/_plot/roc_curve.py b/sklearn/metrics/_plot/roc_curve.py index cb8718705e831..e9d4ca5d5672d 100644 --- 
a/sklearn/metrics/_plot/roc_curve.py +++ b/sklearn/metrics/_plot/roc_curve.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + from ...utils._plotting import _BinaryClassifierCurveDisplayMixin from .._ranking import auc, roc_curve diff --git a/sklearn/metrics/cluster/_bicluster.py b/sklearn/metrics/cluster/_bicluster.py index c6fa0775bde54..b718a5a226598 100644 --- a/sklearn/metrics/cluster/_bicluster.py +++ b/sklearn/metrics/cluster/_bicluster.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import numpy as np from scipy.optimize import linear_sum_assignment diff --git a/sklearn/mixture/tests/test_bayesian_mixture.py b/sklearn/mixture/tests/test_bayesian_mixture.py index 925cd2fdb615b..3ef292d523f83 100644 --- a/sklearn/mixture/tests/test_bayesian_mixture.py +++ b/sklearn/mixture/tests/test_bayesian_mixture.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import copy import numpy as np diff --git a/sklearn/model_selection/_classification_threshold.py b/sklearn/model_selection/_classification_threshold.py index d29ad5d28c322..820b90f238723 100644 --- a/sklearn/model_selection/_classification_threshold.py +++ b/sklearn/model_selection/_classification_threshold.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + from collections.abc import MutableMapping from numbers import Integral, Real diff --git a/sklearn/model_selection/_plot.py b/sklearn/model_selection/_plot.py index 0565675e87ee7..b16e0f4c1019a 100644 --- a/sklearn/model_selection/_plot.py +++ b/sklearn/model_selection/_plot.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import numpy as np from ..utils._optional_dependencies import check_matplotlib_support diff --git a/sklearn/model_selection/_search_successive_halving.py 
b/sklearn/model_selection/_search_successive_halving.py index 373dbfac22be5..5980d40cb8e40 100644 --- a/sklearn/model_selection/_search_successive_halving.py +++ b/sklearn/model_selection/_search_successive_halving.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + from abc import abstractmethod from copy import deepcopy from math import ceil, floor, log diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index 7992a911c1be1..e2d5a3dc24de8 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -6,6 +6,7 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import warnings from abc import ABCMeta, abstractmethod from numbers import Integral, Real diff --git a/sklearn/neighbors/_base.py b/sklearn/neighbors/_base.py index 750dd485ed586..3dfd2df16fabd 100644 --- a/sklearn/neighbors/_base.py +++ b/sklearn/neighbors/_base.py @@ -2,6 +2,7 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import itertools import numbers import warnings diff --git a/sklearn/neighbors/_classification.py b/sklearn/neighbors/_classification.py index c45be05b5fe04..9c88f5f456e0a 100644 --- a/sklearn/neighbors/_classification.py +++ b/sklearn/neighbors/_classification.py @@ -2,6 +2,7 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import warnings from numbers import Integral diff --git a/sklearn/neighbors/_graph.py b/sklearn/neighbors/_graph.py index d22b25f785e38..7e378b20cbadc 100644 --- a/sklearn/neighbors/_graph.py +++ b/sklearn/neighbors/_graph.py @@ -2,6 +2,7 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import itertools from ..base import ClassNamePrefixFeaturesOutMixin, TransformerMixin, _fit_context diff --git a/sklearn/neighbors/_regression.py b/sklearn/neighbors/_regression.py index 98a5734c5140b..5b96a64a8bc28 100644 --- a/sklearn/neighbors/_regression.py +++ 
b/sklearn/neighbors/_regression.py @@ -2,7 +2,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -# University of Copenhagen import warnings diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py index 9855924b61537..4dbe1e833322c 100644 --- a/sklearn/preprocessing/_function_transformer.py +++ b/sklearn/preprocessing/_function_transformer.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import warnings from functools import partial diff --git a/sklearn/preprocessing/_target_encoder.py b/sklearn/preprocessing/_target_encoder.py index 779618026def1..1855f340c624d 100644 --- a/sklearn/preprocessing/_target_encoder.py +++ b/sklearn/preprocessing/_target_encoder.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + from numbers import Integral, Real import numpy as np diff --git a/sklearn/semi_supervised/_label_propagation.py b/sklearn/semi_supervised/_label_propagation.py index 9b8f4b8da7b37..ef32313c3c75f 100644 --- a/sklearn/semi_supervised/_label_propagation.py +++ b/sklearn/semi_supervised/_label_propagation.py @@ -54,6 +54,7 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import warnings from abc import ABCMeta, abstractmethod from numbers import Integral, Real diff --git a/sklearn/svm/_base.py b/sklearn/svm/_base.py index 22adea71cd27c..fb61c407e1fb8 100644 --- a/sklearn/svm/_base.py +++ b/sklearn/svm/_base.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import warnings from abc import ABCMeta, abstractmethod from numbers import Integral, Real diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py index 8ecdca6f47b56..9ce346780a8eb 100644 --- a/sklearn/svm/_classes.py +++ b/sklearn/svm/_classes.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: 
BSD-3-Clause + from numbers import Integral, Real import numpy as np diff --git a/sklearn/tree/_export.py b/sklearn/tree/_export.py index 14b9be332f676..9cb55f7aa1aa4 100644 --- a/sklearn/tree/_export.py +++ b/sklearn/tree/_export.py @@ -4,6 +4,7 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + from collections.abc import Iterable from io import StringIO from numbers import Integral diff --git a/sklearn/utils/_arpack.py b/sklearn/utils/_arpack.py index 3087a0f4dbf0a..ba82127f98c43 100644 --- a/sklearn/utils/_arpack.py +++ b/sklearn/utils/_arpack.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + from .validation import check_random_state diff --git a/sklearn/utils/_available_if.py b/sklearn/utils/_available_if.py index 9230a35ec9ab0..b0da84189d1f3 100644 --- a/sklearn/utils/_available_if.py +++ b/sklearn/utils/_available_if.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + from functools import update_wrapper, wraps from types import MethodType diff --git a/sklearn/utils/_bunch.py b/sklearn/utils/_bunch.py index 4d474e88eb80e..a11e80e366135 100644 --- a/sklearn/utils/_bunch.py +++ b/sklearn/utils/_bunch.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import warnings diff --git a/sklearn/utils/_chunking.py b/sklearn/utils/_chunking.py index e2462fa7f7eda..6cb5bb819cec7 100644 --- a/sklearn/utils/_chunking.py +++ b/sklearn/utils/_chunking.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import warnings from itertools import islice from numbers import Integral diff --git a/sklearn/utils/_encode.py b/sklearn/utils/_encode.py index 897980e43ce11..479b11e0f59a2 100644 --- a/sklearn/utils/_encode.py +++ b/sklearn/utils/_encode.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + from 
collections import Counter from contextlib import suppress from typing import NamedTuple diff --git a/sklearn/utils/_estimator_html_repr.py b/sklearn/utils/_estimator_html_repr.py index 1e5db459a04e3..5a9cd4186ffa8 100644 --- a/sklearn/utils/_estimator_html_repr.py +++ b/sklearn/utils/_estimator_html_repr.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import html import itertools from contextlib import closing diff --git a/sklearn/utils/_fast_dict.pxd b/sklearn/utils/_fast_dict.pxd index f9f9bf31db1ee..e37f254661ce6 100644 --- a/sklearn/utils/_fast_dict.pxd +++ b/sklearn/utils/_fast_dict.pxd @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + """ Uses C++ map containers for fast dict-like behavior with keys being integers, and values float. diff --git a/sklearn/utils/_indexing.py b/sklearn/utils/_indexing.py index fd9ed46c3b506..6b4b4779db269 100644 --- a/sklearn/utils/_indexing.py +++ b/sklearn/utils/_indexing.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import numbers import sys import warnings diff --git a/sklearn/utils/_joblib.py b/sklearn/utils/_joblib.py index 837dd5102e918..03c10397eea1c 100644 --- a/sklearn/utils/_joblib.py +++ b/sklearn/utils/_joblib.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + # TODO(1.7): remove this file import warnings as _warnings diff --git a/sklearn/utils/_mask.py b/sklearn/utils/_mask.py index 95fefaca6e67c..da21c8e68b72d 100644 --- a/sklearn/utils/_mask.py +++ b/sklearn/utils/_mask.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + from contextlib import suppress import numpy as np diff --git a/sklearn/utils/_missing.py b/sklearn/utils/_missing.py index f2e024df887be..daeb9ba68cc1c 100644 --- a/sklearn/utils/_missing.py +++ b/sklearn/utils/_missing.py @@ -1,5 +1,6 @@ # Authors: 
The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import math import numbers from contextlib import suppress diff --git a/sklearn/utils/_mocking.py b/sklearn/utils/_mocking.py index f4606d2a98c04..6653a4a17a45e 100644 --- a/sklearn/utils/_mocking.py +++ b/sklearn/utils/_mocking.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import numpy as np from ..base import BaseEstimator, ClassifierMixin diff --git a/sklearn/utils/_optional_dependencies.py b/sklearn/utils/_optional_dependencies.py index b2e970502c123..1de7f4479b242 100644 --- a/sklearn/utils/_optional_dependencies.py +++ b/sklearn/utils/_optional_dependencies.py @@ -1,5 +1,7 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + + def check_matplotlib_support(caller_name): """Raise ImportError with detailed error message if mpl is not installed. diff --git a/sklearn/utils/_param_validation.py b/sklearn/utils/_param_validation.py index e1de052bd01d3..53c9eeee65af4 100644 --- a/sklearn/utils/_param_validation.py +++ b/sklearn/utils/_param_validation.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import functools import math import operator diff --git a/sklearn/utils/_plotting.py b/sklearn/utils/_plotting.py index ab3e21bafa134..8d2c7d3bf101b 100644 --- a/sklearn/utils/_plotting.py +++ b/sklearn/utils/_plotting.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import numpy as np from . 
import check_consistent_length diff --git a/sklearn/utils/_random.pyx b/sklearn/utils/_random.pyx index 838db540a210d..f0e649e60fe7c 100644 --- a/sklearn/utils/_random.pyx +++ b/sklearn/utils/_random.pyx @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + """ Random utility function ======================= diff --git a/sklearn/utils/_set_output.py b/sklearn/utils/_set_output.py index 510be7469b96f..9f1504bd513d9 100644 --- a/sklearn/utils/_set_output.py +++ b/sklearn/utils/_set_output.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import importlib from functools import wraps from typing import Protocol, runtime_checkable diff --git a/sklearn/utils/_tags.py b/sklearn/utils/_tags.py index af2ca444b5d33..db8473721d2b6 100644 --- a/sklearn/utils/_tags.py +++ b/sklearn/utils/_tags.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import numpy as np _DEFAULT_TAGS = { diff --git a/sklearn/utils/_testing.py b/sklearn/utils/_testing.py index 961091e4af71a..d75ca9e19cdff 100644 --- a/sklearn/utils/_testing.py +++ b/sklearn/utils/_testing.py @@ -2,6 +2,7 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import atexit import contextlib import functools diff --git a/sklearn/utils/_user_interface.py b/sklearn/utils/_user_interface.py index f48b934dc7c8b..8e7550b09be2c 100644 --- a/sklearn/utils/_user_interface.py +++ b/sklearn/utils/_user_interface.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import timeit from contextlib import contextmanager diff --git a/sklearn/utils/deprecation.py b/sklearn/utils/deprecation.py index e6ad56264e1c3..df218029829c1 100644 --- a/sklearn/utils/deprecation.py +++ b/sklearn/utils/deprecation.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import functools import 
warnings diff --git a/sklearn/utils/metaestimators.py b/sklearn/utils/metaestimators.py index 000722e253d14..f962acb48f74e 100644 --- a/sklearn/utils/metaestimators.py +++ b/sklearn/utils/metaestimators.py @@ -2,6 +2,7 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + from abc import ABCMeta, abstractmethod from contextlib import suppress from typing import Any, List diff --git a/sklearn/utils/random.py b/sklearn/utils/random.py index e7408de304b89..aad8b84828514 100644 --- a/sklearn/utils/random.py +++ b/sklearn/utils/random.py @@ -2,6 +2,7 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import array import numpy as np diff --git a/sklearn/utils/sparsefuncs.py b/sklearn/utils/sparsefuncs.py index 8e721c6b3852c..fb29de8ad7c6e 100644 --- a/sklearn/utils/sparsefuncs.py +++ b/sklearn/utils/sparsefuncs.py @@ -2,6 +2,7 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import numpy as np import scipy.sparse as sp from scipy.sparse.linalg import LinearOperator diff --git a/sklearn/utils/stats.py b/sklearn/utils/stats.py index b931cb6b6f90d..0fc3fae8a88f0 100644 --- a/sklearn/utils/stats.py +++ b/sklearn/utils/stats.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import numpy as np from .extmath import stable_cumsum diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index 5b27a52d29ccf..66d3ec74f5490 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -1,5 +1,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause + import numpy as np import pytest from scipy import linalg, sparse From 660d1a30d7a1d90ee490df3c6e8ef55f61405c74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9mie=20du=20Boisberranger?= Date: Mon, 22 Jul 2024 18:00:48 +0200 Subject: [PATCH 17/17] Update pyproject.toml --- pyproject.toml | 1 + 1 
file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index f51fc07a4ba1e..5cd7cd9b54391 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -177,6 +177,7 @@ notice-rgx = "\\#\\ Authors:\\ The\\ scikit\\-learn\\ developers\\\n\\#\\ SPDX\\ "build_tools/*"=["CPY001"] "sklearn/_build_utils/*"=["CPY001"] "maint_tools/*"=["CPY001"] +".spin/*"=["CPY001"] ".github/*"=["CPY001"] # __doc__ is too long (>4096 chars) and therefore false positive on copyright check "examples/model_selection/plot_precision_recall.py"=["CPY001"]