diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e5a6018df4473..d5b92023fdff1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ repos: - id: end-of-file-fixer - id: trailing-whitespace - repo: https://github.com/psf/black - rev: 22.3.0 + rev: 23.3.0 hooks: - id: black - repo: https://github.com/pycqa/flake8 diff --git a/benchmarks/bench_20newsgroups.py b/benchmarks/bench_20newsgroups.py index 4fffe57b96deb..c542349839178 100644 --- a/benchmarks/bench_20newsgroups.py +++ b/benchmarks/bench_20newsgroups.py @@ -28,7 +28,6 @@ # Data if __name__ == "__main__": - parser = argparse.ArgumentParser() parser.add_argument( "-e", "--estimators", nargs="+", required=True, choices=ESTIMATORS diff --git a/benchmarks/bench_feature_expansions.py b/benchmarks/bench_feature_expansions.py index 98fa17b99f47a..fd5a4f0ebccff 100644 --- a/benchmarks/bench_feature_expansions.py +++ b/benchmarks/bench_feature_expansions.py @@ -35,7 +35,6 @@ fig, axes = plt.subplots(nrows=len(densities), ncols=1, figsize=(8, 10)) for density, ax in zip(densities, axes): - ax.plot( dimensionalities, csr_times[density] / trials, diff --git a/benchmarks/bench_glm.py b/benchmarks/bench_glm.py index 06ca4d1276e1c..c6c2a6f5fa117 100644 --- a/benchmarks/bench_glm.py +++ b/benchmarks/bench_glm.py @@ -10,7 +10,6 @@ if __name__ == "__main__": - import matplotlib.pyplot as plt n_iter = 40 @@ -22,7 +21,6 @@ dimensions = 500 * np.arange(1, n_iter + 1) for i in range(n_iter): - print("Iteration %s of %s" % (i, n_iter)) n_samples, n_features = 10 * i + 3, 10 * i + 3 diff --git a/benchmarks/bench_isolation_forest.py b/benchmarks/bench_isolation_forest.py index 968ecf20876ae..1c85cfb79d321 100644 --- a/benchmarks/bench_isolation_forest.py +++ b/benchmarks/bench_isolation_forest.py @@ -52,7 +52,6 @@ def print_outlier_ratio(y): # Loop over all datasets for fitting and scoring the estimator: for dat in datasets: - # Loading and vectorizing the data: print("====== %s ======" % dat) print("--- Fetching data...") diff --git a/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py b/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py index e4eddf9cb745a..00721aa7f18a9 100644 --- a/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py +++ b/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py @@ -82,7 +82,6 @@ r_time = np.empty((len(n_compo_range), n_iter)) * np.nan # loop for j, n_components in enumerate(n_compo_range): - n_components = int(n_components) print("Performing kPCA with n_components = %i" % n_components) diff --git a/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py b/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py index b6d82647012d5..a40ddea4506dd 100644 --- a/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py +++ b/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py @@ -83,7 +83,6 @@ # loop for j, n_samples in enumerate(n_samples_range): - n_samples = int(n_samples) print("Performing kPCA with n_samples = %i" % n_samples) diff --git a/benchmarks/bench_mnist.py b/benchmarks/bench_mnist.py index c50bfc2e594d6..821fe303e8bf7 100644 --- a/benchmarks/bench_mnist.py +++ b/benchmarks/bench_mnist.py @@ -223,7 +223,6 @@ def load_data(dtype=np.float32, order="F"): ) print("-" * 60) for name in sorted(args["classifiers"], key=error.get): - print( "{0: <23} {1: >10.2f}s {2: >10.2f}s {3: >12.4f}".format( name, train_time[name], test_time[name], error[name] diff --git a/benchmarks/bench_online_ocsvm.py b/benchmarks/bench_online_ocsvm.py index c7eaefe082948..37af2fdd76562 100644 --- a/benchmarks/bench_online_ocsvm.py +++ b/benchmarks/bench_online_ocsvm.py @@ -65,7 +65,6 @@ def print_outlier_ratio(y): results_online = np.empty((len(datasets), n_axis + 5)) for dat, dataset_name in enumerate(datasets): - print(dataset_name) # Loading datasets @@ -133,7 +132,6 @@ def print_outlier_ratio(y): gamma = 1 / n_features # OCSVM default parameter for random_state in random_states: - print("random state: %s" % random_state) X, y = shuffle(X, y, random_state=random_state) diff --git a/benchmarks/bench_plot_fastkmeans.py b/benchmarks/bench_plot_fastkmeans.py index edbf9412deca2..1d420d1dabe5d 100644 --- a/benchmarks/bench_plot_fastkmeans.py +++ b/benchmarks/bench_plot_fastkmeans.py @@ -8,7 +8,6 @@ def compute_bench(samples_range, features_range): - it = 0 results = defaultdict(lambda: []) chunk = 100 diff --git a/benchmarks/bench_plot_hierarchical.py b/benchmarks/bench_plot_hierarchical.py index 856203259e8ee..861a0ea0b5296 100644 --- a/benchmarks/bench_plot_hierarchical.py +++ b/benchmarks/bench_plot_hierarchical.py @@ -8,7 +8,6 @@ def compute_bench(samples_range, features_range): - it = 0 results = defaultdict(lambda: []) diff --git a/benchmarks/bench_plot_lasso_path.py b/benchmarks/bench_plot_lasso_path.py index 4373c70223976..c372ee07117fc 100644 --- a/benchmarks/bench_plot_lasso_path.py +++ b/benchmarks/bench_plot_lasso_path.py @@ -15,7 +15,6 @@ def compute_bench(samples_range, features_range): - it = 0 results = defaultdict(lambda: []) diff --git a/benchmarks/bench_plot_omp_lars.py b/benchmarks/bench_plot_omp_lars.py index e1b71cb7309f7..a800b3ebe2ba9 100644 --- a/benchmarks/bench_plot_omp_lars.py +++ b/benchmarks/bench_plot_omp_lars.py @@ -14,7 +14,6 @@ def compute_bench(samples_range, features_range): - it = 0 results = dict() diff --git a/benchmarks/bench_plot_randomized_svd.py b/benchmarks/bench_plot_randomized_svd.py index ecc1bbb92ce61..2020096a21b88 100644 --- a/benchmarks/bench_plot_randomized_svd.py +++ b/benchmarks/bench_plot_randomized_svd.py @@ -342,7 +342,6 @@ def scalable_frobenius_norm_discrepancy(X, U, s, V): def bench_a(X, dataset_name, power_iter, n_oversamples, n_comps): - all_time = defaultdict(list) if enable_spectral_norm: all_spectral = defaultdict(list) @@ -398,7 +397,6 @@ def bench_a(X, dataset_name, power_iter, n_oversamples, n_comps): def bench_b(power_list): - n_samples, n_features = 1000, 10000 data_params = { "n_samples": n_samples, diff --git a/benchmarks/bench_plot_svd.py b/benchmarks/bench_plot_svd.py index 52d22f6a9c8a0..fc370d1073be1 100644 --- a/benchmarks/bench_plot_svd.py +++ b/benchmarks/bench_plot_svd.py @@ -13,7 +13,6 @@ def compute_bench(samples_range, features_range, n_iter=3, rank=50): - it = 0 results = defaultdict(lambda: []) diff --git a/benchmarks/bench_sample_without_replacement.py b/benchmarks/bench_sample_without_replacement.py index 6e68073c93860..10baad5a8495f 100644 --- a/benchmarks/bench_sample_without_replacement.py +++ b/benchmarks/bench_sample_without_replacement.py @@ -105,47 +105,53 @@ def bench_sample(sampling, n_population, n_samples): ########################################################################### # Set Python core input - sampling_algorithm[ - "python-core-sample" - ] = lambda n_population, n_sample: random.sample(range(n_population), n_sample) + sampling_algorithm["python-core-sample"] = ( + lambda n_population, n_sample: random.sample(range(n_population), n_sample) + ) ########################################################################### # Set custom automatic method selection - sampling_algorithm[ - "custom-auto" - ] = lambda n_population, n_samples, random_state=None: sample_without_replacement( - n_population, n_samples, method="auto", random_state=random_state + sampling_algorithm["custom-auto"] = ( + lambda n_population, n_samples, random_state=None: sample_without_replacement( + n_population, n_samples, method="auto", random_state=random_state + ) ) ########################################################################### # Set custom tracking based method - sampling_algorithm[ - "custom-tracking-selection" - ] = lambda n_population, n_samples, random_state=None: sample_without_replacement( - n_population, n_samples, method="tracking_selection", random_state=random_state + sampling_algorithm["custom-tracking-selection"] = ( + lambda n_population, n_samples, random_state=None: sample_without_replacement( + n_population, + n_samples, + method="tracking_selection", + random_state=random_state, + ) ) ########################################################################### # Set custom reservoir based method - sampling_algorithm[ - "custom-reservoir-sampling" - ] = lambda n_population, n_samples, random_state=None: sample_without_replacement( - n_population, n_samples, method="reservoir_sampling", random_state=random_state + sampling_algorithm["custom-reservoir-sampling"] = ( + lambda n_population, n_samples, random_state=None: sample_without_replacement( + n_population, + n_samples, + method="reservoir_sampling", + random_state=random_state, + ) ) ########################################################################### # Set custom reservoir based method - sampling_algorithm[ - "custom-pool" - ] = lambda n_population, n_samples, random_state=None: sample_without_replacement( - n_population, n_samples, method="pool", random_state=random_state + sampling_algorithm["custom-pool"] = ( + lambda n_population, n_samples, random_state=None: sample_without_replacement( + n_population, n_samples, method="pool", random_state=random_state + ) ) ########################################################################### # Numpy permutation based - sampling_algorithm[ - "numpy-permutation" - ] = lambda n_population, n_sample: np.random.permutation(n_population)[:n_sample] + sampling_algorithm["numpy-permutation"] = ( + lambda n_population, n_sample: np.random.permutation(n_population)[:n_sample] + ) ########################################################################### # Remove unspecified algorithm diff --git a/benchmarks/bench_text_vectorizers.py b/benchmarks/bench_text_vectorizers.py index 4f40e87f74e14..6d75d57658500 100644 --- a/benchmarks/bench_text_vectorizers.py +++ b/benchmarks/bench_text_vectorizers.py @@ -45,7 +45,6 @@ def f(): [CountVectorizer, TfidfVectorizer, HashingVectorizer], [("word", (1, 1)), ("word", (1, 2)), ("char", (4, 4)), ("char_wb", (4, 4))], ): - bench = {"vectorizer": Vectorizer.__name__} params = {"analyzer": analyzer, "ngram_range": ngram_range} bench.update(params) diff --git a/benchmarks/bench_tree.py b/benchmarks/bench_tree.py index 1809cb7c5e9c0..c23ef627e237e 100644 --- a/benchmarks/bench_tree.py +++ b/benchmarks/bench_tree.py @@ -60,7 +60,6 @@ def bench_scikit_tree_regressor(X, Y): if __name__ == "__main__": - print("============================================") print("Warning: this is going to take a looong time") print("============================================") diff --git a/benchmarks/bench_tsne_mnist.py b/benchmarks/bench_tsne_mnist.py index aa1a07a67ef44..e399e891cb94e 100644 --- a/benchmarks/bench_tsne_mnist.py +++ b/benchmarks/bench_tsne_mnist.py @@ -129,8 +129,7 @@ def sanitize(filename): try: from bhtsne.bhtsne import run_bh_tsne except ImportError as e: - raise ImportError( - """\ + raise ImportError("""\ If you want comparison with the reference implementation, build the binary from source (https://github.com/lvdmaaten/bhtsne) in the folder benchmarks/bhtsne and add an empty `__init__.py` file in the folder: @@ -140,8 +139,7 @@ def sanitize(filename): $ g++ sptree.cpp tsne.cpp tsne_main.cpp -o bh_tsne -O2 $ touch __init__.py $ cd .. -""" - ) from e +""") from e def bhtsne(X): """Wrapper for the reference lvdmaaten/bhtsne implementation.""" @@ -160,7 +158,6 @@ def bhtsne(X): methods.append(("lvdmaaten/bhtsne", bhtsne)) if args.profile: - try: from memory_profiler import profile except ImportError as e: diff --git a/build_tools/generate_authors_table.py b/build_tools/generate_authors_table.py index 1ecb99626ad5a..d4da0db5be3c1 100644 --- a/build_tools/generate_authors_table.py +++ b/build_tools/generate_authors_table.py @@ -172,7 +172,6 @@ def generate_list(contributors): if __name__ == "__main__": - ( core_devs, emeritus, diff --git a/build_tools/update_environments_and_lock_files.py b/build_tools/update_environments_and_lock_files.py index 396191fbe6a04..24cd93b17ca7c 100644 --- a/build_tools/update_environments_and_lock_files.py +++ b/build_tools/update_environments_and_lock_files.py @@ -98,8 +98,11 @@ def remove_from(alist, to_remove): "folder": "build_tools/azure", "platform": "osx-64", "channel": "conda-forge", - "conda_dependencies": common_dependencies - + ["ccache", "compilers", "llvm-openmp"], + "conda_dependencies": common_dependencies + [ + "ccache", + "compilers", + "llvm-openmp", + ], "package_constraints": { "blas": "[build=mkl]", }, @@ -182,9 +185,11 @@ def remove_from(alist, to_remove): # sphinx in conda_dependencies as a temporary work-around for # https://github.com/conda-incubator/conda-lock/issues/309 "conda_dependencies": ["python", "ccache", "sphinx"], - "pip_dependencies": remove_from(common_dependencies, ["python", "blas"]) - + remove_from(docstring_test_dependencies, ["sphinx"]) - + ["lightgbm", "scikit-image"], + "pip_dependencies": ( + remove_from(common_dependencies, ["python", "blas"]) + + remove_from(docstring_test_dependencies, ["sphinx"]) + + ["lightgbm", "scikit-image"] + ), "package_constraints": { "python": "3.9", }, @@ -197,40 +202,44 @@ def remove_from(alist, to_remove): # sphinx in conda_dependencies as a temporary work-around for # https://github.com/conda-incubator/conda-lock/issues/309 "conda_dependencies": ["python", "ccache", "sphinx"], - "pip_dependencies": remove_from( - common_dependencies, - [ - "python", - "blas", - "matplotlib", - "pyamg", - # all the dependencies below have a development version - # installed in the CI, so they can be removed from the - # environment.yml - "numpy", - "scipy", - "pandas", - "cython", - "joblib", - "pillow", - ], - ) - + ["pooch"] - + remove_from(docstring_test_dependencies, ["sphinx"]) - # python-dateutil is a dependency of pandas and pandas is removed from - # the environment.yml. Adding python-dateutil so it is pinned - + ["python-dateutil"], + "pip_dependencies": ( + remove_from( + common_dependencies, + [ + "python", + "blas", + "matplotlib", + "pyamg", + # all the dependencies below have a development version + # installed in the CI, so they can be removed from the + # environment.yml + "numpy", + "scipy", + "pandas", + "cython", + "joblib", + "pillow", + ], + ) + + ["pooch"] + + remove_from(docstring_test_dependencies, ["sphinx"]) + # python-dateutil is a dependency of pandas and pandas is removed from + # the environment.yml. Adding python-dateutil so it is pinned + + ["python-dateutil"] + ), }, { "build_name": "pypy3", "folder": "build_tools/azure", "platform": "linux-64", "channel": "conda-forge", - "conda_dependencies": ["pypy", "python"] - + remove_from( - common_dependencies_without_coverage, ["python", "pandas", "pillow"] - ) - + ["ccache"], + "conda_dependencies": ( + ["pypy", "python"] + + remove_from( + common_dependencies_without_coverage, ["python", "pandas", "pillow"] + ) + + ["ccache"] + ), "package_constraints": { "blas": "[build=openblas]", "python": "3.9", @@ -241,8 +250,10 @@ def remove_from(alist, to_remove): "folder": "build_tools/azure", "platform": "win-64", "channel": "conda-forge", - "conda_dependencies": remove_from(common_dependencies, ["pandas", "pyamg"]) - + ["wheel", "pip"], + "conda_dependencies": remove_from(common_dependencies, ["pandas", "pyamg"]) + [ + "wheel", + "pip", + ], "package_constraints": { "python": "3.8", "blas": "[build=mkl]", @@ -253,8 +264,7 @@ def remove_from(alist, to_remove): "folder": "build_tools/circle", "platform": "linux-64", "channel": "conda-forge", - "conda_dependencies": common_dependencies_without_coverage - + [ + "conda_dependencies": common_dependencies_without_coverage + [ "scikit-image", "seaborn", "memory_profiler", @@ -288,8 +298,7 @@ def remove_from(alist, to_remove): "folder": "build_tools/circle", "platform": "linux-64", "channel": "conda-forge", - "conda_dependencies": common_dependencies_without_coverage - + [ + "conda_dependencies": common_dependencies_without_coverage + [ "scikit-image", "seaborn", "memory_profiler", @@ -316,8 +325,7 @@ def remove_from(alist, to_remove): "channel": "conda-forge", "conda_dependencies": remove_from( common_dependencies_without_coverage, ["pandas", "pyamg"] - ) - + ["pip", "ccache"], + ) + ["pip", "ccache"], "package_constraints": { "python": "3.9", }, @@ -413,8 +421,7 @@ def get_package_with_constraint(package_name, build_metadata, uses_pip=False): def get_conda_environment_content(build_metadata): - template = environment.from_string( - """ + template = environment.from_string(""" # DO NOT EDIT: this file is generated from the specification found in the # following script to centralize the configuration for CI builds: # build_tools/update_environments_and_lock_files.py @@ -430,8 +437,7 @@ def get_conda_environment_content(build_metadata): {% for pip_dep in build_metadata.get('pip_dependencies', []) %} - {{ pip_dep | get_package_with_constraint(build_metadata, uses_pip=True) }} {% endfor %} - {% endif %}""".strip() - ) + {% endif %}""".strip()) return template.render(build_metadata=build_metadata) @@ -478,15 +484,13 @@ def write_all_conda_lock_files(build_metadata_list): def get_pip_requirements_content(build_metadata): - template = environment.from_string( - """ + template = environment.from_string(""" # DO NOT EDIT: this file is generated from the specification found in the # following script to centralize the configuration for CI builds: # build_tools/update_environments_and_lock_files.py {% for pip_dep in build_metadata['pip_dependencies'] %} {{ pip_dep | get_package_with_constraint(build_metadata, uses_pip=True) }} -{% endfor %}""".strip() - ) +{% endfor %}""".strip()) return template.render(build_metadata=build_metadata) diff --git a/doc/conf.py b/doc/conf.py index a326047cbec9f..70b496235b09a 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -249,9 +249,9 @@ -1 ] latest_highlights = latest_highlights.with_suffix("").name -html_context[ - "release_highlights" -] = f"auto_examples/release_highlights/{latest_highlights}" +html_context["release_highlights"] = ( + f"auto_examples/release_highlights/{latest_highlights}" +) # get version from highlight name assuming highlights have the form # plot_release_highlights_0_22_0 @@ -569,9 +569,11 @@ def setup(app): # The following is used by sphinx.ext.linkcode to provide links to github linkcode_resolve = make_linkcode_resolve( "sklearn", - "https://github.com/scikit-learn/" - "scikit-learn/blob/{revision}/" - "{package}/{path}#L{lineno}", + ( + "https://github.com/scikit-learn/" + "scikit-learn/blob/{revision}/" + "{package}/{path}#L{lineno}" + ), ) warnings.filterwarnings( @@ -617,20 +619,32 @@ def setup(app): # ignore links to specific pdf pages because linkcheck does not handle them # ('utf-8' codec can't decode byte error) r"http://www.utstat.toronto.edu/~rsalakhu/sta4273/notes/Lecture2.pdf#page=.*", - "https://www.fordfoundation.org/media/2976/" - "roads-and-bridges-the-unseen-labor-behind-our-digital-infrastructure.pdf#page=.*", + ( + "https://www.fordfoundation.org/media/2976/roads-and-bridges" + "-the-unseen-labor-behind-our-digital-infrastructure.pdf#page=.*" + ), # links falsely flagged as broken - "https://www.researchgate.net/publication/" - "233096619_A_Dendrite_Method_for_Cluster_Analysis", - "https://www.researchgate.net/publication/221114584_Random_Fourier_Approximations_" - "for_Skewed_Multiplicative_Histogram_Kernels", - "https://www.researchgate.net/publication/4974606_" - "Hedonic_housing_prices_and_the_demand_for_clean_air", - "https://www.researchgate.net/profile/Anh-Huy-Phan/publication/220241471_Fast_" - "Local_Algorithms_for_Large_Scale_Nonnegative_Matrix_and_Tensor_Factorizations", + ( + "https://www.researchgate.net/publication/" + "233096619_A_Dendrite_Method_for_Cluster_Analysis" + ), + ( + "https://www.researchgate.net/publication/221114584_Random_Fourier" + "_Approximations_for_Skewed_Multiplicative_Histogram_Kernels" + ), + ( + "https://www.researchgate.net/publication/4974606_" + "Hedonic_housing_prices_and_the_demand_for_clean_air" + ), + ( + "https://www.researchgate.net/profile/Anh-Huy-Phan/publication/220241471_Fast_" + "Local_Algorithms_for_Large_Scale_Nonnegative_Matrix_and_Tensor_Factorizations" + ), "https://doi.org/10.13140/RG.2.2.35280.02565", - "https://www.microsoft.com/en-us/research/uploads/prod/2006/01/" - "Bishop-Pattern-Recognition-and-Machine-Learning-2006.pdf", + ( + "https://www.microsoft.com/en-us/research/uploads/prod/2006/01/" + "Bishop-Pattern-Recognition-and-Machine-Learning-2006.pdf" + ), "https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/tr-99-87.pdf", "https://microsoft.com/", "https://www.jstor.org/stable/2984099", diff --git a/doc/developers/contributing.rst b/doc/developers/contributing.rst index 8ff924fa04a9f..d726269e665ba 100644 --- a/doc/developers/contributing.rst +++ b/doc/developers/contributing.rst @@ -254,7 +254,7 @@ how to set up your git repository: .. prompt:: bash $ - pip install pytest pytest-cov flake8 mypy numpydoc black==22.3.0 + pip install pytest pytest-cov flake8 mypy numpydoc black==23.3.0 .. _upstream: diff --git a/doc/sphinxext/allow_nan_estimators.py b/doc/sphinxext/allow_nan_estimators.py index 89af4bbee6670..e8f94506daaa5 100755 --- a/doc/sphinxext/allow_nan_estimators.py +++ b/doc/sphinxext/allow_nan_estimators.py @@ -45,7 +45,6 @@ def run(self): def setup(app): - app.add_directive("allow_nan_estimators", AllowNanEstimators) return { diff --git a/doc/sphinxext/sphinx_issues.py b/doc/sphinxext/sphinx_issues.py index aa33a6f38e762..5cd532319cbd7 100644 --- a/doc/sphinxext/sphinx_issues.py +++ b/doc/sphinxext/sphinx_issues.py @@ -75,7 +75,6 @@ def cve_role(name, rawtext, text, lineno, inliner, options=None, content=None): class IssueRole(object): - EXTERNAL_REPO_REGEX = re.compile(r"^(\w+)/(.+)([#@])([\w]+)$") def __init__( diff --git a/examples/applications/plot_digits_denoising.py b/examples/applications/plot_digits_denoising.py index cec8e342367c3..72637b6ab036f 100644 --- a/examples/applications/plot_digits_denoising.py +++ b/examples/applications/plot_digits_denoising.py @@ -135,8 +135,10 @@ def plot_digits(X, title): ) plot_digits( X_reconstructed_kernel_pca, - "Kernel PCA reconstruction\n" - f"MSE: {np.mean((X_test - X_reconstructed_kernel_pca) ** 2):.2f}", + ( + "Kernel PCA reconstruction\n" + f"MSE: {np.mean((X_test - X_reconstructed_kernel_pca) ** 2):.2f}" + ), ) # %% diff --git a/examples/applications/plot_out_of_core_classification.py b/examples/applications/plot_out_of_core_classification.py index 721ba0159e692..212cbda9cc71e 100644 --- a/examples/applications/plot_out_of_core_classification.py +++ b/examples/applications/plot_out_of_core_classification.py @@ -298,7 +298,6 @@ def progress(cls_name, stats): # Main loop : iterate on mini-batches of examples for i, (X_train_text, y_train) in enumerate(minibatch_iterators): - tick = time.time() X_train = vectorizer.transform(X_train_text) total_vect_time += time.time() - tick diff --git a/examples/applications/plot_stock_market.py b/examples/applications/plot_stock_market.py index b7f46c17fe549..91168f434338e 100644 --- a/examples/applications/plot_stock_market.py +++ b/examples/applications/plot_stock_market.py @@ -229,7 +229,6 @@ # Add a label to each node. The challenge here is that we want to # position the labels to avoid overlap with other labels for index, (name, label, (x, y)) in enumerate(zip(names, labels, embedding.T)): - dx = x - embedding[0] dx[index] = 1 dy = y - embedding[1] diff --git a/examples/covariance/plot_robust_vs_empirical_covariance.py b/examples/covariance/plot_robust_vs_empirical_covariance.py index 9cffa57beda0a..9111ec82bcbf3 100644 --- a/examples/covariance/plot_robust_vs_empirical_covariance.py +++ b/examples/covariance/plot_robust_vs_empirical_covariance.py @@ -82,7 +82,6 @@ # computation for i, n_outliers in enumerate(range_n_outliers): for j in range(repeat): - rng = np.random.RandomState(i * j) # generate data diff --git a/examples/ensemble/plot_bias_variance.py b/examples/ensemble/plot_bias_variance.py index f8868a7003e4c..4f57b90019e94 100644 --- a/examples/ensemble/plot_bias_variance.py +++ b/examples/ensemble/plot_bias_variance.py @@ -185,7 +185,6 @@ def generate(n_samples, noise, n_repeat=1): plt.ylim([0, 0.1]) if n == n_estimators - 1: - plt.legend(loc=(1.1, 0.5)) plt.subplots_adjust(right=0.75) diff --git a/examples/gaussian_process/plot_gpr_noisy.py b/examples/gaussian_process/plot_gpr_noisy.py index e15c9a6470d38..b76fc745e7df7 100644 --- a/examples/gaussian_process/plot_gpr_noisy.py +++ b/examples/gaussian_process/plot_gpr_noisy.py @@ -103,8 +103,10 @@ def target_generator(X, add_noise=False): plt.xlabel("X") plt.ylabel("y") _ = plt.title( - f"Initial: {kernel}\nOptimum: {gpr.kernel_}\nLog-Marginal-Likelihood: " - f"{gpr.log_marginal_likelihood(gpr.kernel_.theta)}", + ( + f"Initial: {kernel}\nOptimum: {gpr.kernel_}\nLog-Marginal-Likelihood: " + f"{gpr.log_marginal_likelihood(gpr.kernel_.theta)}" + ), fontsize=8, ) # %% @@ -132,8 +134,10 @@ def target_generator(X, add_noise=False): plt.xlabel("X") plt.ylabel("y") _ = plt.title( - f"Initial: {kernel}\nOptimum: {gpr.kernel_}\nLog-Marginal-Likelihood: " - f"{gpr.log_marginal_likelihood(gpr.kernel_.theta)}", + ( + f"Initial: {kernel}\nOptimum: {gpr.kernel_}\nLog-Marginal-Likelihood: " + f"{gpr.log_marginal_likelihood(gpr.kernel_.theta)}" + ), fontsize=8, ) diff --git a/examples/impute/plot_missing_values.py b/examples/impute/plot_missing_values.py index ca800ba3602b2..f6350ad2544dd 100644 --- a/examples/impute/plot_missing_values.py +++ b/examples/impute/plot_missing_values.py @@ -156,7 +156,6 @@ def get_full_score(X_full, y_full): def get_impute_zero_score(X_missing, y_missing): - imputer = SimpleImputer( missing_values=np.nan, add_indicator=True, strategy="constant", fill_value=0 ) diff --git a/examples/inspection/plot_partial_dependence.py b/examples/inspection/plot_partial_dependence.py index 7572f146d6363..43404b356d829 100644 --- a/examples/inspection/plot_partial_dependence.py +++ b/examples/inspection/plot_partial_dependence.py @@ -271,8 +271,10 @@ ) print(f"done in {time() - tic:.3f}s") _ = display.figure_.suptitle( - "Partial dependence of the number of bike rentals\n" - "for the bike rental dataset with an MLPRegressor", + ( + "Partial dependence of the number of bike rentals\n" + "for the bike rental dataset with an MLPRegressor" + ), fontsize=16, ) @@ -323,8 +325,10 @@ ) print(f"done in {time() - tic:.3f}s") _ = display.figure_.suptitle( - "Partial dependence of the number of bike rentals\n" - "for the bike rental dataset with a gradient boosting", + ( + "Partial dependence of the number of bike rentals\n" + "for the bike rental dataset with a gradient boosting" + ), fontsize=16, ) diff --git a/examples/kernel_approximation/plot_scalable_poly_kernels.py b/examples/kernel_approximation/plot_scalable_poly_kernels.py index ade27e16e349a..b2792c41c959f 100644 --- a/examples/kernel_approximation/plot_scalable_poly_kernels.py +++ b/examples/kernel_approximation/plot_scalable_poly_kernels.py @@ -120,11 +120,9 @@ N_COMPONENTS = [250, 500, 1000, 2000] for n_components in N_COMPONENTS: - ps_lsvm_time = 0 ps_lsvm_score = 0 for _ in range(n_runs): - pipeline = make_pipeline( PolynomialCountSketch(n_components=n_components, degree=4), LinearSVC(), diff --git a/examples/linear_model/plot_quantile_regression.py b/examples/linear_model/plot_quantile_regression.py index 69b268a4955cc..de03edb809ae2 100644 --- a/examples/linear_model/plot_quantile_regression.py +++ b/examples/linear_model/plot_quantile_regression.py @@ -262,16 +262,14 @@ y_pred_lr = linear_regression.fit(X, y_pareto).predict(X) y_pred_qr = quantile_regression.fit(X, y_pareto).predict(X) -print( - f"""Training error (in-sample performance) +print(f"""Training error (in-sample performance) {linear_regression.__class__.__name__}: MAE = {mean_absolute_error(y_pareto, y_pred_lr):.3f} MSE = {mean_squared_error(y_pareto, y_pred_lr):.3f} {quantile_regression.__class__.__name__}: MAE = {mean_absolute_error(y_pareto, y_pred_qr):.3f} MSE = {mean_squared_error(y_pareto, y_pred_qr):.3f} - """ -) + """) # %% # On the training set, we see that MAE is lower for @@ -301,16 +299,14 @@ cv=3, scoring=["neg_mean_absolute_error", "neg_mean_squared_error"], ) -print( - f"""Test error (cross-validated performance) +print(f"""Test error (cross-validated performance) {linear_regression.__class__.__name__}: MAE = {-cv_results_lr["test_neg_mean_absolute_error"].mean():.3f} MSE = {-cv_results_lr["test_neg_mean_squared_error"].mean():.3f} {quantile_regression.__class__.__name__}: MAE = {-cv_results_qr["test_neg_mean_absolute_error"].mean():.3f} MSE = {-cv_results_qr["test_neg_mean_squared_error"].mean():.3f} - """ -) + """) # %% # We reach similar conclusions on the out-of-sample evaluation. diff --git a/examples/miscellaneous/plot_outlier_detection_bench.py b/examples/miscellaneous/plot_outlier_detection_bench.py index f2a4921a590f0..781fa515f50e8 100644 --- a/examples/miscellaneous/plot_outlier_detection_bench.py +++ b/examples/miscellaneous/plot_outlier_detection_bench.py @@ -40,7 +40,6 @@ def preprocess_dataset(dataset_name): - # loading and vectorization print(f"Loading {dataset_name} data") if dataset_name in ["http", "smtp", "SA", "SF"]: @@ -123,7 +122,6 @@ def preprocess_dataset(dataset_name): def compute_prediction(X, model_name): - print(f"Computing {model_name} prediction...") if model_name == "LOF": clf = LocalOutlierFactor(n_neighbors=20, contamination="auto") diff --git a/examples/model_selection/plot_likelihood_ratios.py b/examples/model_selection/plot_likelihood_ratios.py index 01a2962f3fe2f..8894498430bd2 100644 --- a/examples/model_selection/plot_likelihood_ratios.py +++ b/examples/model_selection/plot_likelihood_ratios.py @@ -197,7 +197,6 @@ def extract_score(cv_results): fig, axs = plt.subplots(nrows=3, ncols=2, figsize=(15, 12)) for ax, (n, weight) in zip(axs.ravel(), enumerate(weights)): - X, y = make_classification( **common_params, weights=[weight, 1 - weight], @@ -251,7 +250,6 @@ def scoring_on_bootstrap(estimator, X, y, rng, n_bootstrap=100): for prevalence, X, y in zip( populations["prevalence"], populations["X"], populations["y"] ): - results_for_prevalence = scoring_on_bootstrap( estimator, X, y, rng, n_bootstrap=n_bootstrap ) diff --git a/examples/model_selection/plot_nested_cross_validation_iris.py b/examples/model_selection/plot_nested_cross_validation_iris.py index 81d89d93afe91..b6f45255e8a09 100644 --- a/examples/model_selection/plot_nested_cross_validation_iris.py +++ b/examples/model_selection/plot_nested_cross_validation_iris.py @@ -70,7 +70,6 @@ # Loop for each trial for i in range(NUM_TRIALS): - # Choose cross-validation techniques for the inner and outer loops, # independently of the dataset. # E.g "GroupKFold", "LeaveOneOut", "LeaveOneGroupOut", etc. diff --git a/examples/model_selection/plot_roc.py b/examples/model_selection/plot_roc.py index 7d0ad474e53c0..34346780def26 100644 --- a/examples/model_selection/plot_roc.py +++ b/examples/model_selection/plot_roc.py @@ -324,7 +324,6 @@ mean_tpr = dict() for ix, (label_a, label_b) in enumerate(pair_list): - a_mask = y_test == label_a b_mask = y_test == label_b ab_mask = np.logical_or(a_mask, b_mask) diff --git a/examples/neighbors/approximate_nearest_neighbors.py b/examples/neighbors/approximate_nearest_neighbors.py index 8b73fa28b7a6e..ee848cdc66428 100644 --- a/examples/neighbors/approximate_nearest_neighbors.py +++ b/examples/neighbors/approximate_nearest_neighbors.py @@ -155,7 +155,6 @@ def load_mnist(n_samples): ] for dataset_name, (X, y) in datasets: - msg = f"Benchmarking on {dataset_name}:" print(f"\n{msg}\n" + str("-" * len(msg))) @@ -243,7 +242,6 @@ def load_mnist(n_samples): i_ax = 0 for dataset_name, (X, y) in datasets: - msg = f"Benchmarking on {dataset_name}:" print(f"\n{msg}\n" + str("-" * len(msg))) diff --git a/examples/neighbors/plot_nca_classification.py b/examples/neighbors/plot_nca_classification.py index 17e6a667fcb3b..a08bbe8be3756 100644 --- a/examples/neighbors/plot_nca_classification.py +++ b/examples/neighbors/plot_nca_classification.py @@ -65,7 +65,6 @@ ] for name, clf in zip(names, classifiers): - clf.fit(X_train, y_train) score = clf.score(X_test, y_test) diff --git a/examples/preprocessing/plot_discretization_strategies.py b/examples/preprocessing/plot_discretization_strategies.py index d6b9cb16ee53c..32184afd16161 100644 --- a/examples/preprocessing/plot_discretization_strategies.py +++ b/examples/preprocessing/plot_discretization_strategies.py @@ -58,7 +58,6 @@ figure = plt.figure(figsize=(14, 9)) i = 1 for ds_cnt, X in enumerate(X_list): - ax = plt.subplot(len(X_list), len(strategies) + 1, i) ax.scatter(X[:, 0], X[:, 1], edgecolors="k") if ds_cnt == 0: diff --git a/examples/semi_supervised/plot_label_propagation_digits_active_learning.py b/examples/semi_supervised/plot_label_propagation_digits_active_learning.py index 7d4a348cad9b6..3a1f533c8a281 100644 --- a/examples/semi_supervised/plot_label_propagation_digits_active_learning.py +++ b/examples/semi_supervised/plot_label_propagation_digits_active_learning.py @@ -115,8 +115,10 @@ n_labeled_points += len(uncertainty_index) f.suptitle( - "Active learning with Label Propagation.\nRows show 5 most " - "uncertain labels to learn with the next model.", + ( + "Active learning with Label Propagation.\nRows show 5 most " + "uncertain labels to learn with the next model." + ), y=1.15, ) plt.subplots_adjust(left=0.2, bottom=0.03, right=0.9, top=0.9, wspace=0.2, hspace=0.85) diff --git a/examples/svm/plot_svm_margin.py b/examples/svm/plot_svm_margin.py index 43cef2544721c..e7e4f6c05fdcd 100644 --- a/examples/svm/plot_svm_margin.py +++ b/examples/svm/plot_svm_margin.py @@ -32,7 +32,6 @@ # fit the model for name, penalty in (("unreg", 1), ("reg", 0.05)): - clf = svm.SVC(kernel="linear", C=penalty) clf.fit(X, Y) diff --git a/examples/svm/plot_svm_tie_breaking.py b/examples/svm/plot_svm_tie_breaking.py index e12460b494c02..93148225b0bb3 100644 --- a/examples/svm/plot_svm_tie_breaking.py +++ b/examples/svm/plot_svm_tie_breaking.py @@ -28,7 +28,6 @@ titles = ("break_ties = False", "break_ties = True") for break_ties, title, ax in zip((False, True), titles, sub.flatten()): - svm = SVC( kernel="linear", C=1, break_ties=break_ties, decision_function_shape="ovr" ).fit(X, y) diff --git a/examples/text/plot_document_classification_20newsgroups.py b/examples/text/plot_document_classification_20newsgroups.py index f86e2a534b6ec..ffee60087d0c3 100644 --- a/examples/text/plot_document_classification_20newsgroups.py +++ b/examples/text/plot_document_classification_20newsgroups.py @@ -93,7 +93,6 @@ def load_dataset(verbose=False, remove=()): feature_names = vectorizer.get_feature_names_out() if verbose: - # compute size of loaded data data_train_size_mb = size_mb(data_train.data) data_test_size_mb = size_mb(data_test.data) diff --git a/maint_tools/check_pxd_in_installation.py b/maint_tools/check_pxd_in_installation.py index e6f64c86a3383..ac1a8f9627a95 100644 --- a/maint_tools/check_pxd_in_installation.py +++ b/maint_tools/check_pxd_in_installation.py @@ -37,9 +37,7 @@ # We set the language to c++ and we use numpy.get_include() because # some modules require it. with open(tmpdir / "setup_tst.py", "w") as f: - f.write( - textwrap.dedent( - """ + f.write(textwrap.dedent(""" from setuptools import setup, Extension from Cython.Build import cythonize import numpy @@ -50,9 +48,7 @@ include_dirs=[numpy.get_include()])] setup(ext_modules=cythonize(extensions)) - """ - ) - ) + """)) subprocess.run( ["python", "setup_tst.py", "build_ext", "-i"], check=True, cwd=tmpdir diff --git a/sklearn/__check_build/__init__.py b/sklearn/__check_build/__init__.py index a52290962f975..3895a0e430082 100644 --- a/sklearn/__check_build/__init__.py +++ b/sklearn/__check_build/__init__.py @@ -28,8 +28,7 @@ def raise_build_error(e): dir_content.append(filename.ljust(26)) else: dir_content.append(filename + "\n") - raise ImportError( - """%s + raise ImportError("""%s ___________________________________________________________________________ Contents of %s: %s @@ -39,9 +38,7 @@ def raise_build_error(e): If you have installed scikit-learn from source, please do not forget to build the package before using it: run `python setup.py install` or `make` in the source directory. -%s""" - % (e, local_dir, "".join(dir_content).strip(), msg) - ) +%s""" % (e, local_dir, "".join(dir_content).strip(), msg)) try: diff --git a/sklearn/_build_utils/__init__.py b/sklearn/_build_utils/__init__.py index b31b4c4f67078..f84dfa09a9f94 100644 --- a/sklearn/_build_utils/__init__.py +++ b/sklearn/_build_utils/__init__.py @@ -97,7 +97,6 @@ def gen_from_templates(templates): os.path.exists(outfile) and os.stat(template).st_mtime < os.stat(outfile).st_mtime ): - with open(template, "r") as f: tmpl = f.read() diff --git a/sklearn/_build_utils/openmp_helpers.py b/sklearn/_build_utils/openmp_helpers.py index ed9bf0ea3eea0..9172d40830bb9 100644 --- a/sklearn/_build_utils/openmp_helpers.py +++ b/sklearn/_build_utils/openmp_helpers.py @@ -38,8 +38,7 @@ def check_openmp_support(): # Pyodide doesn't support OpenMP return False - code = textwrap.dedent( - """\ + code = textwrap.dedent("""\ #include #include int main(void) { @@ -47,8 +46,7 @@ def check_openmp_support(): printf("nthreads=%d\\n", omp_get_num_threads()); return 0; } - """ - ) + """) extra_preargs = os.getenv("LDFLAGS", None) if extra_preargs is not None: @@ -96,8 +94,7 @@ def check_openmp_support(): "Failed to build scikit-learn with OpenMP support" ) from openmp_exception else: - message = textwrap.dedent( - """ + message = textwrap.dedent(""" *********** * WARNING * @@ -120,8 +117,7 @@ def check_openmp_support(): parallelism. *** - """ - ) + """) warnings.warn(message) return openmp_supported diff --git a/sklearn/_build_utils/pre_build_helpers.py b/sklearn/_build_utils/pre_build_helpers.py index 2c0e5ef3ada47..c1d50abd3ae0c 100644 --- a/sklearn/_build_utils/pre_build_helpers.py +++ b/sklearn/_build_utils/pre_build_helpers.py @@ -64,12 +64,10 @@ def basic_check_build(): # The following check won't work in pyodide return - code = textwrap.dedent( - """\ + code = textwrap.dedent("""\ #include int main(void) { return 0; } - """ - ) + """) compile_test_program(code) diff --git a/sklearn/_min_dependencies.py b/sklearn/_min_dependencies.py index aa24bb4b9a762..ed8272f433580 100644 --- a/sklearn/_min_dependencies.py +++ b/sklearn/_min_dependencies.py @@ -38,7 +38,7 @@ "pytest": (PYTEST_MIN_VERSION, "tests"), "pytest-cov": ("2.9.0", "tests"), "flake8": ("3.8.2", "tests"), - "black": ("22.3.0", "tests"), + "black": ("23.3.0", "tests"), "mypy": ("0.961", "tests"), "pyamg": ("4.0.0", "tests"), "sphinx": ("4.0.1", "docs"), diff --git a/sklearn/base.py b/sklearn/base.py index 379c3143a8e43..7456750ea1157 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -239,9 +239,11 @@ def set_params(self, **params): and self.__module__.startswith("sklearn.") ): warnings.warn( - f"Parameter 'base_estimator' of {self.__class__.__name__} is" - " deprecated in favor of 'estimator'. See" - f" {self.__class__.__name__}'s docstring for more details.", + ( + f"Parameter 'base_estimator' of {self.__class__.__name__} is" + " deprecated in favor of 'estimator'. See" + f" {self.__class__.__name__}'s docstring for more details." + ), FutureWarning, stacklevel=2, ) @@ -1035,8 +1037,8 @@ class _UnstableArchMixin: def _more_tags(self): return { - "non_deterministic": ( - _IS_32BIT or platform.machine().startswith(("ppc", "powerpc")) + "non_deterministic": _IS_32BIT or platform.machine().startswith( + ("ppc", "powerpc") ) } diff --git a/sklearn/calibration.py b/sklearn/calibration.py index e4d46555f3761..36394a925309e 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -314,8 +314,10 @@ def fit(self, X, y, sample_weight=None, **fit_params): "`estimator` since `base_estimator` is deprecated." ) warnings.warn( - "`base_estimator` was renamed to `estimator` in version 1.2 and " - "will be removed in 1.4.", + ( + "`base_estimator` was renamed to `estimator` in version 1.2 and " + "will be removed in 1.4." + ), FutureWarning, ) estimator = self.base_estimator diff --git a/sklearn/cluster/_affinity_propagation.py b/sklearn/cluster/_affinity_propagation.py index 586b6c2c905a4..8a3c2c2acde62 100644 --- a/sklearn/cluster/_affinity_propagation.py +++ b/sklearn/cluster/_affinity_propagation.py @@ -141,8 +141,10 @@ def _affinity_propagation( if K > 0: if never_converged: warnings.warn( - "Affinity propagation did not converge, this model " - "may return degenerate cluster centers and labels.", + ( + "Affinity propagation did not converge, this model " + "may return degenerate cluster centers and labels." + ), ConvergenceWarning, ) c = np.argmax(S[:, I], axis=1) @@ -161,8 +163,10 @@ def _affinity_propagation( labels = np.searchsorted(cluster_centers_indices, labels) else: warnings.warn( - "Affinity propagation did not converge and this model " - "will not have any cluster centers.", + ( + "Affinity propagation did not converge and this model " + "will not have any cluster centers." + ), ConvergenceWarning, ) labels = np.array([-1] * n_samples) @@ -453,7 +457,6 @@ def __init__( verbose=False, random_state=None, ): - self.damping = damping self.max_iter = max_iter self.convergence_iter = convergence_iter @@ -557,9 +560,11 @@ def predict(self, X): return pairwise_distances_argmin(X, self.cluster_centers_) else: warnings.warn( - "This model does not have any cluster centers " - "because affinity propagation did not converge. " - "Labeling every sample as '-1'.", + ( + "This model does not have any cluster centers " + "because affinity propagation did not converge. " + "Labeling every sample as '-1'." + ), ConvergenceWarning, ) return np.array([-1] * X.shape[0]) diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py index 27dd0641f023c..77ac981934468 100644 --- a/sklearn/cluster/_agglomerative.py +++ b/sklearn/cluster/_agglomerative.py @@ -275,12 +275,14 @@ def ward_tree(X, *, connectivity=None, n_clusters=None, return_distance=False): if n_clusters is not None: warnings.warn( - "Partial build of the tree is implemented " - "only for structured clustering (i.e. with " - "explicit connectivity). The algorithm " - "will build the full tree and only " - "retain the lower branches required " - "for the specified number of clusters", + ( + "Partial build of the tree is implemented " + "only for structured clustering (i.e. with " + "explicit connectivity). The algorithm " + "will build the full tree and only " + "retain the lower branches required " + "for the specified number of clusters" + ), stacklevel=2, ) X = np.require(X, requirements="W") @@ -507,12 +509,14 @@ def linkage_tree( if n_clusters is not None: warnings.warn( - "Partial build of the tree is implemented " - "only for structured clustering (i.e. with " - "explicit connectivity). The algorithm " - "will build the full tree and only " - "retain the lower branches required " - "for the specified number of clusters", + ( + "Partial build of the tree is implemented " + "only for structured clustering (i.e. with " + "explicit connectivity). The algorithm " + "will build the full tree and only " + "retain the lower branches required " + "for the specified number of clusters" + ), stacklevel=2, ) @@ -541,7 +545,6 @@ def linkage_tree( and not callable(affinity) and affinity in METRIC_MAPPING ): - # We need the fast cythonized metric from neighbors dist_metric = DistanceMetric.get_metric(affinity) @@ -995,8 +998,10 @@ def _fit(self, X): " 1.4. To avoid this error, only set the `metric` attribute." ) warnings.warn( - "Attribute `affinity` was deprecated in version 1.2 and will be removed" - " in 1.4. Use `metric` instead", + ( + "Attribute `affinity` was deprecated in version 1.2 and will be" + " removed in 1.4. Use `metric` instead" + ), FutureWarning, ) self._metric = self.affinity diff --git a/sklearn/cluster/_bisect_k_means.py b/sklearn/cluster/_bisect_k_means.py index d6c419a1f650d..fc2b38cc1bca9 100644 --- a/sklearn/cluster/_bisect_k_means.py +++ b/sklearn/cluster/_bisect_k_means.py @@ -226,7 +226,6 @@ def __init__( algorithm="lloyd", bisecting_strategy="biggest_inertia", ): - super().__init__( n_clusters=n_clusters, init=init, diff --git a/sklearn/cluster/_kmeans.py b/sklearn/cluster/_kmeans.py index 42aafcf3ce733..5c85ba405aa70 100644 --- a/sklearn/cluster/_kmeans.py +++ b/sklearn/cluster/_kmeans.py @@ -888,9 +888,11 @@ def _check_params_vs_input(self, X, default_n_init=None): self._n_init = self.n_init if self._n_init == "warn": warnings.warn( - "The default value of `n_init` will change from " - f"{default_n_init} to 'auto' in 1.4. Set the value of `n_init`" - " explicitly to suppress the warning", + ( + "The default value of `n_init` will change from " + f"{default_n_init} to 'auto' in 1.4. Set the value of `n_init`" + " explicitly to suppress the warning" + ), FutureWarning, ) self._n_init = default_n_init @@ -902,9 +904,11 @@ def _check_params_vs_input(self, X, default_n_init=None): if _is_arraylike_not_scalar(self.init) and self._n_init != 1: warnings.warn( - "Explicit initial center position passed: performing only" - f" one init in {self.__class__.__name__} instead of " - f"n_init={self._n_init}.", + ( + "Explicit initial center position passed: performing only" + f" one init in {self.__class__.__name__} instead of " + f"n_init={self._n_init}." + ), RuntimeWarning, stacklevel=2, ) @@ -1101,8 +1105,10 @@ def predict(self, X, sample_weight="deprecated"): X = self._check_test_data(X) if not (isinstance(sample_weight, str) and sample_weight == "deprecated"): warnings.warn( - "'sample_weight' was deprecated in version 1.3 and " - "will be removed in 1.5.", + ( + "'sample_weight' was deprecated in version 1.3 and " + "will be removed in 1.5." + ), FutureWarning, ) sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype) @@ -1415,15 +1421,19 @@ def _check_params_vs_input(self, X): self._algorithm = self.algorithm if self._algorithm in ("auto", "full"): warnings.warn( - f"algorithm='{self._algorithm}' is deprecated, it will be " - "removed in 1.3. Using 'lloyd' instead.", + ( + f"algorithm='{self._algorithm}' is deprecated, it will be " + "removed in 1.3. Using 'lloyd' instead." + ), FutureWarning, ) self._algorithm = "lloyd" if self._algorithm == "elkan" and self.n_clusters == 1: warnings.warn( - "algorithm='elkan' doesn't make sense for a single " - "cluster. Using 'lloyd' instead.", + ( + "algorithm='elkan' doesn't make sense for a single " + "cluster. Using 'lloyd' instead." + ), RuntimeWarning, ) self._algorithm = "lloyd" @@ -1907,7 +1917,6 @@ def __init__( n_init="warn", reassignment_ratio=0.01, ): - super().__init__( n_clusters=n_clusters, init=init, @@ -1937,9 +1946,11 @@ def _check_params_vs_input(self, X): self._init_size = 3 * self.n_clusters elif self._init_size < self.n_clusters: warnings.warn( - f"init_size={self._init_size} should be larger than " - f"n_clusters={self.n_clusters}. Setting it to " - "min(3*n_clusters, n_samples)", + ( + f"init_size={self._init_size} should be larger than " + f"n_clusters={self.n_clusters}. Setting it to " + "min(3*n_clusters, n_samples)" + ), RuntimeWarning, stacklevel=2, ) diff --git a/sklearn/cluster/_optics.py b/sklearn/cluster/_optics.py index 0dd5fb7f7daad..0f1c66ada2d4e 100755 --- a/sklearn/cluster/_optics.py +++ b/sklearn/cluster/_optics.py @@ -619,8 +619,10 @@ def compute_optics_graph( ) if np.all(np.isinf(reachability_)): warnings.warn( - "All reachability values are inf. Set a larger" - " max_eps or all data will be considered outliers.", + ( + "All reachability values are inf. Set a larger" + " max_eps or all data will be considered outliers." + ), UserWarning, ) return ordering, core_distances_, reachability_, predecessor_ diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py index 8fdd47300b2d9..e0ab7da938bfd 100644 --- a/sklearn/cluster/_spectral.py +++ b/sklearn/cluster/_spectral.py @@ -778,6 +778,8 @@ def fit_predict(self, X, y=None): def _more_tags(self): return { - "pairwise": self.affinity - in ["precomputed", "precomputed_nearest_neighbors"] + "pairwise": self.affinity in [ + "precomputed", + "precomputed_nearest_neighbors", + ] } diff --git a/sklearn/cluster/tests/test_bicluster.py b/sklearn/cluster/tests/test_bicluster.py index d04e9dba4fade..0a68e97d6fb22 100644 --- a/sklearn/cluster/tests/test_bicluster.py +++ b/sklearn/cluster/tests/test_bicluster.py @@ -105,7 +105,6 @@ def test_spectral_biclustering(global_random_seed): for mat in (S, csr_matrix(S)): for param_name, param_values in non_default_params.items(): for param_value in param_values: - model = SpectralBiclustering( n_clusters=3, n_init=3, @@ -253,7 +252,6 @@ def test_spectralbiclustering_parameter_validation(params, type_err, err_msg): @pytest.mark.parametrize("est", (SpectralBiclustering(), SpectralCoclustering())) def test_n_features_in_(est): - X, _, _ = make_biclusters((3, 3), 3, random_state=0) assert not hasattr(est, "n_features_in_") diff --git a/sklearn/compose/_target.py b/sklearn/compose/_target.py index 77a11619531f2..f31a5a49b641e 100644 --- a/sklearn/compose/_target.py +++ b/sklearn/compose/_target.py @@ -188,10 +188,12 @@ def _fit_transformer(self, y): y_sel_t = self.transformer_.transform(y_sel) if not np.allclose(y_sel, self.transformer_.inverse_transform(y_sel_t)): warnings.warn( - "The provided functions or transformer are" - " not strictly inverse of each other. If" - " you are sure you want to proceed regardless" - ", set 'check_inverse=False'", + ( + "The provided functions or transformer are" + " not strictly inverse of each other. If" + " you are sure you want to proceed regardless" + ", set 'check_inverse=False'" + ), UserWarning, ) diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py index 942ffc45f87eb..a2d1cef0412ba 100644 --- a/sklearn/compose/tests/test_column_transformer.py +++ b/sklearn/compose/tests/test_column_transformer.py @@ -803,7 +803,6 @@ def test_column_transformer_get_feature_names(): def test_column_transformer_special_strings(): - # one 'drop' -> ignore X_array = np.array([[0.0, 1.0, 2.0], [2.0, 4.0, 6.0]]).T ct = ColumnTransformer([("trans1", Trans(), [0]), ("trans2", "drop", [1])]) diff --git a/sklearn/conftest.py b/sklearn/conftest.py index f2cd5dafa17c2..4fea5668b552d 100644 --- a/sklearn/conftest.py +++ b/sklearn/conftest.py @@ -153,7 +153,6 @@ def pytest_collection_modifyitems(config, items): item.name.endswith("GradientBoostingClassifier") and platform.machine() == "aarch64" ): - marker = pytest.mark.xfail( reason=( "know failure. See " diff --git a/sklearn/covariance/tests/test_robust_covariance.py b/sklearn/covariance/tests/test_robust_covariance.py index 97a6cfb1d738d..213f3d7e8f04b 100644 --- a/sklearn/covariance/tests/test_robust_covariance.py +++ b/sklearn/covariance/tests/test_robust_covariance.py @@ -58,7 +58,6 @@ def test_mcd_class_on_invalid_input(): def launch_mcd_on_dataset( n_samples, n_features, n_outliers, tol_loc, tol_cov, tol_support, seed ): - rand_gen = np.random.RandomState(seed) data = rand_gen.randn(n_samples, n_features) # add some outliers diff --git a/sklearn/cross_decomposition/_pls.py b/sklearn/cross_decomposition/_pls.py index bf3456791e660..dce2c23db2087 100644 --- a/sklearn/cross_decomposition/_pls.py +++ b/sklearn/cross_decomposition/_pls.py @@ -501,10 +501,12 @@ def coef_(self): # update the docstring of `coef_` and `intercept_` attribute if hasattr(self, "_coef_") and getattr(self, "_coef_warning", True): warnings.warn( - "The attribute `coef_` will be transposed in version 1.3 to be " - "consistent with other linear models in scikit-learn. Currently, " - "`coef_` has a shape of (n_features, n_targets) and in the future it " - "will have a shape of (n_targets, n_features).", + ( + "The attribute `coef_` will be transposed in version 1.3 to be" + " consistent with other linear models in scikit-learn. Currently," + " `coef_` has a shape of (n_features, n_targets) and in the future" + " it will have a shape of (n_targets, n_features)." + ), FutureWarning, ) # Only warn the first time diff --git a/sklearn/datasets/__init__.py b/sklearn/datasets/__init__.py index 7731fe84b8421..465d4159a32c4 100644 --- a/sklearn/datasets/__init__.py +++ b/sklearn/datasets/__init__.py @@ -103,8 +103,7 @@ def __getattr__(name): if name == "load_boston": - msg = textwrap.dedent( - """ + msg = textwrap.dedent(""" `load_boston` has been removed from scikit-learn since version 1.2. The Boston housing prices dataset has an ethical problem: as @@ -151,8 +150,7 @@ def __getattr__(name): "Hedonic housing prices and the demand for clean air." Journal of environmental economics and management 5.1 (1978): 81-102. - """ - ) + """) raise ImportError(msg) try: return globals()[name] diff --git a/sklearn/datasets/_openml.py b/sklearn/datasets/_openml.py index cee95d740a3c2..653cb6d1a0580 100644 --- a/sklearn/datasets/_openml.py +++ b/sklearn/datasets/_openml.py @@ -966,12 +966,14 @@ def fetch_openml( # TODO(1.4): remove this warning parser = "liac-arff" warn( - "The default value of `parser` will change from `'liac-arff'` to " - "`'auto'` in 1.4. You can set `parser='auto'` to silence this " - "warning. Therefore, an `ImportError` will be raised from 1.4 if " - "the dataset is dense and pandas is not installed. Note that the pandas " - "parser may return different data types. See the Notes Section in " - "fetch_openml's API doc for details.", + ( + "The default value of `parser` will change from `'liac-arff'` to" + " `'auto'` in 1.4. You can set `parser='auto'` to silence this warning." + " Therefore, an `ImportError` will be raised from 1.4 if the dataset is" + " dense and pandas is not installed. Note that the pandas parser may" + " return different data types. See the Notes Section in fetch_openml's" + " API doc for details." + ), FutureWarning, ) @@ -1007,9 +1009,11 @@ def fetch_openml( # TODO(1.4): In version 1.4, we will raise an error instead of # a warning. warn( - "From version 1.4, `parser='auto'` with `as_frame=False` " - "will use pandas. Either install pandas or set explicitely " - "`parser='liac-arff'` to preserve the current behavior.", + ( + "From version 1.4, `parser='auto'` with `as_frame=False` " + "will use pandas. Either install pandas or set explicitely " + "`parser='liac-arff'` to preserve the current behavior." + ), FutureWarning, ) parser_ = "liac-arff" diff --git a/sklearn/datasets/tests/test_arff_parser.py b/sklearn/datasets/tests/test_arff_parser.py index 3b8e7ac069c86..8465289d187ee 100644 --- a/sklearn/datasets/tests/test_arff_parser.py +++ b/sklearn/datasets/tests/test_arff_parser.py @@ -83,9 +83,7 @@ def test_pandas_arff_parser_strip_single_quotes(parser_func): """Check that we properly strip single quotes from the data.""" pd = pytest.importorskip("pandas") - arff_file = BytesIO( - textwrap.dedent( - """ + arff_file = BytesIO(textwrap.dedent(""" @relation 'toy' @attribute 'cat_single_quote' {'A', 'B', 'C'} @attribute 'str_single_quote' string @@ -93,9 +91,7 @@ def test_pandas_arff_parser_strip_single_quotes(parser_func): @attribute 'class' numeric @data 'A','some text','\"expect double quotes\"',0 - """ - ).encode("utf-8") - ) + """).encode("utf-8")) columns_info = { "cat_single_quote": { @@ -154,9 +150,7 @@ def test_pandas_arff_parser_strip_double_quotes(parser_func): """Check that we properly strip double quotes from the data.""" pd = pytest.importorskip("pandas") - arff_file = BytesIO( - textwrap.dedent( - """ + arff_file = BytesIO(textwrap.dedent(""" @relation 'toy' @attribute 'cat_double_quote' {"A", "B", "C"} @attribute 'str_double_quote' string @@ -164,9 +158,7 @@ def test_pandas_arff_parser_strip_double_quotes(parser_func): @attribute 'class' numeric @data "A","some text","\'expect double quotes\'",0 - """ - ).encode("utf-8") - ) + """).encode("utf-8")) columns_info = { "cat_double_quote": { @@ -225,9 +217,7 @@ def test_pandas_arff_parser_strip_no_quotes(parser_func): """Check that we properly parse with no quotes characters.""" pd = pytest.importorskip("pandas") - arff_file = BytesIO( - textwrap.dedent( - """ + arff_file = BytesIO(textwrap.dedent(""" @relation 'toy' @attribute 'cat_without_quote' {A, B, C} @attribute 'str_without_quote' string @@ -235,9 +225,7 @@ def test_pandas_arff_parser_strip_no_quotes(parser_func): @attribute 'class' numeric @data A,some text,'internal' quote,0 - """ - ).encode("utf-8") - ) + """).encode("utf-8")) columns_info = { "cat_without_quote": { diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py index d2bf1ce4262fc..99220fbfb2da6 100644 --- a/sklearn/datasets/tests/test_openml.py +++ b/sklearn/datasets/tests/test_openml.py @@ -187,6 +187,7 @@ def _mock_urlopen(request, *args, **kwargs): ############################################################################### # Test the behaviour of `fetch_openml` depending of the input parameters. + # Known failure of PyPy for OpenML. See the following issue: # https://github.com/scikit-learn/scikit-learn/issues/18906 @fails_if_pypy @@ -974,6 +975,7 @@ def test_fetch_openml_types_inference( ############################################################################### # Test some more specific behaviour + # TODO(1.4): remove this filterwarning decorator @pytest.mark.filterwarnings("ignore:The default value of `parser` will change") @pytest.mark.parametrize( @@ -1210,8 +1212,10 @@ def test_fetch_openml_inactive(monkeypatch, gzip_response, dataset_params): 40945, {"data_id": 40945, "as_frame": False}, ValueError, - "STRING attributes are not supported for array representation. Try" - " as_frame=True", + ( + "STRING attributes are not supported for array representation. Try" + " as_frame=True" + ), ), ( 2, @@ -1636,6 +1640,7 @@ def test_fetch_openml_quotechar_escapechar(monkeypatch): ############################################################################### # Deprecation-changed parameters + # TODO(1.4): remove this test def test_fetch_openml_deprecation_parser(monkeypatch): """Check that we raise a deprecation warning for parser parameter.""" diff --git a/sklearn/datasets/tests/test_svmlight_format.py b/sklearn/datasets/tests/test_svmlight_format.py index 4c87d8f8414fc..1d7311e3097ef 100644 --- a/sklearn/datasets/tests/test_svmlight_format.py +++ b/sklearn/datasets/tests/test_svmlight_format.py @@ -53,7 +53,6 @@ def test_load_svmlight_file(): (1, 12, -3), (2, 20, 27), ): - assert X[i, j] == val # tests X's zero values @@ -133,7 +132,6 @@ def test_load_svmlight_file_n_features(): # test X's non-zero values for i, j, val in ((0, 2, 2.5), (0, 10, -5.2), (1, 5, 1.0), (1, 12, -3)): - assert X[i, j] == val # 21 features in file diff --git a/sklearn/decomposition/_dict_learning.py b/sklearn/decomposition/_dict_learning.py index 7d70be20adb53..d22062d1ae396 100644 --- a/sklearn/decomposition/_dict_learning.py +++ b/sklearn/decomposition/_dict_learning.py @@ -1754,7 +1754,6 @@ def __init__( positive_dict=False, transform_max_iter=1000, ): - super().__init__( transform_algorithm, transform_n_nonzero_coefs, @@ -2133,7 +2132,6 @@ def __init__( tol=1e-3, max_no_improvement=10, ): - super().__init__( transform_algorithm, transform_n_nonzero_coefs, @@ -2347,10 +2345,12 @@ def fit(self, X, y=None): if self.n_iter != "deprecated": warnings.warn( - "'n_iter' is deprecated in version 1.1 and will be removed " - "in version 1.4. Use 'max_iter' and let 'n_iter' to its default " - "value instead. 'n_iter' is also ignored if 'max_iter' is " - "specified.", + ( + "'n_iter' is deprecated in version 1.1 and will be removed " + "in version 1.4. Use 'max_iter' and let 'n_iter' to its default " + "value instead. 'n_iter' is also ignored if 'max_iter' is " + "specified." + ), FutureWarning, ) n_iter = self.n_iter @@ -2378,7 +2378,6 @@ def fit(self, X, y=None): self._B = np.zeros((n_features, self._n_components), dtype=X_train.dtype) if self.max_iter is not None: - # Attributes to monitor the convergence self._ewa_cost = None self._ewa_cost_min = None diff --git a/sklearn/decomposition/_fastica.py b/sklearn/decomposition/_fastica.py index bb0c4ccdd78ea..1df6a6e10706c 100644 --- a/sklearn/decomposition/_fastica.py +++ b/sklearn/decomposition/_fastica.py @@ -121,8 +121,10 @@ def _ica_par(X, tol, g, fun_args, max_iter, w_init): break else: warnings.warn( - "FastICA did not converge. Consider increasing " - "tolerance or the maximum number of iterations.", + ( + "FastICA did not converge. Consider increasing " + "tolerance or the maximum number of iterations." + ), ConvergenceWarning, ) @@ -556,9 +558,11 @@ def _fit_transform(self, X, compute_sources=False): if self._whiten is True: warnings.warn( - "Starting in v1.3, whiten=True should be specified as " - "whiten='arbitrary-variance' (its current behaviour). This " - "behavior is deprecated in 1.1 and will raise ValueError in 1.3.", + ( + "Starting in v1.3, whiten=True should be specified as " + "whiten='arbitrary-variance' (its current behaviour). This " + "behavior is deprecated in 1.1 and will raise ValueError in 1.3." + ), FutureWarning, stacklevel=2, ) diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py index 65efcb7e1a65d..f88d9add951f0 100644 --- a/sklearn/decomposition/_nmf.py +++ b/sklearn/decomposition/_nmf.py @@ -1527,10 +1527,12 @@ def _check_params(self, X): ) if self.solver == "mu" and self.init == "nndsvd": warnings.warn( - "The multiplicative update ('mu') solver cannot update " - "zeros present in the initialization, and so leads to " - "poorer results when used jointly with init='nndsvd'. " - "You may try init='nndsvda' or init='nndsvdar' instead.", + ( + "The multiplicative update ('mu') solver cannot update " + "zeros present in the initialization, and so leads to " + "poorer results when used jointly with init='nndsvd'. " + "You may try init='nndsvda' or init='nndsvdar' instead." + ), UserWarning, ) @@ -1938,7 +1940,6 @@ def __init__( random_state=None, verbose=0, ): - super().__init__( n_components=n_components, init=init, @@ -2238,7 +2239,6 @@ def _fit_transform(self, X, W=None, H=None, update_H=True): n_steps = self.max_iter * n_steps_per_iter for i, batch in zip(range(n_steps), batches): - batch_cost = self._minibatch_step(X[batch], W[batch], H, update_H) if update_H and self._minibatch_convergence( @@ -2256,8 +2256,10 @@ def _fit_transform(self, X, W=None, H=None, update_H=True): if n_iter == self.max_iter and self.tol > 0: warnings.warn( - f"Maximum number of iterations {self.max_iter} reached. " - "Increase it to improve convergence.", + ( + f"Maximum number of iterations {self.max_iter} reached. " + "Increase it to improve convergence." + ), ConvergenceWarning, ) diff --git a/sklearn/dummy.py b/sklearn/dummy.py index 961c6aa4de9a5..25f910e8419f4 100644 --- a/sklearn/dummy.py +++ b/sklearn/dummy.py @@ -168,10 +168,12 @@ def fit(self, X, y, sample_weight=None): if self._strategy == "uniform" and sp.issparse(y): y = y.toarray() warnings.warn( - "A local copy of the target data has been converted " - "to a numpy array. Predicting on sparse target data " - "with the uniform strategy would not save memory " - "and would be slower.", + ( + "A local copy of the target data has been converted " + "to a numpy array. Predicting on sparse target data " + "with the uniform strategy would not save memory " + "and would be slower." + ), UserWarning, ) diff --git a/sklearn/ensemble/_bagging.py b/sklearn/ensemble/_bagging.py index c4fb966aacac3..bad6dcfb033ec 100644 --- a/sklearn/ensemble/_bagging.py +++ b/sklearn/ensemble/_bagging.py @@ -741,7 +741,6 @@ def __init__( verbose=0, base_estimator="deprecated", ): - super().__init__( estimator=estimator, n_estimators=n_estimators, diff --git a/sklearn/ensemble/_base.py b/sklearn/ensemble/_base.py index fa1041799afcf..32725d65f90bd 100644 --- a/sklearn/ensemble/_base.py +++ b/sklearn/ensemble/_base.py @@ -159,8 +159,10 @@ def _validate_estimator(self, default=None): self.estimator_ = self.estimator elif self.base_estimator not in [None, "deprecated"]: warnings.warn( - "`base_estimator` was renamed to `estimator` in version 1.2 and " - "will be removed in 1.4.", + ( + "`base_estimator` was renamed to `estimator` in version 1.2 and " + "will be removed in 1.4." + ), FutureWarning, ) self.estimator_ = self.base_estimator diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py index 19203da4fce1f..de729264e35c0 100644 --- a/sklearn/ensemble/_forest.py +++ b/sklearn/ensemble/_forest.py @@ -357,9 +357,11 @@ def fit(self, X, y, sample_weight=None): y = np.atleast_1d(y) if y.ndim == 2 and y.shape[1] == 1: warn( - "A column-vector y was passed when a 1d array was" - " expected. Please change the shape of y to " - "(n_samples,), for example using ravel().", + ( + "A column-vector y was passed when a 1d array was" + " expected. Please change the shape of y to " + "(n_samples,), for example using ravel()." + ), DataConversionWarning, stacklevel=2, ) @@ -570,9 +572,11 @@ def _compute_oob_predictions(self, X, y): for k in range(n_outputs): if (n_oob_pred == 0).any(): warn( - "Some inputs do not have OOB scores. This probably means " - "too few trees were used to compute any reliable OOB " - "estimates.", + ( + "Some inputs do not have OOB scores. This probably means " + "too few trees were used to compute any reliable OOB " + "estimates." + ), UserWarning, ) n_oob_pred[n_oob_pred == 0] = 1 diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py index 33448c280715b..21f2e9701ad1a 100644 --- a/sklearn/ensemble/_gb.py +++ b/sklearn/ensemble/_gb.py @@ -174,7 +174,6 @@ def __init__( n_iter_no_change=None, tol=1e-4, ): - self.n_estimators = n_estimators self.learning_rate = learning_rate self.loss = loss @@ -598,7 +597,6 @@ def _fit_stages( # perform boosting iterations i = begin_at_stage for i in range(begin_at_stage, self.n_estimators): - # subsampling if do_oob: sample_mask = _random_sample_mask(n_samples, n_inbag, random_state) @@ -1189,7 +1187,6 @@ def __init__( tol=1e-4, ccp_alpha=0.0, ): - super().__init__( loss=loss, learning_rate=learning_rate, @@ -1743,7 +1740,6 @@ def __init__( tol=1e-4, ccp_alpha=0.0, ): - super().__init__( loss=loss, learning_rate=learning_rate, diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index b35f37d4d7252..976335ea684d0 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -632,7 +632,6 @@ def fit(self, X, y, sample_weight=None): ) for iteration in range(begin_at_stage, self.max_iter): - if self.verbose: iteration_start_time = time() print( diff --git a/sklearn/ensemble/_hist_gradient_boosting/grower.py b/sklearn/ensemble/_hist_gradient_boosting/grower.py index c4669da4a60a9..b8c0c17969e99 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/grower.py +++ b/sklearn/ensemble/_hist_gradient_boosting/grower.py @@ -242,7 +242,6 @@ def __init__( shrinkage=1.0, n_threads=None, ): - self._validate_parameters( X_binned, min_gain_to_split, @@ -564,7 +563,6 @@ def split_next(self): should_split_left = not left_child_node.is_leaf should_split_right = not right_child_node.is_leaf if should_split_left or should_split_right: - # We will compute the histograms of both nodes even if one of them # is a leaf, since computing the second histogram is very cheap # (using histogram subtraction). diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py index a697d385140d5..6bd5b38d5a4ee 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py @@ -180,7 +180,6 @@ def test_same_predictions_classification( np.testing.assert_almost_equal(acc_lightgbm, acc_sklearn) if max_leaf_nodes < 10 and n_samples >= 1000: - pred_lightgbm = est_lightgbm.predict(X_test) pred_sklearn = est_sklearn.predict(X_test) assert np.mean(pred_sklearn == pred_lightgbm) > 0.89 @@ -263,7 +262,6 @@ def test_same_predictions_multiclass_classification( np.testing.assert_allclose(acc_lightgbm, acc_sklearn, rtol=0, atol=5e-2) if max_leaf_nodes < 10 and n_samples >= 1000: - pred_lightgbm = est_lightgbm.predict(X_test) pred_sklearn = est_sklearn.predict(X_test) assert np.mean(pred_sklearn == pred_lightgbm) > 0.89 diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py index 33f39f3dbf584..94d8960b6e813 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py @@ -80,7 +80,6 @@ def _make_dumb_dataset(n_samples): ], ) def test_init_parameters_validation(GradientBoosting, X, y, params, err_msg): - with pytest.raises(ValueError, match=err_msg): GradientBoosting(**params).fit(X, y) @@ -100,7 +99,6 @@ def test_init_parameters_validation(GradientBoosting, X, y, params, err_msg): def test_early_stopping_regression( scoring, validation_fraction, early_stopping, n_iter_no_change, tol ): - max_iter = 200 X, y = make_regression(n_samples=50, random_state=0) @@ -148,7 +146,6 @@ def test_early_stopping_regression( def test_early_stopping_classification( data, scoring, validation_fraction, early_stopping, n_iter_no_change, tol ): - max_iter = 50 X, y = data @@ -208,7 +205,6 @@ def test_early_stopping_default(GradientBoosting, X, y): ], ) def test_should_stop(scores, n_iter_no_change, tol, stopping): - gbdt = HistGradientBoostingClassifier(n_iter_no_change=n_iter_no_change, tol=tol) assert gbdt._should_stop(scores) == stopping @@ -417,8 +413,10 @@ def test_missing_values_trivial(): @pytest.mark.parametrize("problem", ("classification", "regression")) @pytest.mark.parametrize( - "missing_proportion, expected_min_score_classification, " - "expected_min_score_regression", + ( + "missing_proportion, expected_min_score_classification, " + "expected_min_score_regression" + ), [(0.1, 0.97, 0.89), (0.2, 0.93, 0.81), (0.5, 0.79, 0.52)], ) def test_missing_values_resilience( @@ -908,7 +906,6 @@ def test_custom_loss(Est, loss, X, y): ], ) def test_staged_predict(HistGradientBoosting, X, y): - # Test whether staged predictor eventually gives # the same prediction. X_train, X_test, y_train, y_test = train_test_split( @@ -932,7 +929,6 @@ def test_staged_predict(HistGradientBoosting, X, y): else ["predict", "predict_proba", "decision_function"] ) for method_name in method_names: - staged_method = getattr(gb, "staged_" + method_name) staged_predictions = list(staged_method(X_test)) assert len(staged_predictions) == gb.n_iter_ diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py index c4ae90b7e7d96..f3380fbf2af6d 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py @@ -323,7 +323,6 @@ def test_max_depth(max_depth): def test_input_validation(): - X_binned, all_gradients, all_hessians = _make_training_data() X_binned_float = X_binned.astype(np.float32) @@ -340,7 +339,6 @@ def test_input_validation(): def test_init_parameters_validation(): X_binned, all_gradients, all_hessians = _make_training_data() with pytest.raises(ValueError, match="min_gain_to_split=-1 must be positive"): - TreeGrower(X_binned, all_gradients, all_hessians, min_gain_to_split=-1) with pytest.raises(ValueError, match="min_hessian_to_split=-1 must be positive"): diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_splitting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_splitting.py index d1da34015a2a4..255d13bb08456 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_splitting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_splitting.py @@ -381,8 +381,10 @@ def test_min_gain_to_split(): @pytest.mark.parametrize( - "X_binned, all_gradients, has_missing_values, n_bins_non_missing, " - " expected_split_on_nan, expected_bin_idx, expected_go_to_left", + ( + "X_binned, all_gradients, has_missing_values, n_bins_non_missing, " + " expected_split_on_nan, expected_bin_idx, expected_go_to_left" + ), [ # basic sanity check with no missing values: given the gradient # values, the split must occur on bin_idx=3 @@ -672,8 +674,10 @@ def _assert_categories_equals_bitset(categories, bitset): @pytest.mark.parametrize( - "X_binned, all_gradients, expected_categories_left, n_bins_non_missing," - "missing_values_bin_idx, has_missing_values, expected_missing_go_to_left", + ( + "X_binned, all_gradients, expected_categories_left, n_bins_non_missing," + "missing_values_bin_idx, has_missing_values, expected_missing_go_to_left" + ), [ # 4 categories ( diff --git a/sklearn/ensemble/_weight_boosting.py b/sklearn/ensemble/_weight_boosting.py index 99ea4acfa1d8b..97600b0b116dc 100644 --- a/sklearn/ensemble/_weight_boosting.py +++ b/sklearn/ensemble/_weight_boosting.py @@ -78,7 +78,6 @@ def __init__( random_state=None, base_estimator="deprecated", ): - super().__init__( estimator=estimator, n_estimators=n_estimators, @@ -177,9 +176,11 @@ def fit(self, X, y, sample_weight=None): if not np.isfinite(sample_weight_sum): warnings.warn( - "Sample weights have reached infinite values," - f" at iteration {iboost}, causing overflow. " - "Iterations stopped. Try lowering the learning rate.", + ( + "Sample weights have reached infinite values," + f" at iteration {iboost}, causing overflow. " + "Iterations stopped. Try lowering the learning rate." + ), stacklevel=2, ) break @@ -496,7 +497,6 @@ def __init__( random_state=None, base_estimator="deprecated", ): - super().__init__( estimator=estimator, n_estimators=n_estimators, @@ -1077,7 +1077,6 @@ def __init__( random_state=None, base_estimator="deprecated", ): - super().__init__( estimator=estimator, n_estimators=n_estimators, diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py index 542c98cb6954d..f6311e8c459d4 100644 --- a/sklearn/ensemble/tests/test_bagging.py +++ b/sklearn/ensemble/tests/test_bagging.py @@ -212,7 +212,6 @@ def fit(self, X, y): X_train_sparse = sparse_format(X_train) X_test_sparse = sparse_format(X_test) for params in parameter_sets: - # Trained on sparse format sparse_classifier = BaggingRegressor( estimator=CustomSVR(), random_state=1, **params diff --git a/sklearn/ensemble/tests/test_voting.py b/sklearn/ensemble/tests/test_voting.py index a1b5b4a7692dd..56db8b3c7fbf5 100644 --- a/sklearn/ensemble/tests/test_voting.py +++ b/sklearn/ensemble/tests/test_voting.py @@ -552,7 +552,6 @@ def test_none_estimator_with_weights(X, y, voter): ids=["VotingRegressor", "VotingClassifier"], ) def test_n_features_in(est): - X = [[1, 2], [3, 4], [5, 6]] y = [0, 1, 2] @@ -581,7 +580,6 @@ def test_n_features_in(est): ], ) def test_voting_verbose(estimator, capsys): - X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]]) y = np.array([1, 1, 2, 2]) diff --git a/sklearn/feature_extraction/tests/test_image.py b/sklearn/feature_extraction/tests/test_image.py index 52489e7da55be..5a89062e7de19 100644 --- a/sklearn/feature_extraction/tests/test_image.py +++ b/sklearn/feature_extraction/tests/test_image.py @@ -282,7 +282,6 @@ def test_patch_extractor_color(orange_face): def test_extract_patches_strided(): - image_shapes_1D = [(10,), (10,), (11,), (10,)] patch_sizes_1D = [(1,), (2,), (3,), (8,)] patch_steps_1D = [(1,), (1,), (4,), (2,)] diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py index 70aa6e7714149..d14a86d8f675e 100644 --- a/sklearn/feature_extraction/tests/test_text.py +++ b/sklearn/feature_extraction/tests/test_text.py @@ -1502,8 +1502,10 @@ def analyzer(doc): "Vectorizer", [CountVectorizer, HashingVectorizer, TfidfVectorizer] ) @pytest.mark.parametrize( - "stop_words, tokenizer, preprocessor, ngram_range, token_pattern," - "analyzer, unused_name, ovrd_name, ovrd_msg", + ( + "stop_words, tokenizer, preprocessor, ngram_range, token_pattern," + "analyzer, unused_name, ovrd_name, ovrd_msg" + ), [ ( ["you've", "you'll"], @@ -1585,7 +1587,6 @@ def test_unused_parameters_warn( ovrd_name, ovrd_msg, ): - train_data = JUNK_FOOD_DOCS # setting parameter and checking for corresponding warning messages vect = Vectorizer() diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index 9d56d2d1bf2dd..21863d75eff2f 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -447,7 +447,6 @@ def build_analyzer(self): ) elif self.analyzer == "char_wb": - return partial( _analyze, ngrams=self._char_wb_ngrams, @@ -524,7 +523,6 @@ def _validate_ngram_range(self): ) def _warn_for_unused_params(self): - if self.tokenizer is not None and self.token_pattern is not None: warnings.warn( "The parameter 'token_pattern' will not be used" @@ -2002,7 +2000,6 @@ def __init__( smooth_idf=True, sublinear_tf=False, ): - super().__init__( input=input, encoding=encoding, diff --git a/sklearn/feature_selection/_base.py b/sklearn/feature_selection/_base.py index 356f1a48c5567..1af3d10c986de 100644 --- a/sklearn/feature_selection/_base.py +++ b/sklearn/feature_selection/_base.py @@ -101,8 +101,10 @@ def _transform(self, X): mask = self.get_support() if not mask.any(): warnings.warn( - "No features were selected: either the data is" - " too noisy or the selection test too strict.", + ( + "No features were selected: either the data is" + " too noisy or the selection test too strict." + ), UserWarning, ) if hasattr(X, "iloc"): diff --git a/sklearn/feature_selection/_sequential.py b/sklearn/feature_selection/_sequential.py index 2498cd53b39f6..55468c5a219e3 100644 --- a/sklearn/feature_selection/_sequential.py +++ b/sklearn/feature_selection/_sequential.py @@ -177,7 +177,6 @@ def __init__( cv=5, n_jobs=None, ): - self.estimator = estimator self.n_features_to_select = n_features_to_select self.tol = tol @@ -210,13 +209,15 @@ def fit(self, X, y=None): if self.n_features_to_select in ("warn", None): # for backwards compatibility warnings.warn( - "Leaving `n_features_to_select` to " - "None is deprecated in 1.0 and will become 'auto' " - "in 1.3. To keep the same behaviour as with None " - "(i.e. select half of the features) and avoid " - "this warning, you should manually set " - "`n_features_to_select='auto'` and set tol=None " - "when creating an instance.", + ( + "Leaving `n_features_to_select` to " + "None is deprecated in 1.0 and will become 'auto' " + "in 1.3. To keep the same behaviour as with None " + "(i.e. select half of the features) and avoid " + "this warning, you should manually set " + "`n_features_to_select='auto'` and set tol=None " + "when creating an instance." + ), FutureWarning, ) diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index 1cdd636c24f9e..18e23d105b8bb 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -969,6 +969,7 @@ def _get_support_mask(self): # Generic filter ###################################################################### + # TODO this class should fit on either p-values or scores, # depending on the mode. class GenericUnivariateSelect(_BaseFilter): diff --git a/sklearn/gaussian_process/_gpr.py b/sklearn/gaussian_process/_gpr.py index c0a8dc71b7352..3a657c4bf28f7 100644 --- a/sklearn/gaussian_process/_gpr.py +++ b/sklearn/gaussian_process/_gpr.py @@ -324,9 +324,11 @@ def obj_func(theta, eval_gradient=True): self.L_ = cholesky(K, lower=GPR_CHOLESKY_LOWER, check_finite=False) except np.linalg.LinAlgError as exc: exc.args = ( - f"The kernel, {self.kernel_}, is not returning a positive " - "definite matrix. Try gradually increasing the 'alpha' " - "parameter of your GaussianProcessRegressor estimator.", + ( + f"The kernel, {self.kernel_}, is not returning a positive " + "definite matrix. Try gradually increasing the 'alpha' " + "parameter of your GaussianProcessRegressor estimator." + ), ) + exc.args raise # Alg 2.1, page 19, line 3 -> alpha = L^T \ (L \ y) diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py index ca776ac68d595..1fbb92497a34e 100644 --- a/sklearn/impute/_base.py +++ b/sklearn/impute/_base.py @@ -299,7 +299,6 @@ def __init__( self.copy = copy def _validate_input(self, X, in_fit): - if self.strategy in ("most_frequent", "constant"): # If input is a list of strings, dtype = object. # Otherwise ValueError is raised in SimpleImputer @@ -380,10 +379,12 @@ def fit(self, X, y=None): self._validate_params() if self.verbose != "deprecated": warnings.warn( - "The 'verbose' parameter was deprecated in version " - "1.1 and will be removed in 1.3. A warning will " - "always be raised upon the removal of empty columns " - "in the future version.", + ( + "The 'verbose' parameter was deprecated in version " + "1.1 and will be removed in 1.3. A warning will " + "always be raised upon the removal of empty columns " + "in the future version." + ), FutureWarning, ) @@ -684,8 +685,8 @@ def inverse_transform(self, X): def _more_tags(self): return { - "allow_nan": ( - _is_pandas_na(self.missing_values) or is_scalar_nan(self.missing_values) + "allow_nan": _is_pandas_na(self.missing_values) or is_scalar_nan( + self.missing_values ) } diff --git a/sklearn/impute/tests/test_impute.py b/sklearn/impute/tests/test_impute.py index 8851d10b0f14c..5d0df0c877151 100644 --- a/sklearn/impute/tests/test_impute.py +++ b/sklearn/impute/tests/test_impute.py @@ -110,7 +110,6 @@ def test_imputation_deletion_warning(strategy): @pytest.mark.parametrize("strategy", ["mean", "median", "most_frequent"]) def test_imputation_deletion_warning_feature_names(strategy): - pd = pytest.importorskip("pandas") missing_values = np.nan diff --git a/sklearn/impute/tests/test_knn.py b/sklearn/impute/tests/test_knn.py index 8a489ab23701f..80ee1d0c2b574 100644 --- a/sklearn/impute/tests/test_knn.py +++ b/sklearn/impute/tests/test_knn.py @@ -227,7 +227,6 @@ def test_knn_imputer_verify(na): @pytest.mark.parametrize("na", [np.nan, -1]) def test_knn_imputer_one_n_neighbors(na): - X = np.array([[0, 0], [na, 2], [4, 3], [5, na], [7, 7], [na, 8], [14, 13]]) X_imputed = np.array([[0, 0], [4, 2], [4, 3], [5, 3], [7, 7], [7, 8], [14, 13]]) @@ -255,7 +254,6 @@ def test_knn_imputer_all_samples_are_neighbors(na): @pytest.mark.parametrize("na", [np.nan, -1]) def test_knn_imputer_weight_uniform(na): - X = np.array([[0, 0], [na, 2], [4, 3], [5, 6], [7, 7], [9, 8], [11, 10]]) # Test with "uniform" weight (or unweighted) @@ -431,7 +429,6 @@ def test_knn_imputer_weight_distance(na): def test_knn_imputer_callable_metric(): - # Define callable metric that returns the l1 norm: def custom_callable(x, y, missing_values=np.nan, squared=False): x = np.ma.array(x, mask=np.isnan(x)) @@ -457,7 +454,6 @@ def custom_callable(x, y, missing_values=np.nan, squared=False): # for a small dataset. However, it should raise a UserWarning that we ignore. @pytest.mark.filterwarnings("ignore:adhere to working_memory") def test_knn_imputer_with_simple_example(na, working_memory): - X = np.array( [ [0, na, 0, na], diff --git a/sklearn/inspection/_partial_dependence.py b/sklearn/inspection/_partial_dependence.py index 9e10977af2581..a1a9b6915a17a 100644 --- a/sklearn/inspection/_partial_dependence.py +++ b/sklearn/inspection/_partial_dependence.py @@ -139,7 +139,6 @@ def _partial_dependence_recursion(est, grid, features): def _partial_dependence_brute(est, grid, features, X, response_method): - predictions = [] averaged_predictions = [] diff --git a/sklearn/inspection/_plot/partial_dependence.py b/sklearn/inspection/_plot/partial_dependence.py index eb6fd78f628ee..dd00932bf9a96 100644 --- a/sklearn/inspection/_plot/partial_dependence.py +++ b/sklearn/inspection/_plot/partial_dependence.py @@ -1236,16 +1236,20 @@ def plot( # FIXME: remove in 1.3 if self.pdp_lim != "deprecated": warnings.warn( - "The `pdp_lim` parameter is deprecated in version 1.1 and will be " - "removed in version 1.3. Provide `pdp_lim` to the `plot` method." - "instead.", + ( + "The `pdp_lim` parameter is deprecated in version 1.1 and will be " + "removed in version 1.3. Provide `pdp_lim` to the `plot` method." + "instead." + ), FutureWarning, ) if pdp_lim is not None and self.pdp_lim != pdp_lim: warnings.warn( - "`pdp_lim` has been passed in both the constructor and the `plot` " - "method. For backward compatibility, the parameter from the " - "constructor will be used.", + ( + "`pdp_lim` has been passed in both the constructor and the" + " `plot` method. For backward compatibility, the parameter from" + " the constructor will be used." + ), UserWarning, ) pdp_lim = self.pdp_lim diff --git a/sklearn/inspection/_plot/tests/test_boundary_decision_display.py b/sklearn/inspection/_plot/tests/test_boundary_decision_display.py index 97b1b98e3db93..09885e4b8d6d4 100644 --- a/sklearn/inspection/_plot/tests/test_boundary_decision_display.py +++ b/sklearn/inspection/_plot/tests/test_boundary_decision_display.py @@ -212,8 +212,10 @@ def test_decision_boundary_display(pyplot, fitted_clf, response_method, plot_met ), ( "auto", - "MyClassifier has none of the following attributes: decision_function, " - "predict_proba, predict", + ( + "MyClassifier has none of the following attributes: decision_function, " + "predict_proba, predict" + ), ), ( "bad_method", diff --git a/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py b/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py index 52389519d6c00..8e55d44a435bd 100644 --- a/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py +++ b/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py @@ -21,8 +21,10 @@ # TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved pytestmark = pytest.mark.filterwarnings( - "ignore:In future, it will be an error for 'np.bool_':DeprecationWarning:" - "matplotlib.*", + ( + "ignore:In future, it will be an error for 'np.bool_':DeprecationWarning:" + "matplotlib.*" + ), ) diff --git a/sklearn/kernel_approximation.py b/sklearn/kernel_approximation.py index cfc0683ca094f..b79b4dcad457f 100644 --- a/sklearn/kernel_approximation.py +++ b/sklearn/kernel_approximation.py @@ -1002,7 +1002,6 @@ def __init__( random_state=None, n_jobs=None, ): - self.kernel = kernel self.gamma = gamma self.coef0 = coef0 diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index c0cace3fad9fa..e8e907906efc3 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -114,12 +114,14 @@ def _deprecate_normalize(normalize, estimator_name): ) elif not normalize: warnings.warn( - "'normalize' was deprecated in version 1.2 and will be " - "removed in 1.4. " - "Please leave the normalize parameter to its default value to " - "silence this warning. The default behavior of this estimator " - "is to not do any normalization. If normalization is needed " - "please use sklearn.preprocessing.StandardScaler instead.", + ( + "'normalize' was deprecated in version 1.2 and will be " + "removed in 1.4. " + "Please leave the normalize parameter to its default value to " + "silence this warning. The default behavior of this estimator " + "is to not do any normalization. If normalization is needed " + "please use sklearn.preprocessing.StandardScaler instead." + ), FutureWarning, ) @@ -825,8 +827,10 @@ def _pre_fit( and not np.allclose(X_scale, np.ones(n_features)) ): warnings.warn( - "Gram matrix was provided but X was centered to fit " - "intercept, or X was normalized : recomputing Gram matrix.", + ( + "Gram matrix was provided but X was centered to fit " + "intercept, or X was normalized : recomputing Gram matrix." + ), UserWarning, ) # recompute Gram diff --git a/sklearn/linear_model/_bayes.py b/sklearn/linear_model/_bayes.py index 22d76162db936..887c6a3ebcbbc 100644 --- a/sklearn/linear_model/_bayes.py +++ b/sklearn/linear_model/_bayes.py @@ -50,8 +50,10 @@ def _deprecate_n_iter(n_iter, max_iter): " 1.5. To avoid this error, only set the `max_iter` attribute." ) warnings.warn( - "'n_iter' was renamed to 'max_iter' in version 1.3 and " - "will be removed in 1.5", + ( + "'n_iter' was renamed to 'max_iter' in version 1.3 and " + "will be removed in 1.5" + ), FutureWarning, ) max_iter = n_iter @@ -337,7 +339,6 @@ def fit(self, X, y, sample_weight=None): # Convergence loop of the bayesian ridge regression for iter_ in range(max_iter): - # update posterior mean coef_ based on alpha_ and lambda_ and # compute corresponding rmse coef_, rmse_ = self._update_coef_( diff --git a/sklearn/linear_model/_coordinate_descent.py b/sklearn/linear_model/_coordinate_descent.py index 7d1c9759466e8..7a0410cc388a2 100644 --- a/sklearn/linear_model/_coordinate_descent.py +++ b/sklearn/linear_model/_coordinate_descent.py @@ -891,9 +891,11 @@ def fit(self, X, y, sample_weight=None, check_input=True): if self.alpha == 0: warnings.warn( - "With alpha=0, this algorithm does not converge " - "well. You are advised to use the LinearRegression " - "estimator", + ( + "With alpha=0, this algorithm does not converge " + "well. You are advised to use the LinearRegression " + "estimator" + ), stacklevel=2, ) diff --git a/sklearn/linear_model/_glm/_newton_solver.py b/sklearn/linear_model/_glm/_newton_solver.py index 6a2985a870e9c..68d08d2e7a21b 100644 --- a/sklearn/linear_model/_glm/_newton_solver.py +++ b/sklearn/linear_model/_glm/_newton_solver.py @@ -285,9 +285,11 @@ def line_search(self, X, y, sample_weight): t *= beta else: warnings.warn( - f"Line search of Newton solver {self.__class__.__name__} at iteration " - f"#{self.iteration} did no converge after 21 line search refinement " - "iterations. It will now resort to lbfgs instead.", + ( + f"Line search of Newton solver {self.__class__.__name__} at" + f" iteration #{self.iteration} did no converge after 21 line search" + " refinement iterations. It will now resort to lbfgs instead." + ), ConvergenceWarning, ) if self.verbose: @@ -418,8 +420,10 @@ def solve(self, X, y, sample_weight): self.fallback_lbfgs_solve(X=X, y=y, sample_weight=sample_weight) else: warnings.warn( - f"Newton solver did not converge after {self.iteration - 1} " - "iterations.", + ( + f"Newton solver did not converge after {self.iteration - 1} " + "iterations." + ), ConvergenceWarning, ) @@ -459,9 +463,11 @@ def update_gradient_hessian(self, X, y, sample_weight): def inner_solve(self, X, y, sample_weight): if self.hessian_warning: warnings.warn( - f"The inner solver of {self.__class__.__name__} detected a " - "pointwise hessian with many negative values at iteration " - f"#{self.iteration}. It will now resort to lbfgs instead.", + ( + f"The inner solver of {self.__class__.__name__} detected a " + "pointwise hessian with many negative values at iteration " + f"#{self.iteration}. It will now resort to lbfgs instead." + ), ConvergenceWarning, ) if self.verbose: diff --git a/sklearn/linear_model/_least_angle.py b/sklearn/linear_model/_least_angle.py index 35af5dab30267..4be8bb730a0ae 100644 --- a/sklearn/linear_model/_least_angle.py +++ b/sklearn/linear_model/_least_angle.py @@ -587,7 +587,6 @@ def _lars_path_solver( if n_iter >= max_iter or n_active >= n_features: break if not drop: - ########################################################## # Append x_j to the Cholesky factorization of (Xa * Xa') # # # @@ -776,7 +775,6 @@ def _lars_path_solver( # See if any coefficient has changed sign if drop and method == "lasso": - # handle the case when idx is not length of 1 for ii in idx: arrayfuncs.cholesky_delete(L[:n_active, :n_active], ii) diff --git a/sklearn/linear_model/_logistic.py b/sklearn/linear_model/_logistic.py index 861e716a531d9..a00004ae17676 100644 --- a/sklearn/linear_model/_logistic.py +++ b/sklearn/linear_model/_logistic.py @@ -48,7 +48,6 @@ def _check_solver(solver, penalty, dual): - # TODO(1.4): Remove "none" option if solver not in ["liblinear", "saga"] and penalty not in ("l2", "none", None): raise ValueError( @@ -484,7 +483,11 @@ def _logistic_regression_path( w0 = sol.solve(X=X, y=target, sample_weight=sample_weight) n_iter_i = sol.iteration elif solver == "liblinear": - coef_, intercept_, n_iter_i, = _fit_liblinear( + ( + coef_, + intercept_, + n_iter_i, + ) = _fit_liblinear( X, target, C, @@ -1110,7 +1113,6 @@ def __init__( n_jobs=None, l1_ratio=None, ): - self.penalty = penalty self.dual = dual self.tol = tol @@ -1174,8 +1176,10 @@ def fit(self, X, y, sample_weight=None): # TODO(1.4): Remove "none" option if self.penalty == "none": warnings.warn( - "`penalty='none'`has been deprecated in 1.2 and will be removed in 1.4." - " To keep the past behaviour, set `penalty=None`.", + ( + "`penalty='none'`has been deprecated in 1.2 and will be removed in" + " 1.4. To keep the past behaviour, set `penalty=None`." + ), FutureWarning, ) @@ -1944,7 +1948,6 @@ def fit(self, X, y, sample_weight=None): for index, (cls, encoded_label) in enumerate( zip(iter_classes, iter_encoded_labels) ): - if multi_class == "ovr": scores = self.scores_[cls] coefs_paths = self.coefs_paths_[cls] diff --git a/sklearn/linear_model/_quantile.py b/sklearn/linear_model/_quantile.py index d26f060c4d8e8..081e3da5b51b7 100644 --- a/sklearn/linear_model/_quantile.py +++ b/sklearn/linear_model/_quantile.py @@ -184,9 +184,11 @@ def fit(self, X, y, sample_weight=None): if self.solver == "warn": warnings.warn( - "The default solver will change from 'interior-point' to 'highs' in " - "version 1.4. Set `solver='highs'` or to the desired solver to silence " - "this warning.", + ( + "The default solver will change from 'interior-point' to 'highs' in" + " version 1.4. Set `solver='highs'` or to the desired solver to" + " silence this warning." + ), FutureWarning, ) solver = "interior-point" diff --git a/sklearn/linear_model/_ransac.py b/sklearn/linear_model/_ransac.py index 472f2275ffa6b..0e11a47b442a6 100644 --- a/sklearn/linear_model/_ransac.py +++ b/sklearn/linear_model/_ransac.py @@ -283,7 +283,6 @@ def __init__( random_state=None, base_estimator="deprecated", ): - self.estimator = estimator self.min_samples = min_samples self.residual_threshold = residual_threshold @@ -342,8 +341,10 @@ def fit(self, X, y, sample_weight=None): if self.base_estimator != "deprecated": warnings.warn( - "`base_estimator` was renamed to `estimator` in version 1.1 and " - "will be removed in 1.3.", + ( + "`base_estimator` was renamed to `estimator` in version 1.1 and " + "will be removed in 1.3." + ), FutureWarning, ) self.estimator = self.base_estimator @@ -540,10 +541,12 @@ def fit(self, X, y, sample_weight=None): + self.n_skips_invalid_model_ ) > self.max_skips: warnings.warn( - "RANSAC found a valid consensus set but exited" - " early due to skipping more iterations than" - " `max_skips`. See estimator attributes for" - " diagnostics (n_skips*).", + ( + "RANSAC found a valid consensus set but exited" + " early due to skipping more iterations than" + " `max_skips`. See estimator attributes for" + " diagnostics (n_skips*)." + ), ConvergenceWarning, ) diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index 18f9f8aa5ee43..28ef7cbd43eb7 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -356,8 +356,10 @@ def func(w): result = optimize.minimize(func, x0, **config) if not result["success"]: warnings.warn( - "The lbfgs solver did not converge. Try increasing max_iter " - f"or tol. Currently: max_iter={max_iter} and tol={tol}", + ( + "The lbfgs solver did not converge. Try increasing max_iter " + f"or tol. Currently: max_iter={max_iter} and tol={tol}" + ), ConvergenceWarning, ) coefs[i] = result["x"] @@ -571,7 +573,6 @@ def _ridge_regression( check_input=True, fit_intercept=False, ): - has_sw = sample_weight is not None if solver == "auto": @@ -781,7 +782,6 @@ def _ridge_regression( class _BaseRidge(LinearModel, metaclass=ABCMeta): - _parameter_constraints: dict = { "alpha": [Interval(Real, 0, None, closed="left"), np.ndarray], "fit_intercept": ["boolean"], @@ -820,7 +820,6 @@ def __init__( self.random_state = random_state def fit(self, X, y, sample_weight=None): - if self.solver == "lbfgs" and not self.positive: raise ValueError( "'lbfgs' solver can be used only when positive=True. " @@ -2101,7 +2100,6 @@ def fit(self, X, y, sample_weight=None): class _BaseRidgeCV(LinearModel): - _parameter_constraints: dict = { "alphas": ["array-like", Interval(Real, 0, None, closed="neither")], "fit_intercept": ["boolean"], diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py index ff6878d5e1686..2f27bdee7968b 100644 --- a/sklearn/linear_model/_stochastic_gradient.py +++ b/sklearn/linear_model/_stochastic_gradient.py @@ -492,7 +492,6 @@ def _get_plain_sgd_function(input_dtype): class BaseSGDClassifier(LinearClassifierMixin, BaseSGD, metaclass=ABCMeta): - loss_functions = { "hinge": (Hinge, 1.0), "squared_hinge": (SquaredHinge, 1.0), @@ -541,7 +540,6 @@ def __init__( warm_start=False, average=False, ): - super().__init__( loss=loss, penalty=penalty, @@ -709,9 +707,11 @@ def _fit( and self.n_iter_ == self.max_iter ): warnings.warn( - "Maximum number of iteration reached before " - "convergence. Consider increasing max_iter to " - "improve the fit.", + ( + "Maximum number of iteration reached before " + "convergence. Consider increasing max_iter to " + "improve the fit." + ), ConvergenceWarning, ) return self @@ -1360,7 +1360,6 @@ def _more_tags(self): class BaseSGDRegressor(RegressorMixin, BaseSGD): - loss_functions = { "squared_error": (SquaredLoss,), "huber": (Huber, DEFAULT_EPSILON), @@ -1556,9 +1555,11 @@ def _fit( and self.n_iter_ == self.max_iter ): warnings.warn( - "Maximum number of iteration reached before " - "convergence. Consider increasing max_iter to " - "improve the fit.", + ( + "Maximum number of iteration reached before " + "convergence. Consider increasing max_iter to " + "improve the fit." + ), ConvergenceWarning, ) @@ -2285,7 +2286,6 @@ def _fit_one_class(self, X, alpha, C, sample_weight, learning_rate, max_iter): self.t_ += self.n_iter_ * n_samples if self.average > 0: - self._average_intercept = np.atleast_1d(average_intercept) self._standard_intercept = np.atleast_1d(intercept) @@ -2443,9 +2443,11 @@ def _fit( and self.n_iter_ == self.max_iter ): warnings.warn( - "Maximum number of iteration reached before " - "convergence. Consider increasing max_iter to " - "improve the fit.", + ( + "Maximum number of iteration reached before " + "convergence. Consider increasing max_iter to " + "improve the fit." + ), ConvergenceWarning, ) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index 81a812bf11150..f8b1bd7b7b65c 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -596,7 +596,6 @@ def test_dtype_preprocess_data(global_random_seed): for fit_intercept in [True, False]: for normalize in [True, False]: - Xt_32, yt_32, X_mean_32, y_mean_32, X_scale_32 = _preprocess_data( X_32, y_32, diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index c3f439bd4f150..f8a6759a40dd8 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -742,7 +742,6 @@ def test_logistic_regression_sample_weights(): sample_weight = y + 1 for LR in [LogisticRegression, LogisticRegressionCV]: - kw = {"random_state": 42, "fit_intercept": False, "multi_class": "ovr"} if LR is LogisticRegressionCV: kw.update({"Cs": 3, "cv": 3}) @@ -1918,7 +1917,6 @@ def test_scores_attribute_layout_elasticnet(): for i, C in enumerate(Cs): for j, l1_ratio in enumerate(l1_ratios): - lr = LogisticRegression( penalty="elasticnet", solver="saga", diff --git a/sklearn/linear_model/tests/test_ransac.py b/sklearn/linear_model/tests/test_ransac.py index 12c9f93c44d00..d7406fd5ec9b6 100644 --- a/sklearn/linear_model/tests/test_ransac.py +++ b/sklearn/linear_model/tests/test_ransac.py @@ -29,7 +29,6 @@ def test_ransac_inliers_outliers(): - estimator = LinearRegression() ransac_estimator = RANSACRegressor( estimator, min_samples=2, residual_threshold=5, random_state=0 @@ -294,7 +293,6 @@ def test_ransac_sparse_csc(): def test_ransac_none_estimator(): - estimator = LinearRegression() ransac_estimator = RANSACRegressor( @@ -359,7 +357,6 @@ def test_ransac_min_n_samples(): def test_ransac_multi_dimensional_targets(): - estimator = LinearRegression() ransac_estimator = RANSACRegressor( estimator, min_samples=2, residual_threshold=5, random_state=0 diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py index 51c166869f174..9c921ddf2ebda 100644 --- a/sklearn/linear_model/tests/test_sgd.py +++ b/sklearn/linear_model/tests/test_sgd.py @@ -180,6 +180,7 @@ def SparseSGDOneClassSVM(**kwargs): ############################################################################### # Common Test Case to classification and regression + # a simple implementation of ASGD to use for testing # uses squared loss to find the gradient def asgd(klass, X, y, eta, alpha, weight_init=None, intercept_init=0.0): @@ -1393,6 +1394,7 @@ def test_loss_function_epsilon(klass): ############################################################################### # SGD One Class SVM Test Case + # a simple implementation of ASGD to use for testing SGDOneClassSVM def asgd_oneclass(klass, X, eta, nu, coef_init=None, offset_init=0.0): if coef_init is None: diff --git a/sklearn/manifold/_isomap.py b/sklearn/manifold/_isomap.py index e2451f31cc1c2..92206721aac15 100644 --- a/sklearn/manifold/_isomap.py +++ b/sklearn/manifold/_isomap.py @@ -274,10 +274,12 @@ def _fit_transform(self, X): "of passing a sparse neighbors graph." ) warnings.warn( - "The number of connected components of the neighbors graph " - f"is {n_connected_components} > 1. Completing the graph to fit" - " Isomap might be slow. Increase the number of neighbors to " - "avoid this issue.", + ( + "The number of connected components of the neighbors graph " + f"is {n_connected_components} > 1. Completing the graph to fit" + " Isomap might be slow. Increase the number of neighbors to " + "avoid this issue." + ), stacklevel=2, ) diff --git a/sklearn/manifold/_mds.py b/sklearn/manifold/_mds.py index d6f99c84f55f1..7fc46325a1ae1 100644 --- a/sklearn/manifold/_mds.py +++ b/sklearn/manifold/_mds.py @@ -297,9 +297,11 @@ def smacof( # TODO(1.4): Remove if normalized_stress == "warn": warnings.warn( - "The default value of `normalized_stress` will change to `'auto'` in" - " version 1.4. To suppress this warning, manually set the value of" - " `normalized_stress`.", + ( + "The default value of `normalized_stress` will change to `'auto'` in" + " version 1.4. To suppress this warning, manually set the value of" + " `normalized_stress`." + ), FutureWarning, ) normalized_stress = False diff --git a/sklearn/manifold/_spectral_embedding.py b/sklearn/manifold/_spectral_embedding.py index b35ad3a147b5f..6c0c2c9d1ccc4 100644 --- a/sklearn/manifold/_spectral_embedding.py +++ b/sklearn/manifold/_spectral_embedding.py @@ -588,8 +588,10 @@ def __init__( def _more_tags(self): return { - "pairwise": self.affinity - in ["precomputed", "precomputed_nearest_neighbors"] + "pairwise": self.affinity in [ + "precomputed", + "precomputed_nearest_neighbors", + ] } def _get_affinity_matrix(self, X, Y=None): diff --git a/sklearn/manifold/_t_sne.py b/sklearn/manifold/_t_sne.py index 61d43606dedb9..01b74ebdf794f 100644 --- a/sklearn/manifold/_t_sne.py +++ b/sklearn/manifold/_t_sne.py @@ -839,8 +839,10 @@ def _fit(self, X, skip_num_points=0): ) if self.square_distances != "deprecated": warnings.warn( - "The parameter `square_distances` has not effect and will be " - "removed in version 1.3.", + ( + "The parameter `square_distances` has not effect and will be " + "removed in version 1.3." + ), FutureWarning, ) if self.learning_rate == "auto": @@ -871,8 +873,10 @@ def _fit(self, X, skip_num_points=0): check_non_negative( X, - "TSNE.fit(). With metric='precomputed', X " - "should contain positive distances.", + ( + "TSNE.fit(). With metric='precomputed', X " + "should contain positive distances." + ), ) if self.method == "exact" and issparse(X): diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 8e203a48c967b..5beb13df8069c 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -2825,9 +2825,11 @@ def log_loss( else: # TODO: Remove user defined eps in 1.5 warnings.warn( - "Setting the eps parameter is deprecated and will " - "be removed in 1.5. Instead eps will always have" - "a default value of `np.finfo(y_pred.dtype).eps`.", + ( + "Setting the eps parameter is deprecated and will " + "be removed in 1.5. Instead eps will always have" + "a default value of `np.finfo(y_pred.dtype).eps`." + ), FutureWarning, ) @@ -2894,8 +2896,10 @@ def log_loss( y_pred_sum = y_pred.sum(axis=1) if not np.isclose(y_pred_sum, 1, rtol=1e-15, atol=5 * eps).all(): warnings.warn( - "The y_pred values do not sum to one. Starting from 1.5 this" - "will result in an error.", + ( + "The y_pred values do not sum to one. Starting from 1.5 this" + "will result in an error." + ), UserWarning, ) y_pred = y_pred / y_pred_sum[:, np.newaxis] diff --git a/sklearn/metrics/_plot/tests/test_common_curve_display.py b/sklearn/metrics/_plot/tests/test_common_curve_display.py index fde87e2949d0b..b9fda563fc984 100644 --- a/sklearn/metrics/_plot/tests/test_common_curve_display.py +++ b/sklearn/metrics/_plot/tests/test_common_curve_display.py @@ -95,8 +95,10 @@ def test_display_curve_error_regression(pyplot, data_binary, Display): ), ( "auto", - "MyClassifier has none of the following attributes: predict_proba," - " decision_function.", + ( + "MyClassifier has none of the following attributes: predict_proba," + " decision_function." + ), ), ( "bad_method", diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py index de89a43f80ed4..0cdead9233898 100644 --- a/sklearn/metrics/_ranking.py +++ b/sklearn/metrics/_ranking.py @@ -1765,9 +1765,11 @@ def ndcg_score(y_true, y_score, *, k=None, sample_weight=None, ignore_ties=False if y_true.min() < 0: # TODO(1.4): Replace warning w/ ValueError warnings.warn( - "ndcg_score should not be used on negative y_true values. ndcg_score" - " will raise a ValueError on negative y_true values starting from" - " version 1.4.", + ( + "ndcg_score should not be used on negative y_true values. ndcg_score" + " will raise a ValueError on negative y_true values starting from" + " version 1.4." + ), FutureWarning, ) if y_true.ndim > 1 and y_true.shape[1] <= 1: @@ -1925,8 +1927,10 @@ def top_k_accuracy_score( if k >= n_classes: warnings.warn( - f"'k' ({k}) greater than or equal to 'n_classes' ({n_classes}) " - "will result in a perfect score and is therefore meaningless.", + ( + f"'k' ({k}) greater than or equal to 'n_classes' ({n_classes}) " + "will result in a perfect score and is therefore meaningless." + ), UndefinedMetricWarning, ) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 9df1b482bdeb3..d085cd66d8232 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -807,9 +807,11 @@ class _DeprecatedScorers(dict): def __getitem__(self, item): warnings.warn( - "sklearn.metrics.SCORERS is deprecated and will be removed in v1.3. " - "Please use sklearn.metrics.get_scorer_names to get a list of available " - "scorers and sklearn.metrics.get_metric to get scorer.", + ( + "sklearn.metrics.SCORERS is deprecated and will be removed in v1.3." + " Please use sklearn.metrics.get_scorer_names to get a list of" + " available scorers and sklearn.metrics.get_metric to get scorer." + ), FutureWarning, ) return super().__getitem__(item) diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index efc0da272b703..fd8c9764a0781 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -963,8 +963,10 @@ def manhattan_distances(X, Y=None, *, sum_over_features="deprecated"): # TODO(1.4): remove sum_over_features if sum_over_features != "deprecated": warnings.warn( - "`sum_over_features` is deprecated in version 1.2 and will be" - " removed in version 1.4.", + ( + "`sum_over_features` is deprecated in version 1.2 and will be" + " removed in version 1.4." + ), FutureWarning, ) else: diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 74a85448a6b11..5b07faf91b49a 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -639,8 +639,10 @@ def test_likelihood_ratios_warnings(params, warn_msg): "y_true": np.array([0, 1, 0, 1, 0]), "y_pred": np.array([1, 1, 0, 0, 2]), }, - "class_likelihood_ratios only supports binary classification " - "problems, got targets of type: multiclass", + ( + "class_likelihood_ratios only supports binary classification " + "problems, got targets of type: multiclass" + ), ), ], ) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 335803c0ba383..810d31d2f7880 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -549,7 +549,6 @@ def _require_positive_targets(y1, y2): def test_symmetry_consistency(): - # We shouldn't forget any metrics assert ( SYMMETRIC_METRICS diff --git a/sklearn/metrics/tests/test_pairwise.py b/sklearn/metrics/tests/test_pairwise.py index 285e876517c99..40f889fe8ce7d 100644 --- a/sklearn/metrics/tests/test_pairwise.py +++ b/sklearn/metrics/tests/test_pairwise.py @@ -978,7 +978,6 @@ def test_nan_euclidean_distances_equal_to_euclidean_distance(squared): @pytest.mark.parametrize("X", [np.array([[np.inf, 0]]), np.array([[0, -np.inf]])]) @pytest.mark.parametrize("Y", [np.array([[np.inf, 0]]), np.array([[0, -np.inf]]), None]) def test_nan_euclidean_distances_infinite_values(X, Y): - with pytest.raises(ValueError) as excinfo: nan_euclidean_distances(X, Y=Y) @@ -1002,7 +1001,6 @@ def test_nan_euclidean_distances_infinite_values(X, Y): ], ) def test_nan_euclidean_distances_2x2(X, X_diag, missing_value): - exp_dist = np.array([[0.0, X_diag], [X_diag, 0]]) dist = nan_euclidean_distances(X, missing_values=missing_value) diff --git a/sklearn/metrics/tests/test_pairwise_distances_reduction.py b/sklearn/metrics/tests/test_pairwise_distances_reduction.py index 7355dfd6ba912..9040efeb4aa76 100644 --- a/sklearn/metrics/tests/test_pairwise_distances_reduction.py +++ b/sklearn/metrics/tests/test_pairwise_distances_reduction.py @@ -99,7 +99,6 @@ def relative_rounding(scalar, n_significant_digits): def test_relative_rounding(): - assert relative_rounding(0, 1) == 0.0 assert relative_rounding(0, 10) == 0.0 assert relative_rounding(0, 123456) == 0.0 @@ -238,7 +237,6 @@ def assert_radius_neighbors_results_quasi_equality( # Asserting equality of results one vector at a time for query_idx in range(n_queries): - ref_dist_row = ref_dist[query_idx] dist_row = dist[query_idx] @@ -322,7 +320,6 @@ def assert_radius_neighbors_results_quasi_equality( def test_assert_argkmin_results_quasi_equality(): - rtol = 1e-7 eps = 1e-7 _1m = 1.0 - eps @@ -406,7 +403,6 @@ def test_assert_argkmin_results_quasi_equality(): def test_assert_radius_neighbors_results_quasi_equality(): - rtol = 1e-7 eps = 1e-7 _1m = 1.0 - eps @@ -1119,6 +1115,7 @@ def test_strategies_consistency( # "Concrete Dispatchers"-specific tests + # TODO: Remove filterwarnings in 1.3 when wminkowski is removed @pytest.mark.filterwarnings("ignore:WMinkowskiDistance:FutureWarning:sklearn") @pytest.mark.parametrize("n_features", [50, 500]) diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index d7bbde546842c..f38f118c38c0a 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -685,8 +685,10 @@ def test_micro_averaged_ovr_roc_auc(global_random_seed): ["a", "a", "b"], ), ( - "Number of classes in y_true not equal to the number of columns " - "in 'y_score'", + ( + "Number of classes in y_true not equal to the number of columns " + "in 'y_score'" + ), np.array([0, 2, 0, 2]), None, ), @@ -696,26 +698,34 @@ def test_micro_averaged_ovr_roc_auc(global_random_seed): ["a", "c", "b"], ), ( - "Number of given labels, 2, not equal to the number of columns in " - "'y_score', 3", + ( + "Number of given labels, 2, not equal to the number of columns in " + "'y_score', 3" + ), np.array([0, 1, 2, 2]), [0, 1], ), ( - "Number of given labels, 2, not equal to the number of columns in " - "'y_score', 3", + ( + "Number of given labels, 2, not equal to the number of columns in " + "'y_score', 3" + ), np.array(["a", "b", "c", "c"]), ["a", "b"], ), ( - "Number of given labels, 4, not equal to the number of columns in " - "'y_score', 3", + ( + "Number of given labels, 4, not equal to the number of columns in " + "'y_score', 3" + ), np.array([0, 1, 2, 2]), [0, 1, 2, 3], ), ( - "Number of given labels, 4, not equal to the number of columns in " - "'y_score', 3", + ( + "Number of given labels, 4, not equal to the number of columns in " + "'y_score', 3" + ), np.array(["a", "b", "c", "c"]), ["a", "b", "c", "d"], ), @@ -2144,8 +2154,10 @@ def test_top_k_accuracy_score_warning(y_true, k): [0, 1], [[0.5, 0.2, 0.2], [0.3, 0.4, 0.2]], None, - "`y_true` is binary while y_score is 2d with 3 classes. If" - " `y_true` does not contain all the labels, `labels` must be provided", + ( + "`y_true` is binary while y_score is 2d with 3 classes. If" + " `y_true` does not contain all the labels, `labels` must be provided" + ), ), ], ) diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py index d9223401cec9c..d9065edb9dfb3 100644 --- a/sklearn/metrics/tests/test_regression.py +++ b/sklearn/metrics/tests/test_regression.py @@ -312,7 +312,6 @@ def test__check_reg_targets(): ] for (type1, y1, n_out1), (type2, y2, n_out2) in product(EXAMPLES, repeat=2): - if type1 == type2 and n_out1 == n_out2: y_type, y_check1, y_check2, multioutput = _check_reg_targets(y1, y2, None) assert type1 == y_type diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index d39db7fc894c4..7f3e804f68d46 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -741,8 +741,10 @@ def test_scoring_is_not_metric(): @pytest.mark.parametrize( - "scorers,expected_predict_count," - "expected_predict_proba_count,expected_decision_func_count", + ( + "scorers,expected_predict_count," + "expected_predict_proba_count,expected_decision_func_count" + ), [ ( { diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index b443f1049b8d0..f2d634b3fffe5 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1050,7 +1050,6 @@ def test_property(): gmm.fit(X) if covar_type == "full": for prec, covar in zip(gmm.precisions_, gmm.covariances_): - assert_array_almost_equal(linalg.inv(prec), covar) elif covar_type == "tied": assert_array_almost_equal(linalg.inv(gmm.precisions_), gmm.covariances_) diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 1df8db6e7d5c5..1621dd324f81c 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -405,7 +405,6 @@ def __init__( error_score=np.nan, return_train_score=True, ): - self.scoring = scoring self.estimator = estimator self.n_jobs = n_jobs @@ -971,8 +970,10 @@ def _store(key_name, array, weights=None, splits=False, rank=False): ~np.isfinite(array_means) ): warnings.warn( - f"One or more of the {key_name.split('_')[0]} scores " - f"are non-finite: {array_means}", + ( + f"One or more of the {key_name.split('_')[0]} scores " + f"are non-finite: {array_means}" + ), category=UserWarning, ) diff --git a/sklearn/model_selection/_search_successive_halving.py b/sklearn/model_selection/_search_successive_halving.py index 7f6470b4a77c4..4826e7931d4d6 100644 --- a/sklearn/model_selection/_search_successive_halving.py +++ b/sklearn/model_selection/_search_successive_halving.py @@ -124,7 +124,6 @@ def __init__( self.aggressive_elimination = aggressive_elimination def _check_input_parameters(self, X, y, groups): - # We need to enforce that successive calls to cv.split() yield the same # splits: see https://github.com/scikit-learn/scikit-learn/issues/15149 if not _yields_constant_splits(self._checked_cv_orig): @@ -312,7 +311,6 @@ def _run_search(self, evaluate_candidates): self.n_candidates_ = [] for itr in range(n_iterations): - power = itr # default if self.aggressive_elimination: # this will set n_resources to the initial value (i.e. the diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index dded962a350d8..4ad284a728a52 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -308,9 +308,11 @@ def __init__(self, n_splits, *, shuffle, random_state): if not shuffle and random_state is not None: # None is the default raise ValueError( - "Setting a random_state has no effect since shuffle is " - "False. You should leave " - "random_state to its default (None), or set shuffle=True.", + ( + "Setting a random_state has no effect since shuffle is " + "False. You should leave " + "random_state to its default (None), or set shuffle=True." + ), ) self.n_splits = n_splits diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index 8bd044bbd40ae..7951417845bff 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -567,7 +567,6 @@ def _fit_and_score( candidate_progress=None, error_score=np.nan, ): - """Fit estimator and compute scores for a given dataset split. Parameters @@ -796,9 +795,11 @@ def _score(estimator, X_test, y_test, scorer, error_score="raise"): else: scores = error_score warnings.warn( - "Scoring failed. The score on this train-test partition for " - f"these parameters will be set to {error_score}. Details: \n" - f"{format_exc()}", + ( + "Scoring failed. The score on this train-test partition for " + f"these parameters will be set to {error_score}. Details: \n" + f"{format_exc()}" + ), UserWarning, ) @@ -812,9 +813,11 @@ def _score(estimator, X_test, y_test, scorer, error_score="raise"): for name, str_e in exception_messages: scores[name] = error_score warnings.warn( - "Scoring failed. The score on this train-test partition for " - f"these parameters will be set to {error_score}. Details: \n" - f"{str_e}", + ( + "Scoring failed. The score on this train-test partition for " + f"these parameters will be set to {error_score}. Details: \n" + f"{str_e}" + ), UserWarning, ) @@ -1944,8 +1947,10 @@ def _aggregate_score_dicts(scores): 'b': array([10, 2, 3, 10])} """ return { - key: np.asarray([score[key] for score in scores]) - if isinstance(scores[0][key], numbers.Number) - else [score[key] for score in scores] + key: ( + np.asarray([score[key] for score in scores]) + if isinstance(scores[0][key], numbers.Number) + else [score[key] for score in scores] + ) for key in scores[0] } diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index 3943cb26f1ccb..0aef56da2d3c8 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -149,7 +149,6 @@ def test_validate_parameter_input(klass, input, error_type, error_message): def test_parameter_grid(): - # Test basic properties of ParameterGrid. params1 = {"foo": [1, 2, 3]} grid1 = ParameterGrid(params1) @@ -1641,9 +1640,10 @@ def test_grid_search_classifier_all_fits_fail(): ) warning_message = re.compile( - "All the 15 fits failed.+" - "15 fits failed with the following error.+ValueError.+Failing classifier failed" - " as required", + ( + "All the 15 fits failed.+15 fits failed with the following" + " error.+ValueError.+Failing classifier failed as required" + ), flags=re.DOTALL, ) with pytest.raises(ValueError, match=warning_message): @@ -2178,8 +2178,10 @@ def custom_scorer(est, X, y): individual_fit_error_message = "ValueError: Failing classifier failed as required" error_message = re.compile( - "All the 15 fits failed.+your model is misconfigured.+" - f"{individual_fit_error_message}", + ( + "All the 15 fits failed.+your model is misconfigured.+" + f"{individual_fit_error_message}" + ), flags=re.DOTALL, ) diff --git a/sklearn/model_selection/tests/test_successive_halving.py b/sklearn/model_selection/tests/test_successive_halving.py index 2045e95a7707c..d0e3b9f78c918 100644 --- a/sklearn/model_selection/tests/test_successive_halving.py +++ b/sklearn/model_selection/tests/test_successive_halving.py @@ -123,14 +123,16 @@ def test_nan_handling(HalvingSearch, fail_at): @pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV)) @pytest.mark.parametrize( - "aggressive_elimination," - "max_resources," - "expected_n_iterations," - "expected_n_required_iterations," - "expected_n_possible_iterations," - "expected_n_remaining_candidates," - "expected_n_candidates," - "expected_n_resources,", + ( + "aggressive_elimination," + "max_resources," + "expected_n_iterations," + "expected_n_required_iterations," + "expected_n_possible_iterations," + "expected_n_remaining_candidates," + "expected_n_candidates," + "expected_n_resources," + ), [ # notice how it loops at the beginning # also, the number of candidates evaluated at the last iteration is @@ -196,11 +198,13 @@ def test_aggressive_elimination( @pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV)) @pytest.mark.parametrize( - "min_resources," - "max_resources," - "expected_n_iterations," - "expected_n_possible_iterations," - "expected_n_resources,", + ( + "min_resources," + "max_resources," + "expected_n_iterations," + "expected_n_possible_iterations," + "expected_n_resources," + ), [ # with enough resources ("smallest", "auto", 2, 4, [20, 60]), @@ -534,7 +538,6 @@ def test_subsample_splitter_determinism(subsample_test): ], ) def test_top_k(k, itr, expected): - results = { # this isn't a 'real world' result dict "iter": [0, 0, 0, 0, 1, 1, 2, 2, 2], "mean_test_score": [4, 3, 5, 1, 11, 10, 5, 6, 9], diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index 863a366eb4410..1ed42d555309a 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ b/sklearn/model_selection/tests/test_validation.py @@ -1980,7 +1980,6 @@ def test_cross_val_predict_with_method_multilabel_rf_rare_class(): def get_expected_predictions(X, y, cv, classes, est, method): - expected_predictions = np.zeros([len(y), classes]) func = getattr(est, method) @@ -2001,7 +2000,6 @@ def get_expected_predictions(X, y, cv, classes, est, method): def test_cross_val_predict_class_subset(): - X = np.arange(200).reshape(100, 2) y = np.array([x // 10 for x in range(100)]) classes = 10 @@ -2175,9 +2173,11 @@ def test_cross_validate_some_failing_fits_warning(error_score): "ValueError: Classifier fit failed with 1 values too high" ) warning_message = re.compile( - "2 fits failed.+total of 3.+The score on these" - " train-test partitions for these parameters will be set to" - f" {cross_validate_kwargs['error_score']}.+{individual_fit_error_message}", + ( + "2 fits failed.+total of 3.+The score on these" + " train-test partitions for these parameters will be set to" + f" {cross_validate_kwargs['error_score']}.+{individual_fit_error_message}" + ), flags=re.DOTALL, ) @@ -2198,8 +2198,10 @@ def test_cross_validate_all_failing_fits_error(error_score): individual_fit_error_message = "ValueError: Failing classifier failed as required" error_message = re.compile( - "All the 7 fits failed.+your model is misconfigured.+" - f"{individual_fit_error_message}", + ( + "All the 7 fits failed.+your model is misconfigured.+" + f"{individual_fit_error_message}" + ), flags=re.DOTALL, ) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 4b7015dd40ece..cf49f82245c09 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -84,7 +84,6 @@ def _check(self): class _MultiOutputEstimator(MetaEstimatorMixin, BaseEstimator, metaclass=ABCMeta): - _parameter_constraints: dict = { "estimator": [HasMethods(["fit", "predict"])], "n_jobs": [Integral, None], @@ -541,7 +540,6 @@ def _check(self): class _BaseChain(BaseEstimator, metaclass=ABCMeta): - _parameter_constraints: dict = { "base_estimator": [HasMethods(["fit", "predict"])], "order": ["array-like", StrOptions({"random"}), None], diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index bba20ca7130f4..20858ac8b5577 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -627,8 +627,11 @@ def _check_alpha(self): if _force_alpha == "warn" and alpha_min < alpha_lower_bound: _force_alpha = False warnings.warn( - "The default value for `force_alpha` will change to `True` in 1.4. To" - " suppress this warning, manually set the value of `force_alpha`.", + ( + "The default value for `force_alpha` will change to `True` in 1.4." + " To suppress this warning, manually set the value of" + " `force_alpha`." + ), FutureWarning, ) if alpha_min < alpha_lower_bound and not _force_alpha: diff --git a/sklearn/neighbors/_base.py b/sklearn/neighbors/_base.py index 9e3dfa248f00e..8ba1f4fa7d093 100644 --- a/sklearn/neighbors/_base.py +++ b/sklearn/neighbors/_base.py @@ -232,9 +232,12 @@ def sort_graph_by_row_values(graph, copy=False, warn_when_not_sorted=True): if warn_when_not_sorted: warnings.warn( - "Precomputed sparse input was not sorted by row values. Use the function" - " sklearn.neighbors.sort_graph_by_row_values to sort the input by row" - " values, with warn_when_not_sorted=False to remove this warning.", + ( + "Precomputed sparse input was not sorted by row values. Use the" + " function sklearn.neighbors.sort_graph_by_row_values to sort the input" + " by row values, with warn_when_not_sorted=False to remove this" + " warning." + ), EfficiencyWarning, ) @@ -400,7 +403,6 @@ def __init__( metric_params=None, n_jobs=None, ): - self.n_neighbors = n_neighbors self.radius = radius self.algorithm = algorithm @@ -441,9 +443,11 @@ def _check_algorithm_metric(self): if self.metric_params is not None and "p" in self.metric_params: if self.p is not None: warnings.warn( - "Parameter p is found in metric_params. " - "The corresponding parameter from __init__ " - "is ignored.", + ( + "Parameter p is found in metric_params. " + "The corresponding parameter from __init__ " + "is ignored." + ), SyntaxWarning, stacklevel=3, ) @@ -460,10 +464,12 @@ def _fit(self, X, y=None): if y.ndim == 1 or y.ndim == 2 and y.shape[1] == 1: if y.ndim != 1: warnings.warn( - "A column-vector y was passed when a " - "1d array was expected. Please change " - "the shape of y to (n_samples,), for " - "example using ravel().", + ( + "A column-vector y was passed when a " + "1d array was expected. Please change " + "the shape of y to (n_samples,), for " + "example using ravel()." + ), DataConversionWarning, stacklevel=2, ) diff --git a/sklearn/neighbors/_distance_metric.py b/sklearn/neighbors/_distance_metric.py index c973425d2e7b6..9bfd131c482d8 100644 --- a/sklearn/neighbors/_distance_metric.py +++ b/sklearn/neighbors/_distance_metric.py @@ -8,9 +8,11 @@ class DistanceMetric(_DistanceMetric): @classmethod def _warn(cls): warnings.warn( - "sklearn.neighbors.DistanceMetric has been moved " - "to sklearn.metrics.DistanceMetric in 1.0. " - "This import path will be removed in 1.3", + ( + "sklearn.neighbors.DistanceMetric has been moved " + "to sklearn.metrics.DistanceMetric in 1.0. " + "This import path will be removed in 1.3" + ), category=FutureWarning, ) diff --git a/sklearn/neighbors/_regression.py b/sklearn/neighbors/_regression.py index 46eda7bbbc735..003b534074ecd 100644 --- a/sklearn/neighbors/_regression.py +++ b/sklearn/neighbors/_regression.py @@ -479,9 +479,11 @@ def predict(self, X): else: y_pred = np.array( [ - np.average(_y[ind, :], axis=0, weights=weights[i]) - if len(ind) - else empty_obs + ( + np.average(_y[ind, :], axis=0, weights=weights[i]) + if len(ind) + else empty_obs + ) for (i, ind) in enumerate(neigh_ind) ] ) diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index 4ef2831990f0a..3e22193ba55c0 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -1270,7 +1270,6 @@ def test_RadiusNeighborsRegressor_multioutput_with_uniform_weight(): X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) for algorithm, weights in product(ALGORITHMS, [None, "uniform"]): - rnn = neighbors.RadiusNeighborsRegressor(weights=weights, algorithm=algorithm) rnn.fit(X_train, y_train) @@ -1839,7 +1838,6 @@ def test_k_and_radius_neighbors_train_is_not_query(): # Test kneighbors et.al when query is not training data for algorithm in ALGORITHMS: - nn = neighbors.NearestNeighbors(n_neighbors=1, algorithm=algorithm) X = [[0], [1]] diff --git a/sklearn/neighbors/tests/test_neighbors_pipeline.py b/sklearn/neighbors/tests/test_neighbors_pipeline.py index 0893ebf7de630..905f206770769 100644 --- a/sklearn/neighbors/tests/test_neighbors_pipeline.py +++ b/sklearn/neighbors/tests/test_neighbors_pipeline.py @@ -132,7 +132,6 @@ def test_tsne(): X = rng.randn(20, 2) for metric in ["minkowski", "sqeuclidean"]: - # compare the chained version and the compact version est_chain = make_pipeline( KNeighborsTransformer( diff --git a/sklearn/neural_network/_multilayer_perceptron.py b/sklearn/neural_network/_multilayer_perceptron.py index bc17a77495925..606d7138df2de 100644 --- a/sklearn/neural_network/_multilayer_perceptron.py +++ b/sklearn/neural_network/_multilayer_perceptron.py @@ -553,7 +553,6 @@ def _fit_stochastic( layer_units, incremental, ): - params = self.coefs_ + self.intercepts_ if not incremental or not hasattr(self, "_optimizer"): if self.solver == "sgd": diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py index abc5de750969e..5a8c41a15fb51 100644 --- a/sklearn/preprocessing/_discretization.py +++ b/sklearn/preprocessing/_discretization.py @@ -222,10 +222,12 @@ def fit(self, X, y=None, sample_weight=None): if self.subsample == "warn": if n_samples > 2e5: warnings.warn( - "In version 1.3 onwards, subsample=2e5 " - "will be used by default. Set subsample explicitly to " - "silence this warning in the mean time. Set " - "subsample=None to disable subsampling explicitly.", + ( + "In version 1.3 onwards, subsample=2e5 " + "will be used by default. Set subsample explicitly to " + "silence this warning in the mean time. Set " + "subsample=None to disable subsampling explicitly." + ), FutureWarning, ) else: diff --git a/sklearn/preprocessing/_encoders.py b/sklearn/preprocessing/_encoders.py index 1962f571cfbaa..fd9941f5336ed 100644 --- a/sklearn/preprocessing/_encoders.py +++ b/sklearn/preprocessing/_encoders.py @@ -229,9 +229,11 @@ def _transform( X_int[:, i] = _encode(Xi, uniques=self.categories_[i], check_unknown=False) if columns_with_unknown: warnings.warn( - "Found unknown categories in columns " - f"{columns_with_unknown} during transform. These " - "unknown categories will be encoded as all zeros", + ( + "Found unknown categories in columns " + f"{columns_with_unknown} during transform. These " + "unknown categories will be encoded as all zeros" + ), UserWarning, ) @@ -973,9 +975,11 @@ def fit(self, X, y=None): if self.sparse != "deprecated": warnings.warn( - "`sparse` was renamed to `sparse_output` in version 1.2 and " - "will be removed in 1.4. `sparse_output` is ignored unless you " - "leave `sparse` to its default value.", + ( + "`sparse` was renamed to `sparse_output` in version 1.2 and " + "will be removed in 1.4. `sparse_output` is ignored unless you " + "leave `sparse` to its default value." + ), FutureWarning, ) self.sparse_output = self.sparse diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py index a4f5448a0922c..c250c5cd0226e 100644 --- a/sklearn/preprocessing/_function_transformer.py +++ b/sklearn/preprocessing/_function_transformer.py @@ -188,10 +188,12 @@ def _check_inverse_transform(self, X): if not _allclose_dense_sparse(X[idx_selected], X_round_trip): warnings.warn( - "The provided functions are not strictly" - " inverse of each other. If you are sure you" - " want to proceed regardless, set" - " 'check_inverse=False'.", + ( + "The provided functions are not strictly" + " inverse of each other. If you are sure you" + " want to proceed regardless, set" + " 'check_inverse=False'." + ), UserWarning, ) diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py index 3d1bf8f3064ac..ca8607b06c2e2 100644 --- a/sklearn/preprocessing/_label.py +++ b/sklearn/preprocessing/_label.py @@ -264,7 +264,6 @@ class LabelBinarizer(TransformerMixin, BaseEstimator): } def __init__(self, *, neg_label=0, pos_label=1, sparse_output=False): - self.neg_label = neg_label self.pos_label = pos_label self.sparse_output = sparse_output diff --git a/sklearn/preprocessing/_polynomial.py b/sklearn/preprocessing/_polynomial.py index eefe2936d2d99..83ebbf786d8fc 100644 --- a/sklearn/preprocessing/_polynomial.py +++ b/sklearn/preprocessing/_polynomial.py @@ -221,9 +221,11 @@ def get_feature_names_out(self, input_features=None): inds = np.where(row)[0] if len(inds): name = " ".join( - "%s^%d" % (input_features[ind], exp) - if exp != 1 - else input_features[ind] + ( + "%s^%d" % (input_features[ind], exp) + if exp != 1 + else input_features[ind] + ) for ind, exp in zip(inds, row[inds]) ) else: diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index 6c6550068094a..7ea546dd2d258 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -97,7 +97,6 @@ def test_raises_value_error_if_sample_weights_greater_than_1d(): n_featuress = [3, 2] for n_samples, n_features in zip(n_sampless, n_featuress): - X = rng.randn(n_samples, n_features) y = rng.randn(n_samples) @@ -233,7 +232,6 @@ def test_standard_scaler_dtype(add_sample_weight, sparse_constructor): def test_standard_scaler_constant_features( scaler, add_sample_weight, sparse_constructor, dtype, constant ): - if isinstance(scaler, RobustScaler) and add_sample_weight: pytest.skip(f"{scaler.__class__.__name__} does not yet support sample_weight") @@ -618,7 +616,6 @@ def test_partial_fit_sparse_input(sample_weight): null_transform = StandardScaler(with_mean=False, with_std=False, copy=True) for X in [X_csr, X_csc]: - X_null = null_transform.partial_fit(X, sample_weight=sample_weight).transform(X) assert_array_equal(X_null.toarray(), X.toarray()) X_orig = null_transform.inverse_transform(X_null) @@ -636,7 +633,6 @@ def test_standard_scaler_trasform_with_partial_fit(sample_weight): scaler_incr = StandardScaler() for i, batch in enumerate(gen_batches(X.shape[0], 1)): - X_sofar = X[: (i + 1), :] chunks_copy = X_sofar.copy() if sample_weight is None: @@ -766,7 +762,6 @@ def test_minmax_scale_axis1(): def test_min_max_scaler_1d(): # Test scaling of dataset along single axis for X in [X_1row, X_1col, X_list_1row, X_list_1row]: - scaler = MinMaxScaler(copy=True) X_scaled = scaler.fit(X).transform(X) @@ -1731,7 +1726,6 @@ def test_maxabs_scaler_transform_one_row_csr(): def test_maxabs_scaler_1d(): # Test scaling of dataset along single axis for X in [X_1row, X_1col, X_list_1row, X_list_1row]: - scaler = MaxAbsScaler(copy=True) X_scaled = scaler.fit(X).transform(X) @@ -1835,7 +1829,6 @@ def test_normalizer_l1(): # check inputs that support the no-copy optim for X in (X_dense, X_sparse_pruned, X_sparse_unpruned): - normalizer = Normalizer(norm="l1", copy=True) X_norm = normalizer.transform(X) assert X_norm is not X @@ -1884,7 +1877,6 @@ def test_normalizer_l2(): # check inputs that support the no-copy optim for X in (X_dense, X_sparse_pruned, X_sparse_unpruned): - normalizer = Normalizer(norm="l2", copy=True) X_norm1 = normalizer.transform(X) assert X_norm1 is not X @@ -1932,7 +1924,6 @@ def test_normalizer_max(): # check inputs that support the no-copy optim for X in (X_dense, X_sparse_pruned, X_sparse_unpruned): - normalizer = Normalizer(norm="max", copy=True) X_norm1 = normalizer.transform(X) assert X_norm1 is not X @@ -2032,7 +2023,6 @@ def test_binarizer(): X_ = np.array([[1, 0, 5], [2, 3, -1]]) for init in (np.array, list, sparse.csr_matrix, sparse.csc_matrix): - X = init(X_.copy()) binarizer = Binarizer(threshold=2.0, copy=True) diff --git a/sklearn/preprocessing/tests/test_encoders.py b/sklearn/preprocessing/tests/test_encoders.py index ffd5eda5195d0..42c66980bfeba 100644 --- a/sklearn/preprocessing/tests/test_encoders.py +++ b/sklearn/preprocessing/tests/test_encoders.py @@ -745,7 +745,6 @@ def test_ordinal_encoder_handle_unknowns_nan_non_float_dtype(): def test_ordinal_encoder_raise_categories_shape(): - X = np.array([["Low", "Medium", "High", "Medium", "Low"]], dtype=object).T cats = ["Low", "Medium", "High"] enc = OrdinalEncoder(categories=cats) diff --git a/sklearn/preprocessing/tests/test_polynomial.py b/sklearn/preprocessing/tests/test_polynomial.py index ae4888d459a06..f21c37fb694fa 100644 --- a/sklearn/preprocessing/tests/test_polynomial.py +++ b/sklearn/preprocessing/tests/test_polynomial.py @@ -183,6 +183,7 @@ def test_spline_transformer_get_base_knot_positions( @pytest.mark.parametrize(["bias", "intercept"], [(True, False), (False, True)]) def test_spline_transformer_periodic_linear_regression(bias, intercept): """Test that B-splines fit a periodic curve pretty well.""" + # "+ 3" to avoid the value 0 in assert_allclose def f(x): return np.sin(2 * np.pi * x) - np.sin(8 * np.pi * x) + 3 @@ -589,8 +590,8 @@ def test_polynomial_feature_names(): # test some unicode poly = PolynomialFeatures(degree=1, include_bias=True).fit(X) - feature_names = poly.get_feature_names_out(["\u0001F40D", "\u262E", "\u05D0"]) - assert_array_equal(["1", "\u0001F40D", "\u262E", "\u05D0"], feature_names) + feature_names = poly.get_feature_names_out(["\u0001F40D", "\u262e", "\u05d0"]) + assert_array_equal(["1", "\u0001F40D", "\u262e", "\u05d0"], feature_names) @pytest.mark.parametrize( diff --git a/sklearn/semi_supervised/_label_propagation.py b/sklearn/semi_supervised/_label_propagation.py index fddd96df67c3f..95fad0713d558 100644 --- a/sklearn/semi_supervised/_label_propagation.py +++ b/sklearn/semi_supervised/_label_propagation.py @@ -128,7 +128,6 @@ def __init__( tol=1e-3, n_jobs=None, ): - self.max_iter = max_iter self.tol = tol @@ -597,7 +596,6 @@ def __init__( tol=1e-3, n_jobs=None, ): - # this one has different base parameters super().__init__( kernel=kernel, diff --git a/sklearn/semi_supervised/_self_training.py b/sklearn/semi_supervised/_self_training.py index a84ff1d32e050..2438658ed89c8 100644 --- a/sklearn/semi_supervised/_self_training.py +++ b/sklearn/semi_supervised/_self_training.py @@ -215,9 +215,11 @@ def fit(self, X, y): self.k_best > X.shape[0] - np.sum(has_label) ): warnings.warn( - "k_best is larger than the amount of unlabeled " - "samples. All unlabeled samples will be labeled in " - "the first iteration", + ( + "k_best is larger than the amount of unlabeled " + "samples. All unlabeled samples will be labeled in " + "the first iteration" + ), UserWarning, ) diff --git a/sklearn/svm/_base.py b/sklearn/svm/_base.py index a66dfce695871..55919099e027c 100644 --- a/sklearn/svm/_base.py +++ b/sklearn/svm/_base.py @@ -118,7 +118,6 @@ def __init__( max_iter, random_state, ): - if self._impl not in LIBSVM_IMPL: raise ValueError( "impl should be one of %s, %s was given" % (LIBSVM_IMPL, self._impl) diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py index 7a7236d7a7bc7..94ba958b5a905 100644 --- a/sklearn/svm/_classes.py +++ b/sklearn/svm/_classes.py @@ -796,7 +796,6 @@ def __init__( break_ties=False, random_state=None, ): - super().__init__( kernel=kernel, degree=degree, @@ -1064,7 +1063,6 @@ def __init__( break_ties=False, random_state=None, ): - super().__init__( kernel=kernel, degree=degree, @@ -1279,7 +1277,6 @@ def __init__( verbose=False, max_iter=-1, ): - super().__init__( kernel=kernel, degree=degree, @@ -1488,7 +1485,6 @@ def __init__( verbose=False, max_iter=-1, ): - super().__init__( kernel=kernel, degree=degree, @@ -1686,7 +1682,6 @@ def __init__( verbose=False, max_iter=-1, ): - super().__init__( kernel, degree, diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index 838ded31ba23c..ca23360f9d462 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -574,8 +574,10 @@ def test_negative_sample_weights_mask_all_samples(Estimator, err_msg, sample_wei [ ( svm.SVC, - "Invalid input - all samples with positive weights belong to the same" - " class", + ( + "Invalid input - all samples with positive weights belong to the same" + " class" + ), ), (svm.NuSVC, "specified nu is infeasible"), ], @@ -761,7 +763,6 @@ def test_linearsvc_parameters(loss, penalty, dual): or (loss, penalty, dual) == ("hinge", "l2", False) or (penalty, dual) == ("l1", True) ): - with pytest.raises( ValueError, match="Unsupported set of arguments.*penalty='%s.*loss='%s.*dual=%s" diff --git a/sklearn/tests/random_seed.py b/sklearn/tests/random_seed.py index f282f8002f2c5..41cfe06a1d7e6 100644 --- a/sklearn/tests/random_seed.py +++ b/sklearn/tests/random_seed.py @@ -76,6 +76,8 @@ def pytest_report_header(config): return [ "To reproduce this test run, set the following environment variable:", f' SKLEARN_TESTS_GLOBAL_RANDOM_SEED="{config.option.random_seeds[0]}"', - "See: https://scikit-learn.org/dev/computing/parallelism.html" - "#sklearn-tests-global-random-seed", + ( + "See: https://scikit-learn.org/dev/computing/parallelism.html" + "#sklearn-tests-global-random-seed" + ), ] diff --git a/sklearn/tests/test_build.py b/sklearn/tests/test_build.py index d6affa5e4cc78..7321603dd4e46 100644 --- a/sklearn/tests/test_build.py +++ b/sklearn/tests/test_build.py @@ -14,8 +14,7 @@ def test_openmp_parallelism_enabled(): pytest.skip("test explicitly skipped (SKLEARN_SKIP_OPENMP_TEST)") base_url = "dev" if __version__.endswith(".dev0") else "stable" - err_msg = textwrap.dedent( - """ + err_msg = textwrap.dedent(""" This test fails because scikit-learn has been built without OpenMP. This is not recommended since some estimators will run in sequential mode instead of leveraging thread-based parallelism. @@ -27,7 +26,6 @@ def test_openmp_parallelism_enabled(): You can skip this test by setting the environment variable SKLEARN_SKIP_OPENMP_TEST to any value. - """ - ).format(base_url) + """).format(base_url) assert _openmp_parallelism_enabled(), err_msg diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 6ef0eaa433d20..ae5c2d9cd6953 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -112,8 +112,10 @@ def _sample_func(x, y=1): class_weight="balanced", warm_start=True, ), - "LogisticRegression(class_weight='balanced',random_state=1," - "solver='newton-cg',warm_start=True)", + ( + "LogisticRegression(class_weight='balanced',random_state=1," + "solver='newton-cg',warm_start=True)" + ), ), ], ) @@ -230,13 +232,11 @@ def test_all_tests_are_importable(): # Ensure that for each contentful subpackage, there is a test directory # within it that is also a subpackage (i.e. a directory with __init__.py) - HAS_TESTS_EXCEPTIONS = re.compile( - r"""(?x) + HAS_TESTS_EXCEPTIONS = re.compile(r"""(?x) \.externals(\.|$)| \.tests(\.|$)| \._ - """ - ) + """) resource_modules = { "sklearn.datasets.data", "sklearn.datasets.descr", diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py index 8bf3e5dd7b24a..707f60ec7fd90 100644 --- a/sklearn/tests/test_docstring_parameters.py +++ b/sklearn/tests/test_docstring_parameters.py @@ -155,7 +155,6 @@ def test_docstring_parameters(): def test_tabs(): # Test that there are no tabs in our source files for importer, modname, ispkg in walk_packages(sklearn.__path__, prefix="sklearn."): - if IS_PYPY and ( "_svmlight_format_io" in modname or "feature_extraction._hashing_fast" in modname diff --git a/sklearn/tests/test_dummy.py b/sklearn/tests/test_dummy.py index 1beabe15d5ff9..fd6b1108fe878 100644 --- a/sklearn/tests/test_dummy.py +++ b/sklearn/tests/test_dummy.py @@ -236,7 +236,6 @@ def test_classifier_prediction_independent_of_X(strategy, global_random_seed): def test_mean_strategy_regressor(global_random_seed): - random_state = np.random.RandomState(seed=global_random_seed) X = [[0]] * 4 # ignored @@ -248,7 +247,6 @@ def test_mean_strategy_regressor(global_random_seed): def test_mean_strategy_multioutput_regressor(global_random_seed): - random_state = np.random.RandomState(seed=global_random_seed) X_learn = random_state.randn(10, 10) @@ -276,7 +274,6 @@ def test_regressor_exceptions(): def test_median_strategy_regressor(global_random_seed): - random_state = np.random.RandomState(seed=global_random_seed) X = [[0]] * 5 # ignored @@ -288,7 +285,6 @@ def test_median_strategy_regressor(global_random_seed): def test_median_strategy_multioutput_regressor(global_random_seed): - random_state = np.random.RandomState(seed=global_random_seed) X_learn = random_state.randn(10, 10) @@ -310,7 +306,6 @@ def test_median_strategy_multioutput_regressor(global_random_seed): def test_quantile_strategy_regressor(global_random_seed): - random_state = np.random.RandomState(seed=global_random_seed) X = [[0]] * 5 # ignored @@ -334,7 +329,6 @@ def test_quantile_strategy_regressor(global_random_seed): def test_quantile_strategy_multioutput_regressor(global_random_seed): - random_state = np.random.RandomState(seed=global_random_seed) X_learn = random_state.randn(10, 10) @@ -368,7 +362,6 @@ def test_quantile_strategy_multioutput_regressor(global_random_seed): def test_quantile_invalid(): - X = [[0]] * 5 # ignored y = [0] * 5 # ignored @@ -387,7 +380,6 @@ def test_quantile_strategy_empty_train(): def test_constant_strategy_regressor(global_random_seed): - random_state = np.random.RandomState(seed=global_random_seed) X = [[0]] * 5 # ignored @@ -406,7 +398,6 @@ def test_constant_strategy_regressor(global_random_seed): def test_constant_strategy_multioutput_regressor(global_random_seed): - random_state = np.random.RandomState(seed=global_random_seed) X_learn = random_state.randn(10, 10) diff --git a/sklearn/tree/_classes.py b/sklearn/tree/_classes.py index b175275ea92dc..e04a92c22695d 100644 --- a/sklearn/tree/_classes.py +++ b/sklearn/tree/_classes.py @@ -268,17 +268,21 @@ def fit(self, X, y, sample_weight=None, check_input=True): if is_classification: max_features = max(1, int(np.sqrt(self.n_features_in_))) warnings.warn( - "`max_features='auto'` has been deprecated in 1.1 " - "and will be removed in 1.3. To keep the past behaviour, " - "explicitly set `max_features='sqrt'`.", + ( + "`max_features='auto'` has been deprecated in 1.1 " + "and will be removed in 1.3. To keep the past behaviour, " + "explicitly set `max_features='sqrt'`." + ), FutureWarning, ) else: max_features = self.n_features_in_ warnings.warn( - "`max_features='auto'` has been deprecated in 1.1 " - "and will be removed in 1.3. To keep the past behaviour, " - "explicitly set `max_features=1.0'`.", + ( + "`max_features='auto'` has been deprecated in 1.1 " + "and will be removed in 1.3. To keep the past behaviour, " + "explicitly set `max_features=1.0'`." + ), FutureWarning, ) elif self.max_features == "sqrt": diff --git a/sklearn/tree/_export.py b/sklearn/tree/_export.py index 3671c1a92334b..6b84bed891c18 100644 --- a/sklearn/tree/_export.py +++ b/sklearn/tree/_export.py @@ -417,7 +417,6 @@ def __init__( precision=3, fontname="helvetica", ): - super().__init__( max_depth=max_depth, feature_names=feature_names, @@ -526,7 +525,6 @@ def recurse(self, tree, node_id, criterion, parent=None, depth=0): # Add node with description if self.max_depth is None or depth <= self.max_depth: - # Collect ranks for 'leaf' option in plot_options if left_child == _tree.TREE_LEAF: self.ranks["leaves"].append(str(node_id)) @@ -603,7 +601,6 @@ def __init__( precision=3, fontsize=None, ): - super().__init__( max_depth=max_depth, feature_names=feature_names, diff --git a/sklearn/tree/tests/test_export.py b/sklearn/tree/tests/test_export.py index 5b4d581951cac..8cdf28b8f7130 100644 --- a/sklearn/tree/tests/test_export.py +++ b/sklearn/tree/tests/test_export.py @@ -304,7 +304,6 @@ def test_friedman_mse_in_graphviz(): def test_precision(): - rng_reg = RandomState(2) rng_clf = RandomState(8) for X, y, clf in zip( @@ -317,7 +316,6 @@ def test_precision(): DecisionTreeClassifier(max_depth=1, random_state=0), ), ): - clf.fit(X, y) for precision in (4, 3): dot_data = export_graphviz( @@ -366,14 +364,12 @@ def test_export_text(): clf = DecisionTreeClassifier(max_depth=2, random_state=0) clf.fit(X, y) - expected_report = dedent( - """ + expected_report = dedent(""" |--- feature_1 <= 0.00 | |--- class: -1 |--- feature_1 > 0.00 | |--- class: 1 - """ - ).lstrip() + """).lstrip() assert export_text(clf) == expected_report # testing that leaves at level 1 are not truncated @@ -381,58 +377,48 @@ def test_export_text(): # testing that the rest of the tree is truncated assert export_text(clf, max_depth=10) == expected_report - expected_report = dedent( - """ + expected_report = dedent(""" |--- b <= 0.00 | |--- class: -1 |--- b > 0.00 | |--- class: 1 - """ - ).lstrip() + """).lstrip() assert export_text(clf, feature_names=["a", "b"]) == expected_report - expected_report = dedent( - """ + expected_report = dedent(""" |--- feature_1 <= 0.00 | |--- class: cat |--- feature_1 > 0.00 | |--- class: dog - """ - ).lstrip() + """).lstrip() assert export_text(clf, class_names=["cat", "dog"]) == expected_report - expected_report = dedent( - """ + expected_report = dedent(""" |--- feature_1 <= 0.00 | |--- weights: [3.00, 0.00] class: -1 |--- feature_1 > 0.00 | |--- weights: [0.00, 3.00] class: 1 - """ - ).lstrip() + """).lstrip() assert export_text(clf, show_weights=True) == expected_report - expected_report = dedent( - """ + expected_report = dedent(""" |- feature_1 <= 0.00 | |- class: -1 |- feature_1 > 0.00 | |- class: 1 - """ - ).lstrip() + """).lstrip() assert export_text(clf, spacing=1) == expected_report X_l = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [-1, 1]] y_l = [-1, -1, -1, 1, 1, 1, 2] clf = DecisionTreeClassifier(max_depth=4, random_state=0) clf.fit(X_l, y_l) - expected_report = dedent( - """ + expected_report = dedent(""" |--- feature_1 <= 0.00 | |--- class: -1 |--- feature_1 > 0.00 | |--- truncated branch of depth 2 - """ - ).lstrip() + """).lstrip() assert export_text(clf, max_depth=0) == expected_report X_mo = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] @@ -441,14 +427,12 @@ def test_export_text(): reg = DecisionTreeRegressor(max_depth=2, random_state=0) reg.fit(X_mo, y_mo) - expected_report = dedent( - """ + expected_report = dedent(""" |--- feature_1 <= 0.0 | |--- value: [-1.0, -1.0] |--- feature_1 > 0.0 | |--- value: [1.0, 1.0] - """ - ).lstrip() + """).lstrip() assert export_text(reg, decimals=1) == expected_report assert export_text(reg, decimals=1, show_weights=True) == expected_report @@ -456,14 +440,12 @@ def test_export_text(): reg = DecisionTreeRegressor(max_depth=2, random_state=0) reg.fit(X_single, y_mo) - expected_report = dedent( - """ + expected_report = dedent(""" |--- first <= 0.0 | |--- value: [-1.0, -1.0] |--- first > 0.0 | |--- value: [1.0, 1.0] - """ - ).lstrip() + """).lstrip() assert export_text(reg, decimals=1, feature_names=["first"]) == expected_report assert ( export_text(reg, decimals=1, show_weights=True, feature_names=["first"]) @@ -512,7 +494,6 @@ def test_plot_tree_gini(pyplot): def test_not_fitted_tree(pyplot): - # Testing if not fitted tree throws the correct error clf = DecisionTreeRegressor() with pytest.raises(NotFittedError): diff --git a/sklearn/utils/_estimator_html_repr.py b/sklearn/utils/_estimator_html_repr.py index 622137d4c256a..5cba23c45a4de 100644 --- a/sklearn/utils/_estimator_html_repr.py +++ b/sklearn/utils/_estimator_html_repr.py @@ -364,11 +364,7 @@ def _write_estimator_html( #$id div.sk-text-repr-fallback { display: none; } -""".replace( - " ", "" -).replace( - "\n", "" -) # noqa +""".replace(" ", "").replace("\n", "") # noqa def estimator_html_repr(estimator): diff --git a/sklearn/utils/_param_validation.py b/sklearn/utils/_param_validation.py index 4f3d793322990..abd02fc15c9d5 100644 --- a/sklearn/utils/_param_validation.py +++ b/sklearn/utils/_param_validation.py @@ -168,7 +168,6 @@ def decorator(func): @functools.wraps(func) def wrapper(*args, **kwargs): - func_sig = signature(func) # Map *args/**kwargs to the function signature @@ -570,8 +569,10 @@ def is_satisfied_by(self, val): # TODO(1.4) remove support for Integral. if isinstance(val, Integral) and not isinstance(val, bool): warnings.warn( - "Passing an int for a boolean parameter is deprecated in version 1.2 " - "and won't be supported anymore in version 1.4.", + ( + "Passing an int for a boolean parameter is deprecated in version" + " 1.2 and won't be supported anymore in version 1.4." + ), FutureWarning, ) diff --git a/sklearn/utils/class_weight.py b/sklearn/utils/class_weight.py index bdcee747129d9..dcf60fb257a27 100644 --- a/sklearn/utils/class_weight.py +++ b/sklearn/utils/class_weight.py @@ -147,7 +147,6 @@ def compute_sample_weight(class_weight, y, *, indices=None): expanded_class_weight = [] for k in range(n_outputs): - y_full = y[:, k] if sparse.issparse(y_full): # Ok to densify a single column at a time diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index ed7f325df5275..4575d29545735 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -275,7 +275,6 @@ def _yield_clustering_checks(clusterer): def _yield_outliers_checks(estimator): - # checks for the contamination parameter if hasattr(estimator, "contamination"): yield check_outlier_contamination @@ -1356,7 +1355,6 @@ def check_methods_subset_invariance(name, estimator_orig): "score_samples", "predict_proba", ]: - msg = ("{method} of {name} is not invariant when applied to a subset.").format( method=method, name=name ) @@ -1650,7 +1648,6 @@ def _check_transformer(name, transformer_orig, X, y): and X.ndim == 2 and X.shape[1] > 1 ): - # If it's not an array, it does not have a 'T' property with raises( ValueError, @@ -2947,7 +2944,6 @@ def check_regressors_no_decision_function(name, regressor_orig): @ignore_warnings(category=FutureWarning) def check_class_weight_classifiers(name, classifier_orig): - if _safe_tags(classifier_orig, key="binary_only"): problems = [2] else: @@ -3563,7 +3559,6 @@ def check_decision_proba_consistency(name, estimator_orig): estimator = clone(estimator_orig) if hasattr(estimator, "decision_function") and hasattr(estimator, "predict_proba"): - estimator.fit(X_train, y_train) # Since the link function from decision_function() to predict_proba() # is sometimes not precise enough (typically expit), we round to the @@ -3972,8 +3967,10 @@ def check_dataframe_column_names_consistency(name, estimator_orig): (names[::-1], "Feature names must be in the same order as they were in fit."), ( [f"another_prefix_{i}" for i in range(n_features)], - "Feature names unseen at fit time:\n- another_prefix_0\n-" - " another_prefix_1\n", + ( + "Feature names unseen at fit time:\n- another_prefix_0\n-" + " another_prefix_1\n" + ), ), ( names[:3], diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index 49908fdf1083d..88c2e9fede3ba 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -42,8 +42,10 @@ def squared_norm(x): x = np.ravel(x, order="K") if np.issubdtype(x.dtype, np.integer): warnings.warn( - "Array type is integer, np.dot may overflow. " - "Data should be float type to avoid this issue", + ( + "Array type is integer, np.dot may overflow. " + "Data should be float type to avoid this issue" + ), UserWarning, ) return np.dot(x, x) @@ -141,8 +143,10 @@ def density(w, **kwargs): """ if kwargs: warnings.warn( - "Additional keyword arguments are deprecated in version 1.2 and will be" - " removed in version 1.4.", + ( + "Additional keyword arguments are deprecated in version 1.2 and will be" + " removed in version 1.4." + ), FutureWarning, ) @@ -1143,8 +1147,10 @@ def stable_cumsum(arr, axis=None, rtol=1e-05, atol=1e-08): ) ): warnings.warn( - "cumsum was found to be unstable: " - "its last element does not correspond to sum", + ( + "cumsum was found to be unstable: " + "its last element does not correspond to sum" + ), RuntimeWarning, ) return out diff --git a/sklearn/utils/parallel.py b/sklearn/utils/parallel.py index 48a31ee93d8a0..b0f65b9a0c1c7 100644 --- a/sklearn/utils/parallel.py +++ b/sklearn/utils/parallel.py @@ -15,10 +15,12 @@ def _with_config(delayed_func, config): return delayed_func.with_config(config) else: warnings.warn( - "`sklearn.utils.parallel.Parallel` needs to be used in " - "conjunction with `sklearn.utils.parallel.delayed` instead of " - "`joblib.delayed` to correctly propagate the scikit-learn " - "configuration to the joblib workers.", + ( + "`sklearn.utils.parallel.Parallel` needs to be used in " + "conjunction with `sklearn.utils.parallel.delayed` instead of " + "`joblib.delayed` to correctly propagate the scikit-learn " + "configuration to the joblib workers." + ), UserWarning, ) return delayed_func @@ -112,10 +114,12 @@ def __call__(self, *args, **kwargs): config = getattr(self, "config", None) if config is None: warnings.warn( - "`sklearn.utils.parallel.delayed` should be used with " - "`sklearn.utils.parallel.Parallel` to make it possible to propagate " - "the scikit-learn configuration of the current thread to the " - "joblib workers.", + ( + "`sklearn.utils.parallel.delayed` should be used with" + " `sklearn.utils.parallel.Parallel` to make it possible to" + " propagate the scikit-learn configuration of the current thread to" + " the joblib workers." + ), UserWarning, ) config = {} diff --git a/sklearn/utils/sparsefuncs.py b/sklearn/utils/sparsefuncs.py index b80b9569a7a32..d69e7ddbca7f3 100644 --- a/sklearn/utils/sparsefuncs.py +++ b/sklearn/utils/sparsefuncs.py @@ -618,7 +618,6 @@ def csc_median_axis_0(X): median = np.zeros(n_features) for f_ind, (start, end) in enumerate(zip(indptr[:-1], indptr[1:])): - # Prevent modifying X in place data = np.copy(X.data[start:end]) nz = n_samples - data.size diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index bdfb06a61c6fd..894b6a03a1abd 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -701,7 +701,6 @@ def test_incremental_weighted_mean_and_variance_simple(rng, dtype): def test_incremental_weighted_mean_and_variance( mean, var, weight_loc, weight_scale, rng ): - # Testing of correctness and numerical stability def _assert(X, sample_weight, expected_mean, expected_var): n = X.shape[0] diff --git a/sklearn/utils/tests/test_pprint.py b/sklearn/utils/tests/test_pprint.py index 420d486840f1f..a4aaa8f21b6b7 100644 --- a/sklearn/utils/tests/test_pprint.py +++ b/sklearn/utils/tests/test_pprint.py @@ -14,6 +14,7 @@ # Ignore flake8 (lots of line too long issues) # flake8: noqa + # Constructors excerpted to test pprinting class LogisticRegression(BaseEstimator): def __init__( @@ -438,7 +439,6 @@ def test_gridsearch_pipeline(print_changed_only_false): def test_n_max_elements_to_show(print_changed_only_false): - n_max_elements_to_show = 30 pp = _EstimatorPrettyPrinter( compact=True, diff --git a/sklearn/utils/tests/test_random.py b/sklearn/utils/tests/test_random.py index 320ebe8b1ae65..192d112337439 100644 --- a/sklearn/utils/tests/test_random.py +++ b/sklearn/utils/tests/test_random.py @@ -34,7 +34,6 @@ def sample_without_replacement_method( def check_edge_case_of_sample_int(sample_without_replacement): - # n_population < n_sample with pytest.raises(ValueError): sample_without_replacement(0, 1) @@ -94,9 +93,9 @@ def check_sample_int_distribution(sample_without_replacement): output = {} for i in range(n_trials): - output[ - frozenset(sample_without_replacement(n_population, n_samples)) - ] = None + output[frozenset(sample_without_replacement(n_population, n_samples))] = ( + None + ) if len(output) == n_expected: break diff --git a/sklearn/utils/tests/test_testing.py b/sklearn/utils/tests/test_testing.py index 70cb3257c26d5..5875eb96bfd8e 100644 --- a/sklearn/utils/tests/test_testing.py +++ b/sklearn/utils/tests/test_testing.py @@ -452,19 +452,27 @@ def test_check_docstring_parameters(): mock_meta = MockMetaEstimator(delegate=MockEst()) mock_meta_name = mock_meta.__class__.__name__ assert incorrect == [ - "sklearn.utils.tests.test_testing.f_check_param_definition There " - "was no space between the param name and colon ('a: int')", - "sklearn.utils.tests.test_testing.f_check_param_definition There " - "was no space between the param name and colon ('b:')", - "sklearn.utils.tests.test_testing.f_check_param_definition There " - "was no space between the param name and colon ('d:int')", + ( + "sklearn.utils.tests.test_testing.f_check_param_definition There " + "was no space between the param name and colon ('a: int')" + ), + ( + "sklearn.utils.tests.test_testing.f_check_param_definition There " + "was no space between the param name and colon ('b:')" + ), + ( + "sklearn.utils.tests.test_testing.f_check_param_definition There " + "was no space between the param name and colon ('d:int')" + ), ] messages = [ [ "In function: sklearn.utils.tests.test_testing.f_bad_order", - "There's a parameter name mismatch in function docstring w.r.t." - " function signature, at index 0 diff: 'b' != 'a'", + ( + "There's a parameter name mismatch in function docstring w.r.t." + " function signature, at index 0 diff: 'b' != 'a'" + ), "Full diff:", "- ['b', 'a']", "+ ['a', 'b']", @@ -472,8 +480,10 @@ def test_check_docstring_parameters(): [ "In function: " + "sklearn.utils.tests.test_testing.f_too_many_param_docstring", - "Parameters in function docstring have more items w.r.t. function" - " signature, first extra item: c", + ( + "Parameters in function docstring have more items w.r.t. function" + " signature, first extra item: c" + ), "Full diff:", "- ['a', 'b']", "+ ['a', 'b', 'c']", @@ -481,16 +491,20 @@ def test_check_docstring_parameters(): ], [ "In function: sklearn.utils.tests.test_testing.f_missing", - "Parameters in function docstring have less items w.r.t. function" - " signature, first missing item: b", + ( + "Parameters in function docstring have less items w.r.t. function" + " signature, first missing item: b" + ), "Full diff:", "- ['a', 'b']", "+ ['a']", ], [ "In function: sklearn.utils.tests.test_testing.Klass.f_missing", - "Parameters in function docstring have less items w.r.t. function" - " signature, first missing item: X", + ( + "Parameters in function docstring have less items w.r.t. function" + " signature, first missing item: X" + ), "Full diff:", "- ['X', 'y']", "+ []", @@ -498,8 +512,10 @@ def test_check_docstring_parameters(): [ "In function: " + f"sklearn.utils.tests.test_testing.{mock_meta_name}.predict", - "There's a parameter name mismatch in function docstring w.r.t." - " function signature, at index 0 diff: 'X' != 'y'", + ( + "There's a parameter name mismatch in function docstring w.r.t." + " function signature, at index 0 diff: 'X' != 'y'" + ), "Full diff:", "- ['X']", "? ^", @@ -523,8 +539,10 @@ def test_check_docstring_parameters(): ], [ "In function: " + f"sklearn.utils.tests.test_testing.{mock_meta_name}.fit", - "Parameters in function docstring have less items w.r.t. function" - " signature, first missing item: X", + ( + "Parameters in function docstring have less items w.r.t. function" + " signature, first missing item: X" + ), "Full diff:", "- ['X', 'y']", "+ []", diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py index a000394bbee28..2365a587f3b72 100644 --- a/sklearn/utils/tests/test_utils.py +++ b/sklearn/utils/tests/test_utils.py @@ -710,7 +710,6 @@ def dummy_func(): def test_deprecation_joblib_api(tmpdir): - # Only parallel_backend and register_parallel_backend are not deprecated in # sklearn.utils from sklearn.utils import parallel_backend, register_parallel_backend diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 7baf8856f3876..53b77a6119d98 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -78,9 +78,11 @@ def inner_f(*args, **kwargs): ] args_msg = ", ".join(args_msg) warnings.warn( - f"Pass {args_msg} as keyword args. From version " - f"{version} passing these as positional arguments " - "will result in an error", + ( + f"Pass {args_msg} as keyword args. From version " + f"{version} passing these as positional arguments " + "will result in an error" + ), FutureWarning, ) kwargs.update(zip(sig.parameters, args)) @@ -648,7 +650,6 @@ def check_array( estimator=None, input_name="", ): - """Input validation on an array, list, sparse matrix or similar. By default, the input is checked to be a non-empty 2D array containing @@ -1209,9 +1210,11 @@ def column_or_1d(y, *, dtype=None, warn=False): if len(shape) == 2 and shape[1] == 1: if warn: warnings.warn( - "A column-vector y was passed when a 1d array was" - " expected. Please change the shape of y to " - "(n_samples, ), for example using ravel().", + ( + "A column-vector y was passed when a 1d array was" + " expected. Please change the shape of y to " + "(n_samples, ), for example using ravel()." + ), DataConversionWarning, stacklevel=2, ) @@ -1324,8 +1327,10 @@ def check_symmetric(array, *, tol=1e-10, raise_warning=True, raise_exception=Fal raise ValueError("Array must be symmetric") if raise_warning: warnings.warn( - "Array is not symmetric, and will be converted " - "to symmetric by average with its transpose.", + ( + "Array is not symmetric, and will be converted " + "to symmetric by average with its transpose." + ), stacklevel=2, ) if sp.issparse(array):