diff --git a/.circleci/config.yml b/.circleci/config.yml index e0ec9a85978f2..bd4914056fe10 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -11,7 +11,7 @@ jobs: command: | source build_tools/shared.sh # Include pytest compatibility with mypy - pip install pytest $(get_dep ruff min) $(get_dep mypy min) $(get_dep black min) cython-lint + pip install pytest $(get_dep ruff min) $(get_dep mypy min) cython-lint - run: name: linting command: ./build_tools/linting.sh diff --git a/.github/workflows/arm-unit-tests.yml b/.github/workflows/arm-unit-tests.yml index 1702177b7a718..e7636d55d7945 100644 --- a/.github/workflows/arm-unit-tests.yml +++ b/.github/workflows/arm-unit-tests.yml @@ -27,7 +27,7 @@ jobs: run: | source build_tools/shared.sh # Include pytest compatibility with mypy - pip install pytest $(get_dep ruff min) $(get_dep mypy min) $(get_dep black min) cython-lint + pip install pytest $(get_dep ruff min) $(get_dep mypy min) cython-lint - name: Run linters run: ./build_tools/linting.sh - name: Run Meson OpenMP checks diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 0ef75cdcce660..9fe670caef441 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -34,11 +34,10 @@ jobs: curl https://raw.githubusercontent.com/${{ github.repository }}/main/build_tools/shared.sh --retry 5 -o ./build_tools/shared.sh source build_tools/shared.sh # Include pytest compatibility with mypy - pip install pytest $(get_dep ruff min) $(get_dep mypy min) $(get_dep black min) cython-lint + pip install pytest $(get_dep ruff min) $(get_dep mypy min) # we save the versions of the linters to be used in the error message later. python -c "from importlib.metadata import version; print(f\"ruff={version('ruff')}\")" >> /tmp/versions.txt python -c "from importlib.metadata import version; print(f\"mypy={version('mypy')}\")" >> /tmp/versions.txt - python -c "from importlib.metadata import version; print(f\"black={version('black')}\")" >> /tmp/versions.txt python -c "from importlib.metadata import version; print(f\"cython-lint={version('cython-lint')}\")" >> /tmp/versions.txt - name: Run linting diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 98e902e622822..42f2445728028 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,14 +7,11 @@ repos: - id: end-of-file-fixer - id: trailing-whitespace - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.11.0 + rev: v0.11.2 hooks: - id: ruff args: ["--fix", "--output-format=full"] -- repo: https://github.com/psf/black - rev: 24.3.0 - hooks: - - id: black + - id: ruff-format - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.15.0 hooks: diff --git a/README.rst b/README.rst index 031b724b5545c..4f4741a090dee 100644 --- a/README.rst +++ b/README.rst @@ -1,6 +1,6 @@ .. -*- mode: rst -*- -|Azure| |Codecov| |CircleCI| |Nightly wheels| |Black| |PythonVersion| |PyPi| |DOI| |Benchmark| +|Azure| |Codecov| |CircleCI| |Nightly wheels| |Ruff| |PythonVersion| |PyPi| |DOI| |Benchmark| .. |Azure| image:: https://dev.azure.com/scikit-learn/scikit-learn/_apis/build/status/scikit-learn.scikit-learn?branchName=main :target: https://dev.azure.com/scikit-learn/scikit-learn/_build/latest?definitionId=1&branchName=main @@ -14,15 +14,15 @@ .. |Nightly wheels| image:: https://github.com/scikit-learn/scikit-learn/workflows/Wheel%20builder/badge.svg?event=schedule :target: https://github.com/scikit-learn/scikit-learn/actions?query=workflow%3A%22Wheel+builder%22+event%3Aschedule +.. 
|Ruff| image:: https://img.shields.io/badge/code%20style-ruff-000000.svg + :target: https://github.com/astral-sh/ruff + .. |PythonVersion| image:: https://img.shields.io/pypi/pyversions/scikit-learn.svg :target: https://pypi.org/project/scikit-learn/ .. |PyPi| image:: https://img.shields.io/pypi/v/scikit-learn :target: https://pypi.org/project/scikit-learn -.. |Black| image:: https://img.shields.io/badge/code%20style-black-000000.svg - :target: https://github.com/psf/black - .. |DOI| image:: https://zenodo.org/badge/21369/scikit-learn/scikit-learn.svg :target: https://zenodo.org/badge/latestdoi/21369/scikit-learn/scikit-learn diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 2caa7846994d6..c4d856e42b6b8 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -35,7 +35,7 @@ jobs: - bash: | source build_tools/shared.sh # Include pytest compatibility with mypy - pip install pytest $(get_dep ruff min) $(get_dep mypy min) $(get_dep black min) cython-lint + pip install pytest $(get_dep ruff min) $(get_dep mypy min) cython-lint displayName: Install linters - bash: | ./build_tools/linting.sh diff --git a/benchmarks/bench_hist_gradient_boosting_adult.py b/benchmarks/bench_hist_gradient_boosting_adult.py index 97c762e8e9230..4d5ce48cded81 100644 --- a/benchmarks/bench_hist_gradient_boosting_adult.py +++ b/benchmarks/bench_hist_gradient_boosting_adult.py @@ -46,7 +46,7 @@ def predict(est, data_test, target_test): toc = time() roc_auc = roc_auc_score(target_test, predicted_proba_test[:, 1]) acc = accuracy_score(target_test, predicted_test) - print(f"predicted in {toc - tic:.3f}s, ROC AUC: {roc_auc:.4f}, ACC: {acc :.4f}") + print(f"predicted in {toc - tic:.3f}s, ROC AUC: {roc_auc:.4f}, ACC: {acc:.4f}") data = fetch_openml(data_id=179, as_frame=True) # adult dataset diff --git a/benchmarks/bench_hist_gradient_boosting_higgsboson.py b/benchmarks/bench_hist_gradient_boosting_higgsboson.py index 20057c50dc810..ceab576bc0a52 100644 --- a/benchmarks/bench_hist_gradient_boosting_higgsboson.py +++ b/benchmarks/bench_hist_gradient_boosting_higgsboson.py @@ -74,7 +74,7 @@ def predict(est, data_test, target_test): toc = time() roc_auc = roc_auc_score(target_test, predicted_proba_test[:, 1]) acc = accuracy_score(target_test, predicted_test) - print(f"predicted in {toc - tic:.3f}s, ROC AUC: {roc_auc:.4f}, ACC: {acc :.4f}") + print(f"predicted in {toc - tic:.3f}s, ROC AUC: {roc_auc:.4f}, ACC: {acc:.4f}") df = load_data() diff --git a/build_tools/get_comment.py b/build_tools/get_comment.py index b47a29e065619..48ff14a058c9a 100644 --- a/build_tools/get_comment.py +++ b/build_tools/get_comment.py @@ -55,9 +55,7 @@ def get_step_message(log, start, end, title, message, details): if end not in log: return "" res = ( - "-----------------------------------------------\n" - f"### {title}\n\n" - f"{message}\n\n" + f"-----------------------------------------------\n### {title}\n\n{message}\n\n" ) if details: res += ( @@ -92,33 +90,31 @@ def get_message(log_file, repo, pr_number, sha, run_id, details, versions): message = "" - # black + # ruff check message += get_step_message( log, - start="### Running black ###", - end="Problems detected by black", - title="`black`", + start="### Running the ruff linter ###", + end="Problems detected by ruff check", + title="`ruff check`", message=( - "`black` detected issues. Please run `black .` locally and push " - "the changes. Here you can see the detected issues. Note that " - "running black might also fix some of the issues which might be " - "detected by `ruff`. 
Note that the installed `black` version is " - f"`black={versions['black']}`." + "`ruff` detected issues. Please run " + "`ruff check --fix --output-format=full` locally, fix the remaining " + "issues, and push the changes. Here you can see the detected issues. Note " + f"that the installed `ruff` version is `ruff={versions['ruff']}`." ), details=details, ) - # ruff + # ruff format message += get_step_message( log, - start="### Running ruff ###", - end="Problems detected by ruff", - title="`ruff`", + start="### Running the ruff formatter ###", + end="Problems detected by ruff format", + title="`ruff format`", message=( - "`ruff` detected issues. Please run " - "`ruff check --fix --output-format=full` locally, fix the remaining " - "issues, and push the changes. Here you can see the detected issues. Note " - f"that the installed `ruff` version is `ruff={versions['ruff']}`." + "`ruff` detected issues. Please run `ruff format` locally and push " + "the changes. Here you can see the detected issues. Note that the " + f"installed `ruff` version is `ruff={versions['ruff']}`." ), details=details, ) @@ -239,7 +235,7 @@ def get_headers(token): def find_lint_bot_comments(repo, token, pr_number): """Get the comment from the linting bot.""" # repo is in the form of "org/repo" - # API doc: https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#list-issue-comments # noqa + # API doc: https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#list-issue-comments response = requests.get( f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments", headers=get_headers(token), @@ -274,7 +270,7 @@ def create_or_update_comment(comment, message, repo, pr_number, token): # repo is in the form of "org/repo" if comment is not None: print("updating existing comment") - # API doc: https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#update-an-issue-comment # noqa + # API doc: https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#update-an-issue-comment response = requests.patch( f"https://api.github.com/repos/{repo}/issues/comments/{comment['id']}", headers=get_headers(token), @@ -282,7 +278,7 @@ def create_or_update_comment(comment, message, repo, pr_number, token): ) else: print("creating new comment") - # API doc: https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#create-an-issue-comment # noqa + # API doc: https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#create-an-issue-comment response = requests.post( f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments", headers=get_headers(token), diff --git a/build_tools/linting.sh b/build_tools/linting.sh index 67450ad8bed74..34b37530e10ff 100755 --- a/build_tools/linting.sh +++ b/build_tools/linting.sh @@ -10,26 +10,25 @@ set -o pipefail global_status=0 -echo -e "### Running black ###\n" -black --check --diff . +echo -e "### Running the ruff linter ###\n" +ruff check --output-format=full status=$? - if [[ $status -eq 0 ]] then - echo -e "No problem detected by black\n" + echo -e "No problem detected by the ruff linter\n" else - echo -e "Problems detected by black, please run black and commit the result\n" + echo -e "Problems detected by ruff check, please fix them\n" global_status=1 fi -echo -e "### Running ruff ###\n" -ruff check --output-format=full +echo -e "### Running the ruff formatter ###\n" +ruff format --diff status=$? 
if [[ $status -eq 0 ]] then - echo -e "No problem detected by ruff\n" + echo -e "No problem detected by the ruff formatter\n" else - echo -e "Problems detected by ruff, please fix them\n" + echo -e "Problems detected by ruff format, please run ruff format and commit the result\n" global_status=1 fi diff --git a/doc/developers/contributing.rst b/doc/developers/contributing.rst index 49ec027be1388..34e8e6d3e2aca 100644 --- a/doc/developers/contributing.rst +++ b/doc/developers/contributing.rst @@ -269,7 +269,7 @@ how to set up your git repository: .. prompt:: bash - pip install pytest pytest-cov ruff mypy numpydoc black==24.3.0 + pip install pytest pytest-cov ruff==0.11.2 mypy numpydoc .. _upstream: @@ -1565,7 +1565,7 @@ make this task easier and faster (in no particular order). variable) in the code base. - Configure `git blame` to ignore the commit that migrated the code style to - `black`. + `black` and then `ruff`. .. prompt:: bash diff --git a/examples/applications/plot_species_distribution_modeling.py b/examples/applications/plot_species_distribution_modeling.py index 5b0d30bc4c8bf..e3120c2cbd19a 100644 --- a/examples/applications/plot_species_distribution_modeling.py +++ b/examples/applications/plot_species_distribution_modeling.py @@ -109,7 +109,7 @@ def create_species_bunch(species_name, train, test, coverages, xgrid, ygrid): def plot_species_distribution( - species=("bradypus_variegatus_0", "microryzomys_minutus_0") + species=("bradypus_variegatus_0", "microryzomys_minutus_0"), ): """ Plot the species distribution. diff --git a/examples/applications/plot_time_series_lagged_features.py b/examples/applications/plot_time_series_lagged_features.py index f2eb039e35fe0..7c5b75e12ccfd 100644 --- a/examples/applications/plot_time_series_lagged_features.py +++ b/examples/applications/plot_time_series_lagged_features.py @@ -265,7 +265,7 @@ def consolidate_scores(cv_results, scores, metric): time = cv_results["fit_time"] scores["fit_time"].append(f"{time.mean():.2f} ± {time.std():.2f} s") - scores["loss"].append(f"quantile {int(quantile*100)}") + scores["loss"].append(f"quantile {int(quantile * 100)}") for key, value in cv_results.items(): if key.startswith("test_"): metric = key.split("test_")[1] diff --git a/examples/applications/plot_topics_extraction_with_nmf_lda.py b/examples/applications/plot_topics_extraction_with_nmf_lda.py index faeef5ae15a11..a6f774d01e2de 100644 --- a/examples/applications/plot_topics_extraction_with_nmf_lda.py +++ b/examples/applications/plot_topics_extraction_with_nmf_lda.py @@ -50,7 +50,7 @@ def plot_top_words(model, feature_names, n_top_words, title): ax = axes[topic_idx] ax.barh(top_features, weights, height=0.7) - ax.set_title(f"Topic {topic_idx +1}", fontdict={"fontsize": 30}) + ax.set_title(f"Topic {topic_idx + 1}", fontdict={"fontsize": 30}) ax.tick_params(axis="both", which="major", labelsize=20) for i in "top right left".split(): ax.spines[i].set_visible(False) diff --git a/examples/covariance/plot_mahalanobis_distances.py b/examples/covariance/plot_mahalanobis_distances.py index a1507c3ef162e..99ae29ceeb106 100644 --- a/examples/covariance/plot_mahalanobis_distances.py +++ b/examples/covariance/plot_mahalanobis_distances.py @@ -60,7 +60,7 @@ Proceedings of the National Academy of Sciences of the United States of America, 17, 684-688. 
-""" # noqa: E501 +""" # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause diff --git a/examples/ensemble/plot_bias_variance.py b/examples/ensemble/plot_bias_variance.py index e1b37c03360f6..72134841c78ea 100644 --- a/examples/ensemble/plot_bias_variance.py +++ b/examples/ensemble/plot_bias_variance.py @@ -177,8 +177,8 @@ def generate(n_samples, noise, n_repeat=1): plt.subplot(2, n_estimators, n_estimators + n + 1) plt.plot(X_test, y_error, "r", label="$error(x)$") - plt.plot(X_test, y_bias, "b", label="$bias^2(x)$"), - plt.plot(X_test, y_var, "g", label="$variance(x)$"), + plt.plot(X_test, y_bias, "b", label="$bias^2(x)$") + plt.plot(X_test, y_var, "g", label="$variance(x)$") plt.plot(X_test, y_noise, "c", label="$noise(x)$") plt.xlim([-5, 5]) diff --git a/examples/feature_selection/plot_rfe_digits.py b/examples/feature_selection/plot_rfe_digits.py index 360a9bd92837f..749cb52e4a72d 100644 --- a/examples/feature_selection/plot_rfe_digits.py +++ b/examples/feature_selection/plot_rfe_digits.py @@ -16,7 +16,7 @@ See also :ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_with_cross_validation.py` -""" # noqa: E501 +""" # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause diff --git a/examples/feature_selection/plot_select_from_model_diabetes.py b/examples/feature_selection/plot_select_from_model_diabetes.py index 793a6916e8969..6c3f32d07cfb0 100644 --- a/examples/feature_selection/plot_select_from_model_diabetes.py +++ b/examples/feature_selection/plot_select_from_model_diabetes.py @@ -40,7 +40,7 @@ # were already standardized. # For a more complete example on the interpretations of the coefficients of # linear models, you may refer to -# :ref:`sphx_glr_auto_examples_inspection_plot_linear_model_coefficient_interpretation.py`. # noqa: E501 +# :ref:`sphx_glr_auto_examples_inspection_plot_linear_model_coefficient_interpretation.py`. import matplotlib.pyplot as plt import numpy as np diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 3acc2b5f1472f..ea2365a71d48a 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -606,8 +606,9 @@ def score_estimator( "predicted, frequency*severity model": np.sum( exposure * glm_freq.predict(X) * glm_sev.predict(X) ), - "predicted, tweedie, power=%.2f" - % glm_pure_premium.power: np.sum(exposure * glm_pure_premium.predict(X)), + "predicted, tweedie, power=%.2f" % glm_pure_premium.power: np.sum( + exposure * glm_pure_premium.predict(X) + ), } ) diff --git a/examples/manifold/plot_lle_digits.py b/examples/manifold/plot_lle_digits.py index 34b221ca0cd1d..45298c944aaee 100644 --- a/examples/manifold/plot_lle_digits.py +++ b/examples/manifold/plot_lle_digits.py @@ -10,7 +10,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause - # %% # Load digits dataset # ------------------- diff --git a/examples/manifold/plot_manifold_sphere.py b/examples/manifold/plot_manifold_sphere.py index 7c666c4b7fb7b..d52d99be4d087 100644 --- a/examples/manifold/plot_manifold_sphere.py +++ b/examples/manifold/plot_manifold_sphere.py @@ -50,7 +50,7 @@ t = random_state.rand(n_samples) * np.pi # Sever the poles from the sphere. 
-indices = (t < (np.pi - (np.pi / 8))) & (t > ((np.pi / 8))) +indices = (t < (np.pi - (np.pi / 8))) & (t > (np.pi / 8)) colors = p[indices] x, y, z = ( np.sin(t[indices]) * np.cos(p[indices]), diff --git a/examples/miscellaneous/plot_partial_dependence_visualization_api.py b/examples/miscellaneous/plot_partial_dependence_visualization_api.py index 8c98b40816496..f941505733579 100644 --- a/examples/miscellaneous/plot_partial_dependence_visualization_api.py +++ b/examples/miscellaneous/plot_partial_dependence_visualization_api.py @@ -11,7 +11,7 @@ See also :ref:`sphx_glr_auto_examples_miscellaneous_plot_roc_curve_visualization_api.py` -""" # noqa: E501 +""" # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause diff --git a/examples/model_selection/plot_likelihood_ratios.py b/examples/model_selection/plot_likelihood_ratios.py index 24a8f2ef1759e..e4c1a6662ffa3 100644 --- a/examples/model_selection/plot_likelihood_ratios.py +++ b/examples/model_selection/plot_likelihood_ratios.py @@ -40,7 +40,7 @@ class proportion than the target application. from sklearn.datasets import make_classification X, y = make_classification(n_samples=10_000, weights=[0.9, 0.1], random_state=0) -print(f"Percentage of people carrying the disease: {100*y.mean():.2f}%") +print(f"Percentage of people carrying the disease: {100 * y.mean():.2f}%") # %% # A machine learning model is built to diagnose if a person with some given diff --git a/examples/model_selection/plot_roc.py b/examples/model_selection/plot_roc.py index 1fc2dedf2943e..a482ad5f4ab95 100644 --- a/examples/model_selection/plot_roc.py +++ b/examples/model_selection/plot_roc.py @@ -152,9 +152,9 @@ # # We can briefly demo the effect of :func:`numpy.ravel`: -print(f"y_score:\n{y_score[0:2,:]}") +print(f"y_score:\n{y_score[0:2, :]}") print() -print(f"y_score.ravel():\n{y_score[0:2,:].ravel()}") +print(f"y_score.ravel():\n{y_score[0:2, :].ravel()}") # %% # In a multi-class classification setup with highly imbalanced classes, @@ -359,7 +359,7 @@ plt.plot( fpr_grid, mean_tpr[ix], - label=f"Mean {label_a} vs {label_b} (AUC = {mean_score :.2f})", + label=f"Mean {label_a} vs {label_b} (AUC = {mean_score:.2f})", linestyle=":", linewidth=4, ) diff --git a/examples/text/plot_document_classification_20newsgroups.py b/examples/text/plot_document_classification_20newsgroups.py index aa80b7c1b630b..ce11377e7531f 100644 --- a/examples/text/plot_document_classification_20newsgroups.py +++ b/examples/text/plot_document_classification_20newsgroups.py @@ -356,7 +356,7 @@ def benchmark(clf, custom_name=False): # Notice that the most important hyperparameters values were tuned using a grid # search procedure not shown in this notebook for the sake of simplicity. See # the example script -# :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py` # noqa: E501 +# :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py` # for a demo on how such tuning can be done. 
from sklearn.ensemble import RandomForestClassifier diff --git a/maint_tools/bump-dependencies-versions.py b/maint_tools/bump-dependencies-versions.py index 1ae1f69be2720..58be1816f71a3 100644 --- a/maint_tools/bump-dependencies-versions.py +++ b/maint_tools/bump-dependencies-versions.py @@ -43,7 +43,7 @@ def get_min_version_with_wheel(package_name, python_version): for file_info in release_info: if ( file_info["packagetype"] == "bdist_wheel" - and f'cp{python_version.replace(".", "")}' in file_info["filename"] + and f"cp{python_version.replace('.', '')}" in file_info["filename"] and not file_info["yanked"] ): compatible_versions.append(ver) diff --git a/pyproject.toml b/pyproject.toml index 6aa9c81bfaca9..1ba3ba2255af4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,8 +83,7 @@ tests = [ "pandas>=1.4.0", "pytest>=7.1.2", "pytest-cov>=2.9.0", - "ruff>=0.11.0", - "black>=24.3.0", + "ruff>=0.11.2", "mypy>=1.15", "pyamg>=4.2.1", "polars>=0.20.30", @@ -112,36 +111,19 @@ addopts = [ "--color=yes", ] -[tool.black] -line-length = 88 -target-version = ['py310', 'py311'] -preview = true -exclude = ''' -/( - \.eggs # exclude a few common directories in the - | \.git # root of the project - | \.mypy_cache - | \.vscode - | build - | dist - | doc/_build - | doc/auto_examples - | sklearn/externals - | asv_benchmarks/env -)/ -''' - [tool.ruff] -# max line length for black line-length = 88 exclude=[ + ".eggs", ".git", + ".mypy_cache", + ".vscode", "__pycache__", + "build", "dist", "sklearn/externals", "doc/_build", "doc/auto_examples", - "build", "asv_benchmarks/env", "asv_benchmarks/html", "asv_benchmarks/results", @@ -154,10 +136,8 @@ preview = true # This enables us to use the explicit preview rules that we want only explicit-preview-rules = true # all rules can be found here: https://docs.astral.sh/ruff/rules/ -select = ["E", "F", "W", "I", "CPY001", "RUF"] +extend-select = ["W", "I", "CPY001", "RUF"] ignore=[ - # space before : (needed for how black formats slicing) - "E203", # do not assign a lambda expression, use a def "E731", # do not use variables named 'l', 'O', or 'I' @@ -176,6 +156,19 @@ ignore=[ "RUF012", "RUF015", "RUF021", + # https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules + "W191", + "E111", + "E114", + "E117", + "D206", + "D300", + "Q000", + "Q001", + "Q002", + "Q003", + "COM812", + "COM819", ] [tool.ruff.lint.flake8-copyright] @@ -217,8 +210,6 @@ follow_imports = "skip" ignore = [ # multiple spaces/tab after comma 'E24', - # space before : (needed for how black formats slicing) - 'E203', # line too long 'E501', # do not assign a lambda expression, use a def diff --git a/sklearn/_loss/tests/test_loss.py b/sklearn/_loss/tests/test_loss.py index 99a89b6226aec..810ca4bde6869 100644 --- a/sklearn/_loss/tests/test_loss.py +++ b/sklearn/_loss/tests/test_loss.py @@ -203,7 +203,8 @@ def test_loss_boundary(loss): @pytest.mark.parametrize( - "loss, y_true_success, y_true_fail", Y_COMMON_PARAMS + Y_TRUE_PARAMS # type: ignore[operator] + "loss, y_true_success, y_true_fail", + Y_COMMON_PARAMS + Y_TRUE_PARAMS, # type: ignore[operator] ) def test_loss_boundary_y_true(loss, y_true_success, y_true_fail): """Test boundaries of y_true for loss functions.""" @@ -214,7 +215,8 @@ def test_loss_boundary_y_true(loss, y_true_success, y_true_fail): @pytest.mark.parametrize( - "loss, y_pred_success, y_pred_fail", Y_COMMON_PARAMS + Y_PRED_PARAMS # type: ignore[operator] + "loss, y_pred_success, y_pred_fail", + Y_COMMON_PARAMS + Y_PRED_PARAMS, # type: ignore[operator] ) def 
test_loss_boundary_y_pred(loss, y_pred_success, y_pred_fail): """Test boundaries of y_pred for loss functions.""" @@ -502,7 +504,7 @@ def test_loss_same_as_C_functions(loss, sample_weight): raw_prediction=raw_prediction, sample_weight=sample_weight, loss_out=out_l2, - ), + ) assert_allclose(out_l1, out_l2) loss.gradient( y_true=y_true, diff --git a/sklearn/_min_dependencies.py b/sklearn/_min_dependencies.py index 03fd53d047249..7e7229d6350e5 100644 --- a/sklearn/_min_dependencies.py +++ b/sklearn/_min_dependencies.py @@ -32,8 +32,7 @@ "memory_profiler": ("0.57.0", "benchmark, docs"), "pytest": (PYTEST_MIN_VERSION, "tests"), "pytest-cov": ("2.9.0", "tests"), - "ruff": ("0.11.0", "tests"), - "black": ("24.3.0", "tests"), + "ruff": ("0.11.2", "tests"), "mypy": ("1.15", "tests"), "pyamg": ("4.2.1", "tests"), "polars": ("0.20.30", "docs, tests"), diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py index 3471329cb1472..cbde0e37de824 100644 --- a/sklearn/cluster/_feature_agglomeration.py +++ b/sklearn/cluster/_feature_agglomeration.py @@ -6,7 +6,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause - import numpy as np from scipy.sparse import issparse diff --git a/sklearn/cross_decomposition/tests/test_pls.py b/sklearn/cross_decomposition/tests/test_pls.py index c107a6a1a76dd..7e516d71b6f98 100644 --- a/sklearn/cross_decomposition/tests/test_pls.py +++ b/sklearn/cross_decomposition/tests/test_pls.py @@ -404,12 +404,12 @@ def test_copy(Est): X_orig = X.copy() with pytest.raises(AssertionError): - pls.transform(X, y, copy=False), + pls.transform(X, y, copy=False) assert_array_almost_equal(X, X_orig) X_orig = X.copy() with pytest.raises(AssertionError): - pls.predict(X, copy=False), + pls.predict(X, copy=False) assert_array_almost_equal(X, X_orig) # Make sure copy=True gives same transform and predictions as predict=False diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py index b12af847c0cda..d2b170e62c99a 100644 --- a/sklearn/datasets/tests/test_openml.py +++ b/sklearn/datasets/tests/test_openml.py @@ -105,9 +105,9 @@ def _file_name(url, suffix): ) def _mock_urlopen_shared(url, has_gzip_header, expected_prefix, suffix): - assert url.startswith( - expected_prefix - ), f"{expected_prefix!r} does not match {url!r}" + assert url.startswith(expected_prefix), ( + f"{expected_prefix!r} does not match {url!r}" + ) data_file_name = _file_name(url, suffix) data_file_path = resources.files(data_module) / data_file_name @@ -141,7 +141,7 @@ def _mock_urlopen_download_data(url, has_gzip_header): # For simplicity the mock filenames don't contain the filename, i.e. # the last part of the data description url after the last /. # For example for id_1, data description download url is: - # gunzip -c sklearn/datasets/tests/data/openml/id_1/api-v1-jd-1.json.gz | grep '"url" # noqa: E501 + # gunzip -c sklearn/datasets/tests/data/openml/id_1/api-v1-jd-1.json.gz | grep '"url" # "https:\/\/www.openml.org\/data\/v1\/download\/1\/anneal.arff" # but the mock filename does not contain anneal.arff and is: # sklearn/datasets/tests/data/openml/id_1/data-v1-dl-1.arff.gz. 
@@ -156,9 +156,9 @@ def _mock_urlopen_download_data(url, has_gzip_header): ) def _mock_urlopen_data_list(url, has_gzip_header): - assert url.startswith( - url_prefix_data_list - ), f"{url_prefix_data_list!r} does not match {url!r}" + assert url.startswith(url_prefix_data_list), ( + f"{url_prefix_data_list!r} does not match {url!r}" + ) data_file_name = _file_name(url, ".json") data_file_path = resources.files(data_module) / data_file_name diff --git a/sklearn/datasets/tests/test_samples_generator.py b/sklearn/datasets/tests/test_samples_generator.py index 5f1fddee0dacd..0ee19102d8de4 100644 --- a/sklearn/datasets/tests/test_samples_generator.py +++ b/sklearn/datasets/tests/test_samples_generator.py @@ -138,17 +138,17 @@ def test_make_classification_informative_features(): signs = signs.view(dtype="|S{0}".format(signs.strides[0])).ravel() unique_signs, cluster_index = np.unique(signs, return_inverse=True) - assert ( - len(unique_signs) == n_clusters - ), "Wrong number of clusters, or not in distinct quadrants" + assert len(unique_signs) == n_clusters, ( + "Wrong number of clusters, or not in distinct quadrants" + ) clusters_by_class = defaultdict(set) for cluster, cls in zip(cluster_index, y): clusters_by_class[cls].add(cluster) for clusters in clusters_by_class.values(): - assert ( - len(clusters) == n_clusters_per_class - ), "Wrong number of clusters per class" + assert len(clusters) == n_clusters_per_class, ( + "Wrong number of clusters per class" + ) assert len(clusters_by_class) == n_classes, "Wrong number of classes" assert_array_almost_equal( @@ -412,9 +412,9 @@ def test_make_blobs_n_samples_list(): X, y = make_blobs(n_samples=n_samples, n_features=2, random_state=0) assert X.shape == (sum(n_samples), 2), "X shape mismatch" - assert all( - np.bincount(y, minlength=len(n_samples)) == n_samples - ), "Incorrect number of samples per blob" + assert all(np.bincount(y, minlength=len(n_samples)) == n_samples), ( + "Incorrect number of samples per blob" + ) def test_make_blobs_n_samples_list_with_centers(): @@ -426,9 +426,9 @@ def test_make_blobs_n_samples_list_with_centers(): ) assert X.shape == (sum(n_samples), 2), "X shape mismatch" - assert all( - np.bincount(y, minlength=len(n_samples)) == n_samples - ), "Incorrect number of samples per blob" + assert all(np.bincount(y, minlength=len(n_samples)) == n_samples), ( + "Incorrect number of samples per blob" + ) for i, (ctr, std) in enumerate(zip(centers, cluster_stds)): assert_almost_equal((X[y == i] - ctr).std(), std, 1, "Unexpected std") @@ -441,9 +441,9 @@ def test_make_blobs_n_samples_centers_none(n_samples): X, y = make_blobs(n_samples=n_samples, centers=centers, random_state=0) assert X.shape == (sum(n_samples), 2), "X shape mismatch" - assert all( - np.bincount(y, minlength=len(n_samples)) == n_samples - ), "Incorrect number of samples per blob" + assert all(np.bincount(y, minlength=len(n_samples)) == n_samples), ( + "Incorrect number of samples per blob" + ) def test_make_blobs_return_centers(): @@ -681,9 +681,9 @@ def test_make_moons(): def test_make_moons_unbalanced(): X, y = make_moons(n_samples=(7, 5)) - assert ( - np.sum(y == 0) == 7 and np.sum(y == 1) == 5 - ), "Number of samples in a moon is wrong" + assert np.sum(y == 0) == 7 and np.sum(y == 1) == 5, ( + "Number of samples in a moon is wrong" + ) assert X.shape == (12, 2), "X shape mismatch" assert y.shape == (12,), "y shape mismatch" diff --git a/sklearn/ensemble/_bagging.py b/sklearn/ensemble/_bagging.py index 901c63c9250bc..adcf7e980965e 100644 --- 
a/sklearn/ensemble/_bagging.py +++ b/sklearn/ensemble/_bagging.py @@ -3,7 +3,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause - import itertools import numbers from abc import ABCMeta, abstractmethod diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py index 86f4255f1785a..5def6ac60816b 100644 --- a/sklearn/ensemble/_forest.py +++ b/sklearn/ensemble/_forest.py @@ -35,7 +35,6 @@ class calls the ``fit`` method of each sub-estimator on random samples # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause - import threading from abc import ABCMeta, abstractmethod from numbers import Integral, Real diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index fcefa31db097c..65906dec99316 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -168,11 +168,12 @@ def test_regression_criterion(name, criterion): reg = ForestRegressor(n_estimators=5, criterion=criterion, random_state=1) reg.fit(X_reg, y_reg) score = reg.score(X_reg, y_reg) - assert ( - score > 0.93 - ), "Failed with max_features=None, criterion %s and score = %f" % ( - criterion, - score, + assert score > 0.93, ( + "Failed with max_features=None, criterion %s and score = %f" + % ( + criterion, + score, + ) ) reg = ForestRegressor( @@ -1068,10 +1069,10 @@ def test_min_weight_fraction_leaf(name): node_weights = np.bincount(out, weights=weights) # drop inner nodes leaf_weights = node_weights[node_weights != 0] - assert ( - np.min(leaf_weights) >= total_weight * est.min_weight_fraction_leaf - ), "Failed with {0} min_weight_fraction_leaf={1}".format( - name, est.min_weight_fraction_leaf + assert np.min(leaf_weights) >= total_weight * est.min_weight_fraction_leaf, ( + "Failed with {0} min_weight_fraction_leaf={1}".format( + name, est.min_weight_fraction_leaf + ) ) diff --git a/sklearn/experimental/enable_hist_gradient_boosting.py b/sklearn/experimental/enable_hist_gradient_boosting.py index 9269b2d0b6d6c..589348fe9bc21 100644 --- a/sklearn/experimental/enable_hist_gradient_boosting.py +++ b/sklearn/experimental/enable_hist_gradient_boosting.py @@ -13,7 +13,6 @@ # Don't remove this file, we don't want to break users code just because the # feature isn't experimental anymore. 
- import warnings warnings.warn( diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index 855ba5ad70f12..fe07b48f4fc2e 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -3,7 +3,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause - import warnings from numbers import Integral, Real diff --git a/sklearn/gaussian_process/tests/test_gpc.py b/sklearn/gaussian_process/tests/test_gpc.py index 3ce2229f9e80f..4bd437df34967 100644 --- a/sklearn/gaussian_process/tests/test_gpc.py +++ b/sklearn/gaussian_process/tests/test_gpc.py @@ -147,8 +147,9 @@ def test_custom_optimizer(kernel, global_random_seed): # Define a dummy optimizer that simply tests 10 random hyperparameters def optimizer(obj_func, initial_theta, bounds): rng = np.random.RandomState(global_random_seed) - theta_opt, func_min = initial_theta, obj_func( - initial_theta, eval_gradient=False + theta_opt, func_min = ( + initial_theta, + obj_func(initial_theta, eval_gradient=False), ) for _ in range(10): theta = np.atleast_1d( diff --git a/sklearn/gaussian_process/tests/test_gpr.py b/sklearn/gaussian_process/tests/test_gpr.py index f49ed71231ad9..f43cc3613b3ff 100644 --- a/sklearn/gaussian_process/tests/test_gpr.py +++ b/sklearn/gaussian_process/tests/test_gpr.py @@ -394,8 +394,9 @@ def test_custom_optimizer(kernel): # Define a dummy optimizer that simply tests 50 random hyperparameters def optimizer(obj_func, initial_theta, bounds): rng = np.random.RandomState(0) - theta_opt, func_min = initial_theta, obj_func( - initial_theta, eval_gradient=False + theta_opt, func_min = ( + initial_theta, + obj_func(initial_theta, eval_gradient=False), ) for _ in range(50): theta = np.atleast_1d( diff --git a/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py b/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py index 597b34a2a30e0..75869079be9cc 100644 --- a/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py +++ b/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py @@ -1186,9 +1186,9 @@ def test_plot_partial_dependence_lines_kw( ) line = disp.lines_[0, 0, -1] - assert ( - line.get_color() == expected_colors[0] - ), f"{line.get_color()}!={expected_colors[0]}\n{line_kw} and {pd_line_kw}" + assert line.get_color() == expected_colors[0], ( + f"{line.get_color()}!={expected_colors[0]}\n{line_kw} and {pd_line_kw}" + ) if pd_line_kw is not None: if "linestyle" in pd_line_kw: assert line.get_linestyle() == pd_line_kw["linestyle"] @@ -1198,9 +1198,9 @@ def test_plot_partial_dependence_lines_kw( assert line.get_linestyle() == "--" line = disp.lines_[0, 0, 0] - assert ( - line.get_color() == expected_colors[1] - ), f"{line.get_color()}!={expected_colors[1]}" + assert line.get_color() == expected_colors[1], ( + f"{line.get_color()}!={expected_colors[1]}" + ) if ice_lines_kw is not None: if "linestyle" in ice_lines_kw: assert line.get_linestyle() == ice_lines_kw["linestyle"] diff --git a/sklearn/kernel_approximation.py b/sklearn/kernel_approximation.py index 35da4d08dcbf4..02c8af755baea 100644 --- a/sklearn/kernel_approximation.py +++ b/sklearn/kernel_approximation.py @@ -716,9 +716,9 @@ def transform(self, X): sparse = sp.issparse(X) if self.sample_interval is None: - # See figure 2 c) of "Efficient additive kernels via explicit feature maps" # noqa + # See figure 2 c) of "Efficient additive kernels via explicit feature maps" # - # A. Vedaldi and A. 
Zisserman, Pattern Analysis and Machine Intelligence, # noqa + # A. Vedaldi and A. Zisserman, Pattern Analysis and Machine Intelligence, # 2011 if self.sample_steps == 1: sample_interval = 0.8 diff --git a/sklearn/linear_model/_glm/_newton_solver.py b/sklearn/linear_model/_glm/_newton_solver.py index a5c72ba3f51b1..d7c8ed8f0943d 100644 --- a/sklearn/linear_model/_glm/_newton_solver.py +++ b/sklearn/linear_model/_glm/_newton_solver.py @@ -254,7 +254,7 @@ def line_search(self, X, y, sample_weight): check = loss_improvement <= t * armijo_term if is_verbose: print( - f" line search iteration={i+1}, step size={t}\n" + f" line search iteration={i + 1}, step size={t}\n" f" check loss improvement <= armijo term: {loss_improvement} " f"<= {t * armijo_term} {check}" ) @@ -300,7 +300,7 @@ def line_search(self, X, y, sample_weight): self.raw_prediction = raw if is_verbose: print( - f" line search successful after {i+1} iterations with " + f" line search successful after {i + 1} iterations with " f"loss={self.loss_value}." ) diff --git a/sklearn/linear_model/_linear_loss.py b/sklearn/linear_model/_linear_loss.py index 3bfd5fcd09491..9213008a19841 100644 --- a/sklearn/linear_model/_linear_loss.py +++ b/sklearn/linear_model/_linear_loss.py @@ -537,9 +537,9 @@ def gradient_hessian( # The L2 penalty enters the Hessian on the diagonal only. To add those # terms, we use a flattened view of the array. order = "C" if hess.flags.c_contiguous else "F" - hess.reshape(-1, order=order)[ - : (n_features * n_dof) : (n_dof + 1) - ] += l2_reg_strength + hess.reshape(-1, order=order)[: (n_features * n_dof) : (n_dof + 1)] += ( + l2_reg_strength + ) if self.fit_intercept: # With intercept included as added column to X, the hessian becomes @@ -795,7 +795,7 @@ def hessp(s): # = sum_{i, m} (X')_{ji} * p_i_k # * (X_{im} * s_k_m - sum_l p_i_l * X_{im} * s_l_m) # - # See also https://github.com/scikit-learn/scikit-learn/pull/3646#discussion_r17461411 # noqa + # See also https://github.com/scikit-learn/scikit-learn/pull/3646#discussion_r17461411 def hessp(s): s = s.reshape((n_classes, -1), order="F") # shape = (n_classes, n_dof) if self.fit_intercept: diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index c22690b2b01c6..27bc81c095d7b 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -5,7 +5,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause - import numbers import warnings from abc import ABCMeta, abstractmethod diff --git a/sklearn/linear_model/_theil_sen.py b/sklearn/linear_model/_theil_sen.py index e6a4fba57401d..88afc17fcf5ff 100644 --- a/sklearn/linear_model/_theil_sen.py +++ b/sklearn/linear_model/_theil_sen.py @@ -5,7 +5,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause - import warnings from itertools import combinations from numbers import Integral, Real diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index a7e02c7afb561..60b8a8bb3e144 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -860,9 +860,9 @@ def test_ridge_loo_cv_asym_scoring(): loo_ridge.fit(X, y) gcv_ridge.fit(X, y) - assert gcv_ridge.alpha_ == pytest.approx( - loo_ridge.alpha_ - ), f"{gcv_ridge.alpha_=}, {loo_ridge.alpha_=}" + assert gcv_ridge.alpha_ == pytest.approx(loo_ridge.alpha_), ( + f"{gcv_ridge.alpha_=}, {loo_ridge.alpha_=}" + ) assert_allclose(gcv_ridge.coef_, loo_ridge.coef_, rtol=1e-3) 
assert_allclose(gcv_ridge.intercept_, loo_ridge.intercept_, rtol=1e-3) @@ -1522,9 +1522,9 @@ def test_ridgecv_alphas_conversion(Estimator): X = rng.randn(n_samples, n_features) ridge_est = Estimator(alphas=alphas) - assert ( - ridge_est.alphas is alphas - ), f"`alphas` was mutated in `{Estimator.__name__}.__init__`" + assert ridge_est.alphas is alphas, ( + f"`alphas` was mutated in `{Estimator.__name__}.__init__`" + ) ridge_est.fit(X, y) assert_array_equal(ridge_est.alphas, np.asarray(alphas)) diff --git a/sklearn/manifold/_spectral_embedding.py b/sklearn/manifold/_spectral_embedding.py index 06a2ffbf27a36..1a3b95e023897 100644 --- a/sklearn/manifold/_spectral_embedding.py +++ b/sklearn/manifold/_spectral_embedding.py @@ -3,7 +3,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause - import warnings from numbers import Integral, Real diff --git a/sklearn/manifold/_t_sne.py b/sklearn/manifold/_t_sne.py index 1bc29fb068da7..cadbab1bf022f 100644 --- a/sklearn/manifold/_t_sne.py +++ b/sklearn/manifold/_t_sne.py @@ -964,9 +964,9 @@ def _fit(self, X, skip_num_points=0): P = _joint_probabilities(distances, self.perplexity, self.verbose) assert np.all(np.isfinite(P)), "All probabilities should be finite" assert np.all(P >= 0), "All probabilities should be non-negative" - assert np.all( - P <= 1 - ), "All probabilities should be less or then equal to one" + assert np.all(P <= 1), ( + "All probabilities should be less or then equal to one" + ) else: # Compute the number of nearest neighbors to find. diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py index 99e4970b64627..5c989e6b439ab 100644 --- a/sklearn/metrics/_ranking.py +++ b/sklearn/metrics/_ranking.py @@ -10,7 +10,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause - import warnings from functools import partial from numbers import Integral, Real diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py index 0f56513abca8e..cd175887f34f5 100644 --- a/sklearn/metrics/cluster/_supervised.py +++ b/sklearn/metrics/cluster/_supervised.py @@ -7,7 +7,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause - import warnings from math import log from numbers import Real diff --git a/sklearn/metrics/cluster/_unsupervised.py b/sklearn/metrics/cluster/_unsupervised.py index 21dd22bc17a93..38cec419e73f7 100644 --- a/sklearn/metrics/cluster/_unsupervised.py +++ b/sklearn/metrics/cluster/_unsupervised.py @@ -3,7 +3,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause - import functools from numbers import Integral diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 6f9e11d4f4780..b31b186054e11 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -641,7 +641,6 @@ def test_symmetric_metric(name): @pytest.mark.parametrize("name", sorted(NOT_SYMMETRIC_METRICS)) def test_not_symmetric_metric(name): - # Test the symmetry of score and loss functions random_state = check_random_state(0) metric = ALL_METRICS[name] @@ -1005,7 +1004,8 @@ def test_regression_thresholded_inf_nan_input(metric, y_true, y_score): @pytest.mark.parametrize("metric", CLASSIFICATION_METRICS.values()) @pytest.mark.parametrize( "y_true, y_score", - invalids_nan_inf + + invalids_nan_inf + + # Add an additional case for classification only # non-regression test for: # https://github.com/scikit-learn/scikit-learn/issues/6809 @@ -2104,7 
+2104,6 @@ def check_array_api_regression_metric_multioutput( def check_array_api_metric_pairwise(metric, array_namespace, device, dtype_name): - X_np = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=dtype_name) Y_np = np.array([[0.2, 0.3, 0.4], [0.5, 0.6, 0.7]], dtype=dtype_name) diff --git a/sklearn/metrics/tests/test_pairwise_distances_reduction.py b/sklearn/metrics/tests/test_pairwise_distances_reduction.py index af055a2091790..0ea6d5d094d56 100644 --- a/sklearn/metrics/tests/test_pairwise_distances_reduction.py +++ b/sklearn/metrics/tests/test_pairwise_distances_reduction.py @@ -228,9 +228,9 @@ def _non_trivial_radius( # on average. Yielding too many results would make the test slow (because # checking the results is expensive for large result sets), yielding 0 most # of the time would make the test useless. - assert ( - precomputed_dists is not None or metric is not None - ), "Either metric or precomputed_dists must be provided." + assert precomputed_dists is not None or metric is not None, ( + "Either metric or precomputed_dists must be provided." + ) if precomputed_dists is None: assert X is not None diff --git a/sklearn/mixture/tests/test_bayesian_mixture.py b/sklearn/mixture/tests/test_bayesian_mixture.py index d17e6710ee5a7..d36543903cb87 100644 --- a/sklearn/mixture/tests/test_bayesian_mixture.py +++ b/sklearn/mixture/tests/test_bayesian_mixture.py @@ -118,7 +118,7 @@ def test_bayesian_mixture_precisions_prior_initialisation(): ) msg = ( "The parameter 'degrees_of_freedom_prior' should be greater than" - f" {n_features -1}, but got {bad_degrees_of_freedom_prior_:.3f}." + f" {n_features - 1}, but got {bad_degrees_of_freedom_prior_:.3f}." ) with pytest.raises(ValueError, match=msg): bgmm.fit(X) diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index 22d4df2fd81c5..5275cab66b3f7 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -6,7 +6,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause - import numbers import time import warnings @@ -819,9 +818,9 @@ def _fit_and_score( progress_msg = "" if verbose > 2: if split_progress is not None: - progress_msg = f" {split_progress[0]+1}/{split_progress[1]}" + progress_msg = f" {split_progress[0] + 1}/{split_progress[1]}" if candidate_progress and verbose > 9: - progress_msg += f"; {candidate_progress[0]+1}/{candidate_progress[1]}" + progress_msg += f"; {candidate_progress[0] + 1}/{candidate_progress[1]}" if verbose > 1: if parameters is None: diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index e87bb440c9563..7459d71ea2bd1 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -2422,9 +2422,9 @@ def __sklearn_tags__(self): for _pairwise_setting in [True, False]: est.set_params(pairwise=_pairwise_setting) cv = GridSearchCV(est, {"n_neighbors": [10]}) - assert ( - _pairwise_setting == cv.__sklearn_tags__().input_tags.pairwise - ), attr_message + assert _pairwise_setting == cv.__sklearn_tags__().input_tags.pairwise, ( + attr_message + ) def test_search_cv_pairwise_property_equivalence_of_precomputed(): diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py index 2286c0ff2573e..39698a8e17b80 100644 --- a/sklearn/model_selection/tests/test_split.py +++ b/sklearn/model_selection/tests/test_split.py @@ -886,9 +886,9 @@ def assert_counts_are_ok(idx_counts, p): bf 
= stats.binom(n_splits, p) for count in idx_counts: prob = bf.pmf(count) - assert ( - prob > threshold - ), "An index is not drawn with chance corresponding to even draws" + assert prob > threshold, ( + "An index is not drawn with chance corresponding to even draws" + ) for n_samples in (6, 22): groups = np.array((n_samples // 2) * [0, 1]) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 86a33d3d8d0b8..48b9fbd3bdf9a 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -8,7 +8,6 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause - import warnings from abc import ABCMeta, abstractmethod from numbers import Integral @@ -687,7 +686,6 @@ def _get_estimator(self): ) if self.base_estimator != "deprecated": - warning_msg = ( "`base_estimator` as an argument was deprecated in 1.7 and will be" " removed in 1.9. Use `estimator` instead." diff --git a/sklearn/neighbors/_classification.py b/sklearn/neighbors/_classification.py index cc20af7432914..6ef690eb8bbe4 100644 --- a/sklearn/neighbors/_classification.py +++ b/sklearn/neighbors/_classification.py @@ -359,7 +359,7 @@ def predict_proba(self, X): # on many combination of datasets. # Hence, we choose to enforce it here. # For more information, see: - # https://github.com/scikit-learn/scikit-learn/pull/24076#issuecomment-1445258342 # noqa + # https://github.com/scikit-learn/scikit-learn/pull/24076#issuecomment-1445258342 # TODO: adapt the heuristic for `strategy="auto"` for # `ArgKminClassMode` and use `strategy="auto"`. strategy="parallel_on_X", @@ -807,7 +807,7 @@ def predict_proba(self, X): # on many combination of datasets. # Hence, we choose to enforce it here. # For more information, see: - # https://github.com/scikit-learn/scikit-learn/pull/26828/files#r1282398471 # noqa + # https://github.com/scikit-learn/scikit-learn/pull/26828/files#r1282398471 ) return probabilities diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index f947eb2e0c2b5..6f42fdea4819e 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -656,7 +656,7 @@ def test_unsupervised_radius_neighbors( assert_allclose( np.concatenate(list(results[i][0])), np.concatenate(list(results[i + 1][0])), - ), + ) assert_allclose( np.concatenate(list(results[i][1])), np.concatenate(list(results[i + 1][1])), diff --git a/sklearn/preprocessing/tests/test_function_transformer.py b/sklearn/preprocessing/tests/test_function_transformer.py index 81d9d0b8eb843..6bfb5d1367c8d 100644 --- a/sklearn/preprocessing/tests/test_function_transformer.py +++ b/sklearn/preprocessing/tests/test_function_transformer.py @@ -36,13 +36,13 @@ def test_delegate_to_func(): ) # The function should only have received X. - assert args_store == [ - X - ], "Incorrect positional arguments passed to func: {args}".format(args=args_store) + assert args_store == [X], ( + "Incorrect positional arguments passed to func: {args}".format(args=args_store) + ) - assert ( - not kwargs_store - ), "Unexpected keyword arguments passed to func: {args}".format(args=kwargs_store) + assert not kwargs_store, ( + "Unexpected keyword arguments passed to func: {args}".format(args=kwargs_store) + ) # reset the argument stores. 
args_store[:] = [] @@ -56,13 +56,13 @@ def test_delegate_to_func(): ) # The function should have received X - assert args_store == [ - X - ], "Incorrect positional arguments passed to func: {args}".format(args=args_store) + assert args_store == [X], ( + "Incorrect positional arguments passed to func: {args}".format(args=args_store) + ) - assert ( - not kwargs_store - ), "Unexpected keyword arguments passed to func: {args}".format(args=kwargs_store) + assert not kwargs_store, ( + "Unexpected keyword arguments passed to func: {args}".format(args=kwargs_store) + ) def test_np_log(): diff --git a/sklearn/semi_supervised/_self_training.py b/sklearn/semi_supervised/_self_training.py index 4b469a2e9f8d8..0fe6f57d6c1ed 100644 --- a/sklearn/semi_supervised/_self_training.py +++ b/sklearn/semi_supervised/_self_training.py @@ -217,8 +217,7 @@ def _get_estimator(self): # TODO(1.8) remove elif self.estimator is None and self.base_estimator == "deprecated": raise ValueError( - "You must pass an estimator to SelfTrainingClassifier." - " Use `estimator`." + "You must pass an estimator to SelfTrainingClassifier. Use `estimator`." ) elif self.estimator is not None and self.base_estimator != "deprecated": raise ValueError( diff --git a/sklearn/tests/metadata_routing_common.py b/sklearn/tests/metadata_routing_common.py index c4af13ef66344..f4dd79581db90 100644 --- a/sklearn/tests/metadata_routing_common.py +++ b/sklearn/tests/metadata_routing_common.py @@ -74,9 +74,9 @@ def check_recorded_metadata(obj, method, parent, split_params=tuple(), **kwargs) for record in all_records: # first check that the names of the metadata passed are the same as # expected. The names are stored as keys in `record`. - assert set(kwargs.keys()) == set( - record.keys() - ), f"Expected {kwargs.keys()} vs {record.keys()}" + assert set(kwargs.keys()) == set(record.keys()), ( + f"Expected {kwargs.keys()} vs {record.keys()}" + ) for key, value in kwargs.items(): recorded_value = record[key] # The following condition is used to check for any specified parameters @@ -87,9 +87,9 @@ def check_recorded_metadata(obj, method, parent, split_params=tuple(), **kwargs) if isinstance(recorded_value, np.ndarray): assert_array_equal(recorded_value, value) else: - assert ( - recorded_value is value - ), f"Expected {recorded_value} vs {value}. Method: {method}" + assert recorded_value is value, ( + f"Expected {recorded_value} vs {value}. 
Method: {method}" + ) record_metadata_not_default = partial(record_metadata, record_default=False) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 7acf8b47f1cd7..f916f7e9862a5 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -296,7 +296,6 @@ def _include_in_get_feature_names_out_check(transformer): "transformer", GET_FEATURES_OUT_ESTIMATORS, ids=_get_check_estimator_ids ) def test_transformers_get_feature_names_out(transformer): - with ignore_warnings(category=(FutureWarning)): check_transformer_get_feature_names_out( transformer.__class__.__name__, transformer diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index e44e2946cb2bb..3a74ccf3b35c3 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -304,16 +304,16 @@ def test_lda_explained_variance_ratio(): clf_lda_eigen = LinearDiscriminantAnalysis(solver="eigen") clf_lda_eigen.fit(X, y) assert_almost_equal(clf_lda_eigen.explained_variance_ratio_.sum(), 1.0, 3) - assert clf_lda_eigen.explained_variance_ratio_.shape == ( - 2, - ), "Unexpected length for explained_variance_ratio_" + assert clf_lda_eigen.explained_variance_ratio_.shape == (2,), ( + "Unexpected length for explained_variance_ratio_" + ) clf_lda_svd = LinearDiscriminantAnalysis(solver="svd") clf_lda_svd.fit(X, y) assert_almost_equal(clf_lda_svd.explained_variance_ratio_.sum(), 1.0, 3) - assert clf_lda_svd.explained_variance_ratio_.shape == ( - 2, - ), "Unexpected length for explained_variance_ratio_" + assert clf_lda_svd.explained_variance_ratio_.shape == (2,), ( + "Unexpected length for explained_variance_ratio_" + ) assert_array_almost_equal( clf_lda_svd.explained_variance_ratio_, clf_lda_eigen.explained_variance_ratio_ diff --git a/sklearn/tests/test_metaestimators.py b/sklearn/tests/test_metaestimators.py index 214fc75a68364..3dbc8f96c10a7 100644 --- a/sklearn/tests/test_metaestimators.py +++ b/sklearn/tests/test_metaestimators.py @@ -157,11 +157,12 @@ def score(self, X, y, *args, **kwargs): if method in delegator_data.skip_methods: continue assert hasattr(delegate, method) - assert hasattr( - delegator, method - ), "%s does not have method %r when its delegate does" % ( - delegator_data.name, - method, + assert hasattr(delegator, method), ( + "%s does not have method %r when its delegate does" + % ( + delegator_data.name, + method, + ) ) # delegation before fit raises a NotFittedError if method == "score": @@ -191,11 +192,12 @@ def score(self, X, y, *args, **kwargs): delegate = SubEstimator(hidden_method=method) delegator = delegator_data.construct(delegate) assert not hasattr(delegate, method) - assert not hasattr( - delegator, method - ), "%s has method %r when its delegate does not" % ( - delegator_data.name, - method, + assert not hasattr(delegator, method), ( + "%s has method %r when its delegate does not" + % ( + delegator_data.name, + method, + ) ) diff --git a/sklearn/tree/tests/test_monotonic_tree.py b/sklearn/tree/tests/test_monotonic_tree.py index 6d89c4ae3f8bb..dfe39720df224 100644 --- a/sklearn/tree/tests/test_monotonic_tree.py +++ b/sklearn/tree/tests/test_monotonic_tree.py @@ -80,9 +80,9 @@ def test_monotonic_constraints_classifications( est.fit(X_train, y_train) proba_test = est.predict_proba(X_test) - assert np.logical_and( - proba_test >= 0.0, proba_test <= 1.0 - ).all(), "Probability should always be in [0, 1] range." 
+ assert np.logical_and(proba_test >= 0.0, proba_test <= 1.0).all(), ( + "Probability should always be in [0, 1] range." + ) assert_allclose(proba_test.sum(axis=1), 1.0) # Monotonic increase constraint, it applies to the positive class diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index 8348cd29e1c8e..790ebdcea1127 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -198,10 +198,10 @@ def assert_tree_equal(d, s, message): - assert ( - s.node_count == d.node_count - ), "{0}: inequal number of node ({1} != {2})".format( - message, s.node_count, d.node_count + assert s.node_count == d.node_count, ( + "{0}: inequal number of node ({1} != {2})".format( + message, s.node_count, d.node_count + ) ) assert_array_equal( @@ -330,9 +330,9 @@ def test_diabetes_overfit(name, Tree, criterion): reg = Tree(criterion=criterion, random_state=0) reg.fit(diabetes.data, diabetes.target) score = mean_squared_error(diabetes.target, reg.predict(diabetes.data)) - assert score == pytest.approx( - 0 - ), f"Failed with {name}, criterion = {criterion} and score = {score}" + assert score == pytest.approx(0), ( + f"Failed with {name}, criterion = {criterion} and score = {score}" + ) @skip_if_32bit @@ -697,10 +697,10 @@ def check_min_weight_fraction_leaf(name, datasets, sparse_container=None): node_weights = np.bincount(out, weights=weights) # drop inner nodes leaf_weights = node_weights[node_weights != 0] - assert ( - np.min(leaf_weights) >= total_weight * est.min_weight_fraction_leaf - ), "Failed with {0} min_weight_fraction_leaf={1}".format( - name, est.min_weight_fraction_leaf + assert np.min(leaf_weights) >= total_weight * est.min_weight_fraction_leaf, ( + "Failed with {0} min_weight_fraction_leaf={1}".format( + name, est.min_weight_fraction_leaf + ) ) # test case with no weights passed in @@ -720,10 +720,10 @@ def check_min_weight_fraction_leaf(name, datasets, sparse_container=None): node_weights = np.bincount(out) # drop inner nodes leaf_weights = node_weights[node_weights != 0] - assert ( - np.min(leaf_weights) >= total_weight * est.min_weight_fraction_leaf - ), "Failed with {0} min_weight_fraction_leaf={1}".format( - name, est.min_weight_fraction_leaf + assert np.min(leaf_weights) >= total_weight * est.min_weight_fraction_leaf, ( + "Failed with {0} min_weight_fraction_leaf={1}".format( + name, est.min_weight_fraction_leaf + ) ) @@ -845,10 +845,10 @@ def test_min_impurity_decrease(global_random_seed): (est3, 0.0001), (est4, 0.1), ): - assert ( - est.min_impurity_decrease <= expected_decrease - ), "Failed, min_impurity_decrease = {0} > {1}".format( - est.min_impurity_decrease, expected_decrease + assert est.min_impurity_decrease <= expected_decrease, ( + "Failed, min_impurity_decrease = {0} > {1}".format( + est.min_impurity_decrease, expected_decrease + ) ) est.fit(X, y) for node in range(est.tree_.node_count): @@ -879,10 +879,10 @@ def test_min_impurity_decrease(global_random_seed): imp_parent - wtd_avg_left_right_imp ) - assert ( - actual_decrease >= expected_decrease - ), "Failed with {0} expected min_impurity_decrease={1}".format( - actual_decrease, expected_decrease + assert actual_decrease >= expected_decrease, ( + "Failed with {0} expected min_impurity_decrease={1}".format( + actual_decrease, expected_decrease + ) ) @@ -923,9 +923,9 @@ def test_pickle(): assert type(est2) == est.__class__ score2 = est2.score(X, y) - assert ( - score == score2 - ), "Failed to generate same score after pickling with {0}".format(name) + assert score == score2, ( 
+ "Failed to generate same score after pickling with {0}".format(name) + ) for attribute in fitted_attribute: assert_array_equal( getattr(est2.tree_, attribute), @@ -2614,9 +2614,9 @@ def test_missing_value_is_predictive(Tree, expected_score, global_random_seed): # Check that the tree can learn the predictive feature # over an average of cross-validation fits. tree_cv_score = cross_val_score(tree, X, y, cv=5).mean() - assert ( - tree_cv_score >= expected_score - ), f"Expected CV score: {expected_score} but got {tree_cv_score}" + assert tree_cv_score >= expected_score, ( + f"Expected CV score: {expected_score} but got {tree_cv_score}" + ) @pytest.mark.parametrize( diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py index 48c941f3c6e85..eb5b4128782e1 100644 --- a/sklearn/utils/_array_api.py +++ b/sklearn/utils/_array_api.py @@ -854,7 +854,7 @@ def _searchsorted(a, v, *, side="left", sorter=None, xp=None): # Temporary workaround needed as long as searchsorted is not widely # adopted by implementers of the Array API spec. This is a quite # recent addition to the spec: - # https://data-apis.org/array-api/latest/API_specification/generated/array_api.searchsorted.html # noqa + # https://data-apis.org/array-api/latest/API_specification/generated/array_api.searchsorted.html xp, _ = get_namespace(a, v, xp=xp) if hasattr(xp, "searchsorted"): return xp.searchsorted(a, v, side=side, sorter=sorter) diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index d7d77a74c6fa8..826c8c61c4480 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -1101,8 +1101,9 @@ def __iter__(self): method_mapping = MethodMapping() for method in METHODS: method_mapping.add(caller=method, callee=method) - yield "$self_request", RouterMappingPair( - mapping=method_mapping, router=self._self_request + yield ( + "$self_request", + RouterMappingPair(mapping=method_mapping, router=self._self_request), ) for name, route_mapping in self._route_mappings.items(): yield (name, route_mapping) diff --git a/sklearn/utils/_test_common/instance_generator.py b/sklearn/utils/_test_common/instance_generator.py index e619deab1c93e..1b6f73264e4d6 100644 --- a/sklearn/utils/_test_common/instance_generator.py +++ b/sklearn/utils/_test_common/instance_generator.py @@ -961,8 +961,7 @@ def _yield_instances_for_check(check, estimator_orig): }, HalvingGridSearchCV: { "check_fit2d_1sample": ( - "Fail during parameter check since min/max resources requires" - " more samples" + "Fail during parameter check since min/max resources requires more samples" ), "check_estimators_nan_inf": "FIXME", "check_classifiers_one_label_sample_weights": "FIXME", @@ -972,8 +971,7 @@ def _yield_instances_for_check(check, estimator_orig): }, HalvingRandomSearchCV: { "check_fit2d_1sample": ( - "Fail during parameter check since min/max resources requires" - " more samples" + "Fail during parameter check since min/max resources requires more samples" ), "check_estimators_nan_inf": "FIXME", "check_classifiers_one_label_sample_weights": "FIXME", diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 5142de2348e2a..6c3d16d98d7fb 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -4759,9 +4759,9 @@ def check_transformer_get_feature_names_out(name, transformer_orig): else: n_features_out = X_transform.shape[1] - assert ( - len(feature_names_out) == n_features_out - ), f"Expected {n_features_out} feature names, got 
{len(feature_names_out)}" + assert len(feature_names_out) == n_features_out, ( + f"Expected {n_features_out} feature names, got {len(feature_names_out)}" + ) def check_transformer_get_feature_names_out_pandas(name, transformer_orig): @@ -4816,9 +4816,9 @@ def check_transformer_get_feature_names_out_pandas(name, transformer_orig): else: n_features_out = X_transform.shape[1] - assert ( - len(feature_names_out_default) == n_features_out - ), f"Expected {n_features_out} feature names, got {len(feature_names_out_default)}" + assert len(feature_names_out_default) == n_features_out, ( + f"Expected {n_features_out} feature names, got {len(feature_names_out_default)}" + ) def check_param_validation(name, estimator_orig): @@ -5329,9 +5329,7 @@ def check_classifier_not_supporting_multiclass(name, estimator_orig): 'Only binary classification is supported. The type of the target ' f'is {{y_type}}.' ) - """.format( - name=name - ) + """.format(name=name) err_msg = textwrap.dedent(err_msg) with raises( diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index e228825d3d449..bbe7e75d188de 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -337,7 +337,7 @@ def _in_unstable_openblas_configuration(): return False # OpenBLAS 0.3.16 fixed instability for arm64, see: - # https://github.com/xianyi/OpenBLAS/blob/1b6db3dbba672b4f8af935bd43a1ff6cff4d20b7/Changelog.txt#L56-L58 # noqa + # https://github.com/xianyi/OpenBLAS/blob/1b6db3dbba672b4f8af935bd43a1ff6cff4d20b7/Changelog.txt#L56-L58 openblas_arm64_stable_version = parse_version("0.3.16") for info in modules_info: if info["internal_api"] != "openblas": diff --git a/sklearn/utils/tests/test_indexing.py b/sklearn/utils/tests/test_indexing.py index 27b51da5ff962..61feee2304723 100644 --- a/sklearn/utils/tests/test_indexing.py +++ b/sklearn/utils/tests/test_indexing.py @@ -583,7 +583,6 @@ def test_resample_stratify_2dy(): def test_notimplementederror(): - with pytest.raises( NotImplementedError, match="Resampling with sample_weight is only implemented for replace=True.", diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py index e361a93e41b10..9a9cbb1f60bdd 100644 --- a/sklearn/utils/tests/test_multiclass.py +++ b/sklearn/utils/tests/test_multiclass.py @@ -369,17 +369,17 @@ def test_is_multilabel(): ) ] for exmpl_sparse in examples_sparse: - assert sparse_exp == is_multilabel( - exmpl_sparse - ), f"is_multilabel({exmpl_sparse!r}) should be {sparse_exp}" + assert sparse_exp == is_multilabel(exmpl_sparse), ( + f"is_multilabel({exmpl_sparse!r}) should be {sparse_exp}" + ) # Densify sparse examples before testing if issparse(example): example = example.toarray() - assert dense_exp == is_multilabel( - example - ), f"is_multilabel({example!r}) should be {dense_exp}" + assert dense_exp == is_multilabel(example), ( + f"is_multilabel({example!r}) should be {dense_exp}" + ) @pytest.mark.parametrize( @@ -400,9 +400,9 @@ def test_is_multilabel_array_api_compliance(array_namespace, device, dtype_name) example = xp.asarray(example, device=device) with config_context(array_api_dispatch=True): - assert dense_exp == is_multilabel( - example - ), f"is_multilabel({example!r}) should be {dense_exp}" + assert dense_exp == is_multilabel(example), ( + f"is_multilabel({example!r}) should be {dense_exp}" + ) def test_check_classification_targets(): @@ -420,12 +420,13 @@ def test_check_classification_targets(): def test_type_of_target(): for group, group_examples in EXAMPLES.items(): for example in group_examples: - assert ( 
-                type_of_target(example) == group
-            ), "type_of_target(%r) should be %r, got %r" % (
-                example,
-                group,
-                type_of_target(example),
+            assert type_of_target(example) == group, (
+                "type_of_target(%r) should be %r, got %r"
+                % (
+                    example,
+                    group,
+                    type_of_target(example),
+                )
             )

     for example in NON_ARRAY_LIKE_EXAMPLES:
diff --git a/sklearn/utils/tests/test_pprint.py b/sklearn/utils/tests/test_pprint.py
index b3df08732d798..e8026ae36d54c 100644
--- a/sklearn/utils/tests/test_pprint.py
+++ b/sklearn/utils/tests/test_pprint.py
@@ -4,16 +4,12 @@
 import numpy as np
 import pytest

-from sklearn.utils._pprint import _EstimatorPrettyPrinter
-from sklearn.linear_model import LogisticRegressionCV
-from sklearn.pipeline import make_pipeline
+from sklearn import config_context
 from sklearn.base import BaseEstimator, TransformerMixin
 from sklearn.feature_selection import SelectKBest, chi2
-from sklearn import config_context
-
-
-# Ignore flake8 (lots of line too long issues)
-# ruff: noqa
+from sklearn.linear_model import LogisticRegressionCV
+from sklearn.pipeline import make_pipeline
+from sklearn.utils._pprint import _EstimatorPrettyPrinter


 # Constructors excerpted to test pprinting
diff --git a/sklearn/utils/tests/test_seq_dataset.py b/sklearn/utils/tests/test_seq_dataset.py
index 0e6f182e7c71b..7c3420aeb83c2 100644
--- a/sklearn/utils/tests/test_seq_dataset.py
+++ b/sklearn/utils/tests/test_seq_dataset.py
@@ -154,10 +154,10 @@ def test_fused_types_consistency(dataset_32, dataset_64):

 def test_buffer_dtype_mismatch_error():
     with pytest.raises(ValueError, match="Buffer dtype mismatch"):
-        ArrayDataset64(X32, y32, sample_weight32, seed=42),
+        ArrayDataset64(X32, y32, sample_weight32, seed=42)

     with pytest.raises(ValueError, match="Buffer dtype mismatch"):
-        ArrayDataset32(X64, y64, sample_weight64, seed=42),
+        ArrayDataset32(X64, y64, sample_weight64, seed=42)

     for csr_container in CSR_CONTAINERS:
         X_csr32 = csr_container(X32)
@@ -170,7 +170,7 @@ def test_buffer_dtype_mismatch_error():
                 y32,
                 sample_weight32,
                 seed=42,
-            ),
+            )

         with pytest.raises(ValueError, match="Buffer dtype mismatch"):
             CSRDataset32(
@@ -180,4 +180,4 @@ def test_buffer_dtype_mismatch_error():
                 y64,
                 sample_weight64,
                 seed=42,
-            ),
+            )
diff --git a/sklearn/utils/tests/test_tags.py b/sklearn/utils/tests/test_tags.py
index 72a811c8470ef..88d5593e26d47 100644
--- a/sklearn/utils/tests/test_tags.py
+++ b/sklearn/utils/tests/test_tags.py
@@ -565,7 +565,6 @@ def __sklearn_tags__(self):
     assert _to_new_tags(_to_old_tags(new_tags), estimator=estimator) == new_tags

     class MyClass:
-
         def fit(self, X, y=None):
             return self  # pragma: no cover

diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index ae12f13624055..1aaf7c346b1d3 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -852,9 +852,9 @@ class TestClassWithDeprecatedFitMethod:
         def fit(self, X, y, sample_weight=None):
            pass

-    assert has_fit_parameter(
-        TestClassWithDeprecatedFitMethod, "sample_weight"
-    ), "has_fit_parameter fails for class with deprecated fit method."
+    assert has_fit_parameter(TestClassWithDeprecatedFitMethod, "sample_weight"), (
+        "has_fit_parameter fails for class with deprecated fit method."
+    )


 def test_check_symmetric():
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 116d12fc5e8ad..8173c431bd930 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -1547,8 +1547,7 @@ def has_fit_parameter(estimator, parameter):
         # hasattr(estimator, "fit") makes it so that we don't fail for an estimator
         # that does not have a `fit` method during collection of checks. The right
         # checks will fail later.
-        hasattr(estimator, "fit")
-        and parameter in signature(estimator.fit).parameters
+        hasattr(estimator, "fit") and parameter in signature(estimator.fit).parameters
     )
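The rewrite that accounts for most of the hunks above is a pure layout change: black wraps a long assert condition in parentheses and lets the failure message trail the closing parenthesis, while ruff-format keeps the condition on the assert line and parenthesizes the message instead. A minimal before/after sketch of the pattern; the names items and n_expected are illustrative, not taken from the patch:

    # black layout: condition wrapped in parentheses, message trailing
    assert (
        len(items) == n_expected
    ), f"Expected {n_expected} items, got {len(items)}"

    # ruff-format layout: bare condition, parenthesized message
    assert len(items) == n_expected, (
        f"Expected {n_expected} items, got {len(items)}"
    )

The two forms are semantically identical, since parentheses around a single expression do not change what the assert evaluates. The sklearn/utils/tests/test_seq_dataset.py hunks go slightly beyond layout: a trailing comma after a bare call such as ArrayDataset64(X32, y32, sample_weight32, seed=42), turns the statement into a one-element tuple. The tuple is built and discarded, so behavior inside pytest.raises is unchanged, but dropping the comma removes an easy misreading.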